summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--storage/archive/archive_reader.c8
-rw-r--r--storage/archive/ha_archive.cc60
-rw-r--r--storage/archive/ha_archive.h4
-rw-r--r--storage/blackhole/ha_blackhole.cc11
-rw-r--r--storage/columnstore/CMakeLists.txt6
m---------storage/columnstore/columnstore0
-rw-r--r--storage/connect/ha_connect.cc10
-rw-r--r--storage/connect/ha_connect.h14
-rw-r--r--storage/connect/mysql-test/connect/r/index.result29
-rw-r--r--storage/connect/mysql-test/connect/r/mysql_index.result13
-rw-r--r--storage/connect/mysql-test/connect/t/index.test7
-rw-r--r--storage/connect/mysql-test/connect/t/mysql_index.test3
-rw-r--r--storage/connect/tabext.cpp2
-rw-r--r--storage/csv/ha_tina.h7
-rw-r--r--storage/example/ha_example.h33
-rw-r--r--storage/federated/ha_federated.cc22
-rw-r--r--storage/federated/ha_federated.h37
-rw-r--r--storage/federatedx/ha_federatedx.cc21
-rw-r--r--storage/federatedx/ha_federatedx.h31
-rw-r--r--storage/heap/ha_heap.cc61
-rw-r--r--storage/heap/ha_heap.h114
-rw-r--r--storage/innobase/CMakeLists.txt3
-rw-r--r--storage/innobase/btr/btr0btr.cc489
-rw-r--r--storage/innobase/btr/btr0bulk.cc12
-rw-r--r--storage/innobase/btr/btr0cur.cc556
-rw-r--r--storage/innobase/btr/btr0defragment.cc42
-rw-r--r--storage/innobase/btr/btr0pcur.cc24
-rw-r--r--storage/innobase/btr/btr0sea.cc18
-rw-r--r--storage/innobase/buf/buf0buddy.cc4
-rw-r--r--storage/innobase/buf/buf0buf.cc509
-rw-r--r--storage/innobase/buf/buf0flu.cc29
-rw-r--r--storage/innobase/buf/buf0lru.cc17
-rw-r--r--storage/innobase/buf/buf0rea.cc377
-rw-r--r--storage/innobase/data/data0type.cc9
-rw-r--r--storage/innobase/dict/dict0boot.cc41
-rw-r--r--storage/innobase/dict/dict0defrag_bg.cc7
-rw-r--r--storage/innobase/dict/dict0dict.cc38
-rw-r--r--storage/innobase/dict/dict0load.cc16
-rw-r--r--storage/innobase/dict/dict0stats.cc33
-rw-r--r--storage/innobase/fil/fil0fil.cc147
-rw-r--r--storage/innobase/fil/fil0pagecompress.cc3
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc25
-rw-r--r--storage/innobase/gis/gis0rtree.cc276
-rw-r--r--storage/innobase/gis/gis0sea.cc83
-rw-r--r--storage/innobase/handler/ha_innodb.cc334
-rw-r--r--storage/innobase/handler/ha_innodb.h9
-rw-r--r--storage/innobase/handler/handler0alter.cc4
-rw-r--r--storage/innobase/handler/i_s.cc42
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc4656
-rw-r--r--storage/innobase/include/btr0btr.h40
-rw-r--r--storage/innobase/include/btr0cur.h35
-rw-r--r--storage/innobase/include/btr0types.h45
-rw-r--r--storage/innobase/include/buf0buf.h142
-rw-r--r--storage/innobase/include/buf0buf.inl2
-rw-r--r--storage/innobase/include/buf0lru.h20
-rw-r--r--storage/innobase/include/buf0rea.h48
-rw-r--r--storage/innobase/include/data0type.h57
-rw-r--r--storage/innobase/include/data0type.inl122
-rw-r--r--storage/innobase/include/dict0boot.h35
-rw-r--r--storage/innobase/include/dict0dict.h23
-rw-r--r--storage/innobase/include/dict0dict.inl2
-rw-r--r--storage/innobase/include/dict0load.h8
-rw-r--r--storage/innobase/include/dict0mem.h20
-rw-r--r--storage/innobase/include/dict0types.h13
-rw-r--r--storage/innobase/include/fil0fil.h58
-rw-r--r--storage/innobase/include/fsp0types.h16
-rw-r--r--storage/innobase/include/gis0rtree.h65
-rw-r--r--storage/innobase/include/gis0rtree.inl5
-rw-r--r--storage/innobase/include/ibuf0ibuf.h457
-rw-r--r--storage/innobase/include/ibuf0ibuf.inl282
-rw-r--r--storage/innobase/include/log0log.h4
-rw-r--r--storage/innobase/include/log0recv.h12
-rw-r--r--storage/innobase/include/mtr0mtr.h13
-rw-r--r--storage/innobase/include/page0cur.h12
-rw-r--r--storage/innobase/include/page0cur.inl7
-rw-r--r--storage/innobase/include/page0page.h19
-rw-r--r--storage/innobase/include/page0zip.h10
-rw-r--r--storage/innobase/include/page0zip.inl4
-rw-r--r--storage/innobase/include/rem0rec.inl6
-rw-r--r--storage/innobase/include/row0purge.h35
-rw-r--r--storage/innobase/include/row0row.h35
-rw-r--r--storage/innobase/include/srv0mon.h21
-rw-r--r--storage/innobase/include/srv0srv.h15
-rw-r--r--storage/innobase/include/sux_lock.h4
-rw-r--r--storage/innobase/include/trx0trx.h25
-rw-r--r--storage/innobase/include/trx0undo.h6
-rw-r--r--storage/innobase/include/univ.i6
-rw-r--r--storage/innobase/log/log0log.cc48
-rw-r--r--storage/innobase/log/log0recv.cc168
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc5
-rw-r--r--storage/innobase/os/os0file.cc214
-rw-r--r--storage/innobase/page/page0cur.cc16
-rw-r--r--storage/innobase/page/page0page.cc39
-rw-r--r--storage/innobase/page/page0zip.cc14
-rw-r--r--storage/innobase/rem/rem0cmp.cc46
-rw-r--r--storage/innobase/rem/rem0rec.cc9
-rw-r--r--storage/innobase/row/row0import.cc89
-rw-r--r--storage/innobase/row/row0ins.cc63
-rw-r--r--storage/innobase/row/row0log.cc62
-rw-r--r--storage/innobase/row/row0merge.cc12
-rw-r--r--storage/innobase/row/row0mysql.cc15
-rw-r--r--storage/innobase/row/row0purge.cc95
-rw-r--r--storage/innobase/row/row0quiesce.cc15
-rw-r--r--storage/innobase/row/row0row.cc77
-rw-r--r--storage/innobase/row/row0sel.cc14
-rw-r--r--storage/innobase/row/row0uins.cc27
-rw-r--r--storage/innobase/row/row0umod.cc49
-rw-r--r--storage/innobase/row/row0upd.cc41
-rw-r--r--storage/innobase/srv/srv0mon.cc112
-rw-r--r--storage/innobase/srv/srv0srv.cc76
-rw-r--r--storage/innobase/srv/srv0start.cc244
-rw-r--r--storage/innobase/trx/trx0purge.cc11
-rw-r--r--storage/innobase/trx/trx0rseg.cc8
-rw-r--r--storage/innobase/trx/trx0sys.cc10
-rw-r--r--storage/innobase/trx/trx0trx.cc12
-rw-r--r--storage/innobase/trx/trx0undo.cc9
-rw-r--r--storage/maria/CMakeLists.txt1
-rw-r--r--storage/maria/aria_chk.c12
-rw-r--r--storage/maria/aria_dump_log.c10
-rw-r--r--storage/maria/aria_pack.c8
-rw-r--r--storage/maria/aria_read_log.c13
-rw-r--r--storage/maria/aria_s3_copy.152
-rw-r--r--storage/maria/aria_s3_copy.cc8
-rw-r--r--storage/maria/ha_maria.cc103
-rw-r--r--storage/maria/ha_maria.h9
-rw-r--r--storage/maria/ma_bitmap.c4
-rw-r--r--storage/maria/ma_blockrec.c4
-rw-r--r--storage/maria/ma_check.c21
-rw-r--r--storage/maria/ma_control_file.c2
-rw-r--r--storage/maria/ma_control_file.h2
-rw-r--r--storage/maria/ma_extra.c9
-rw-r--r--storage/maria/ma_info.c8
-rw-r--r--storage/maria/ma_key.c40
-rw-r--r--storage/maria/ma_loghandler.c133
-rw-r--r--storage/maria/ma_loghandler.h4
-rw-r--r--storage/maria/ma_pagecache.c4
-rw-r--r--storage/maria/ma_recovery.c12
-rw-r--r--storage/maria/ma_recovery_util.c2
-rw-r--r--storage/maria/ma_rkey.c1
-rw-r--r--storage/maria/ma_scan.c4
-rw-r--r--storage/maria/ma_write.c9
-rw-r--r--storage/maria/maria_def.h39
-rw-r--r--storage/mroonga/ha_mroonga.cpp87
-rw-r--r--storage/mroonga/ha_mroonga.hpp20
-rw-r--r--storage/mroonga/mysql-test/mroonga/storage/r/optimization_count_skip_index_not_equal.result3
-rw-r--r--storage/mroonga/mysql-test/mroonga/storage/t/optimization_count_skip_index_not_equal.test1
-rw-r--r--storage/mroonga/mysql-test/mroonga/wrapper/r/geometry_contains.result2
-rw-r--r--storage/myisam/ha_myisam.cc63
-rw-r--r--storage/myisam/ha_myisam.h169
-rw-r--r--storage/myisam/mi_extra.c4
-rw-r--r--storage/myisam/mi_key.c42
-rw-r--r--storage/myisam/mi_rkey.c2
-rw-r--r--storage/myisam/mi_scan.c4
-rw-r--r--storage/myisam/myisamchk.c10
-rw-r--r--storage/myisam/myisamdef.h13
-rw-r--r--storage/myisam/myisamlog.c5
-rw-r--r--storage/myisam/myisampack.c9
-rw-r--r--storage/myisammrg/ha_myisammrg.cc35
-rw-r--r--storage/myisammrg/ha_myisammrg.h115
-rw-r--r--storage/oqgraph/ha_oqgraph.h7
-rw-r--r--storage/perfschema/ha_perfschema.h6
-rw-r--r--storage/rocksdb/CMakeLists.txt2
-rw-r--r--storage/rocksdb/ha_rocksdb.cc39
-rw-r--r--storage/rocksdb/ha_rocksdb.h16
-rw-r--r--storage/rocksdb/mariadb-ldb.116
-rw-r--r--storage/rocksdb/myrocks_hotbackup.182
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result60
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/select.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test4
-rw-r--r--storage/rocksdb/tools/mysql_ldb.cc1
-rw-r--r--storage/sequence/mysql-test/sequence/group_by.result2
-rw-r--r--storage/sequence/sequence.cc66
-rw-r--r--storage/sphinx/ha_sphinx.h30
-rw-r--r--storage/spider/ha_spider.cc82
-rw-r--r--storage/spider/ha_spider.h13
-rw-r--r--storage/spider/mysql-test/spider/bg/r/spider_fixes.result1
-rw-r--r--storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc16
-rw-r--r--storage/spider/mysql-test/spider/bugfix/r/quick_mode_1.result4
-rw-r--r--storage/spider/mysql-test/spider/bugfix/t/mdev_22246.test1
-rw-r--r--storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test4
-rw-r--r--storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test1
-rw-r--r--storage/spider/mysql-test/spider/r/direct_left_join_nullable.result2
-rw-r--r--storage/spider/mysql-test/spider/r/direct_left_right_join_nullable.result2
-rw-r--r--storage/spider/mysql-test/spider/r/direct_right_join_nullable.result2
-rw-r--r--storage/spider/mysql-test/spider/r/direct_right_left_right_join_nullable.result2
-rw-r--r--storage/spider/mysql-test/spider/r/partition_mrr.result48
-rw-r--r--storage/spider/mysql-test/spider/r/spider_fixes.result1
-rw-r--r--storage/spider/mysql-test/spider/t/partition_mrr.test1
-rw-r--r--storage/spider/spd_conn.cc41
-rw-r--r--storage/spider/spd_conn.h3
-rw-r--r--storage/spider/spd_copy_tables.cc9
-rw-r--r--storage/spider/spd_db_conn.cc3
-rw-r--r--storage/spider/spd_db_include.h2
-rw-r--r--storage/spider/spd_db_mysql.cc89
-rw-r--r--storage/spider/spd_direct_sql.cc2
-rw-r--r--storage/spider/spd_include.h2
-rw-r--r--storage/spider/spd_init_query.h33
-rw-r--r--storage/spider/spd_ping_table.cc8
-rw-r--r--storage/spider/spd_sys_table.cc44
-rw-r--r--storage/spider/spd_table.cc52
-rw-r--r--storage/spider/spd_trx.cc43
-rw-r--r--storage/spider/spd_trx.h6
215 files changed, 3874 insertions, 9759 deletions
diff --git a/storage/archive/archive_reader.c b/storage/archive/archive_reader.c
index 0e02127ea32..85637a04391 100644
--- a/storage/archive/archive_reader.c
+++ b/storage/archive/archive_reader.c
@@ -26,7 +26,7 @@
#define BUFFER_LEN 1024
#define ARCHIVE_ROW_HEADER_SIZE 4
-#define SHOW_VERSION "0.1"
+#define VER "0.1"
static void get_options(int *argc,char * * *argv);
static void print_version(void);
@@ -400,12 +400,6 @@ static void usage(void)
my_print_help(my_long_options);
}
-static void print_version(void)
-{
- printf("%s Ver %s Distrib %s, for %s (%s)\n", my_progname, SHOW_VERSION,
- MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE);
-}
-
static void get_options(int *argc, char ***argv)
{
load_defaults_or_exit("my", load_default_groups, argc, argv);
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 19a0ffe028a..2a8deb431b1 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -132,7 +132,8 @@ extern "C" PSI_file_key arch_key_file_data;
static handler *archive_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
-int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share);
+static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share);
+static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs);
/*
Number of rows that will force a bulk insert.
@@ -205,6 +206,7 @@ static const char *ha_archive_exts[] = {
NullS
};
+
int archive_db_init(void *p)
{
DBUG_ENTER("archive_db_init");
@@ -217,10 +219,10 @@ int archive_db_init(void *p)
archive_hton= (handlerton *)p;
archive_hton->db_type= DB_TYPE_ARCHIVE_DB;
archive_hton->create= archive_create_handler;
- archive_hton->flags= HTON_NO_FLAGS;
archive_hton->discover_table= archive_discover;
archive_hton->tablefile_extensions= ha_archive_exts;
-
+ archive_hton->update_optimizer_costs= archive_update_optimizer_costs;
+ archive_hton->flags= HTON_NO_FLAGS;
DBUG_RETURN(0);
}
@@ -267,7 +269,7 @@ ha_archive::ha_archive(handlerton *hton, TABLE_SHARE *table_arg)
archive_reader_open= FALSE;
}
-int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share)
+static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share)
{
DBUG_ENTER("archive_discover");
DBUG_PRINT("archive_discover", ("db: '%s' name: '%s'", share->db.str,
@@ -1092,6 +1094,54 @@ int ha_archive::index_init(uint keynr, bool sorted)
DBUG_RETURN(0);
}
+#define ARCHIVE_DECOMPRESS_TIME 0.081034543792841 // See optimizer_costs.txt
+
+static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs) // Installed as archive_hton->update_optimizer_costs in archive_db_init()
+{
+ costs->disk_read_ratio= 0.20; // Assume 80 % of data is cached by system
+ costs->row_lookup_cost= 0; // See ha_archive::rnd_pos_time()
+ costs->key_lookup_cost= 0; // See ha_archive::keyread_time()
+ costs->key_next_find_cost= 0; // Only unique indexes
+ costs->index_block_copy_cost= 0;
+}
+
+
+IO_AND_CPU_COST ha_archive::scan_time()
+{
+  DBUG_ENTER("ha_archive::scan_time");
+  /* Size of the data file expressed in whole IO_SIZE blocks */
+  const ulonglong n_blocks= stats.data_file_length / IO_SIZE;
+  IO_AND_CPU_COST estimate;
+
+  /*
+    No cache, so nothing is reported as IO cost. Reading and
+    decompressing each block are both accounted for as CPU cost.
+  */
+  estimate.io= 0;
+  estimate.cpu= (n_blocks * DISK_READ_COST * DISK_READ_RATIO +
+                 n_blocks * ARCHIVE_DECOMPRESS_TIME);
+  DBUG_RETURN(estimate);
+}
+
+
+IO_AND_CPU_COST ha_archive::keyread_time(uint index, ulong ranges, ha_rows rows,
+                                         ulonglong blocks)
+{
+  /*
+    Start from the cost of a full scan. As this is a unique index, assume
+    that half of the file has to be scanned for each range before the row
+    is found.
+  */
+  IO_AND_CPU_COST estimate= scan_time();
+  estimate.cpu= (estimate.cpu * ranges) / 2;
+  return estimate;
+}
+
+
+IO_AND_CPU_COST ha_archive::rnd_pos_time(ha_rows rows)
+{
+  /* Every positioned read needs its own azseek(), hence one IO per row */
+  IO_AND_CPU_COST estimate;
+  estimate.cpu= rows * (DISK_READ_COST * DISK_READ_RATIO + ARCHIVE_DECOMPRESS_TIME);
+  estimate.io= rows2double(rows);
+  return estimate;
+}
+
/*
No indexes, so if we get a request for an index search since we tell
@@ -1116,8 +1166,6 @@ int ha_archive::index_read_idx(uchar *buf, uint index, const uchar *key,
current_k_offset= mkey->key_part->offset;
current_key= key;
current_key_len= key_len;
-
-
DBUG_ENTER("ha_archive::index_read_idx");
rc= rnd_init(TRUE);
diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h
index 2e03ac639b5..00d8a56acba 100644
--- a/storage/archive/ha_archive.h
+++ b/storage/archive/ha_archive.h
@@ -109,6 +109,10 @@ public:
uint max_supported_key_length() const { return sizeof(ulonglong); }
uint max_supported_key_part_length() const { return sizeof(ulonglong); }
ha_rows records() { return share->rows_recorded; }
+ IO_AND_CPU_COST scan_time() override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
int index_init(uint keynr, bool sorted);
virtual int index_read(uchar * buf, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
diff --git a/storage/blackhole/ha_blackhole.cc b/storage/blackhole/ha_blackhole.cc
index 0134032351e..343f3c70286 100644
--- a/storage/blackhole/ha_blackhole.cc
+++ b/storage/blackhole/ha_blackhole.cc
@@ -182,6 +182,17 @@ int ha_blackhole::info(uint flag)
DBUG_ENTER("ha_blackhole::info");
bzero((char*) &stats, sizeof(stats));
+ /*
+ The following is required to get replication to work as otherwise
+ test_quick_select() will think the table is empty and thus any
+ update/delete will not have any rows to update.
+ */
+ stats.records= 2;
+ /*
+ Block size should not be 0 as this will cause division by zero
+ in scan_time()
+ */
+ stats.block_size= 8192;
if (flag & HA_STATUS_AUTO)
stats.auto_increment_value= 1;
DBUG_RETURN(0);
diff --git a/storage/columnstore/CMakeLists.txt b/storage/columnstore/CMakeLists.txt
index ebb138c70f0..ab29ffc566b 100644
--- a/storage/columnstore/CMakeLists.txt
+++ b/storage/columnstore/CMakeLists.txt
@@ -28,10 +28,14 @@ CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
add_subdirectory(columnstore)
IF(TARGET columnstore)
+ # Redo logic in cmake/plugin to prevent the attempted creation of *Symlinks package
+ SET(CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL} columnstore-engineSymlinks)
+ SET(CPACK_COMPONENT_COLUMNSTORE-ENGINESYMLINKS_GROUP columnstore-engine PARENT_SCOPE)
+ SET(CPACK_COMPONENT_COLUMNSTORE-ENGINE_GROUP columnstore-engine PARENT_SCOPE)
# Needed to bump the component changes up to the main scope
APPEND_FOR_CPACK(CPACK_COMPONENTS_ALL)
IF (RPM)
- APPEND_FOR_CPACK(CPACK_RPM_columnstore-engine_PACKAGE_REQUIRES " binutils net-tools python3")
+ APPEND_FOR_CPACK(CPACK_RPM_columnstore-engine_PACKAGE_REQUIRES " binutils net-tools python3 MariaDB-client-compat MariaDB-server-compat")
APPEND_FOR_CPACK(CPACK_RPM_columnstore-engine_PACKAGE_RECOMMENDS " jemalloc")
APPEND_FOR_CPACK(CPACK_RPM_columnstore-engine_USER_FILELIST ";%ignore /var/lib;%ignore /var")
APPEND_FOR_CPACK(CPACK_RPM_columnstore-engine_PACKAGE_CONFLICTS " thrift MariaDB-columnstore-platform MariaDB-columnstore-libs")
diff --git a/storage/columnstore/columnstore b/storage/columnstore/columnstore
-Subproject fa286826cbeb654ec90b6a26f206dd75a5e8be9
+Subproject 58da5eea954dbbce8c954c323dd2c8247e54303
diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc
index 23d3c7c1058..502c1f4af6d 100644
--- a/storage/connect/ha_connect.cc
+++ b/storage/connect/ha_connect.cc
@@ -1614,10 +1614,7 @@ void *ha_connect::GetColumnOption(PGLOBAL g, void *field, PCOLINFO pcf)
pcf->Scale= 0;
pcf->Opt= (fop) ? (int)fop->opt : 0;
- if (fp->field_length >= 0)
- pcf->Length= fp->field_length;
- else
- pcf->Length= 256; // BLOB?
+ pcf->Length= fp->field_length;
pcf->Precision= pcf->Length;
@@ -7400,7 +7397,8 @@ int ha_connect::multi_range_read_next(range_id_t *range_info)
ha_rows ha_connect::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost)
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost)
{
/*
This call is here because there is no location where this->table would
@@ -7414,7 +7412,7 @@ ha_rows ha_connect::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
*flags|= HA_MRR_USE_DEFAULT_IMPL;
ha_rows rows= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
- bufsz, flags, cost);
+ bufsz, flags, limit, cost);
xp->g->Mrr= !(*flags & HA_MRR_USE_DEFAULT_IMPL);
return rows;
} // end of multi_range_read_info_const
diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h
index 71ceb7974ba..c83584a62e4 100644
--- a/storage/connect/ha_connect.h
+++ b/storage/connect/ha_connect.h
@@ -308,13 +308,18 @@ public:
/** @brief
Called in test_quick_select to determine if indexes should be used.
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
+  virtual IO_AND_CPU_COST scan_time()
+  {
+    /*
+      First cost component is 0; the second one is DISK_READ_COST for each
+      of the (stats.records + stats.deleted) rows.
+    */
+    return { 0, (double) (stats.records+stats.deleted) * DISK_READ_COST };
+  }
/** @brief
This method will never be called if you do not implement indexes.
*/
- virtual double read_time(uint, uint, ha_rows rows)
- { return (double) rows / 20.0+1; }
+  virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+                                       ulonglong blocks)
+  {
+    /* Only 'rows' is used: first component is 0, second is 0.001 per row */
+    const double per_rows_cost= (double) rows * 0.001;
+    return { 0, per_rows_cost };
+  }
+
/*
Everything below are methods that we implement in ha_connect.cc.
@@ -497,7 +502,8 @@ int index_prev(uchar *buf);
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost);
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost);
diff --git a/storage/connect/mysql-test/connect/r/index.result b/storage/connect/mysql-test/connect/r/index.result
index baebf1f1ebe..fdb44d06ee1 100644
--- a/storage/connect/mysql-test/connect/r/index.result
+++ b/storage/connect/mysql-test/connect/r/index.result
@@ -96,18 +96,25 @@ sexe genre
0 Inconnu
1 Masculin
2 Feminin
-SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 LIMIT 10;
+# t2 has only 3 rows. Force eq_ref by increasing table scan cost!
+set @@optimizer_scan_setup_cost=10000;
+explain SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 order by nom,prenom LIMIT 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4545 Using filesort
+1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.sexe 1 Using where
+SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 order by nom,prenom LIMIT 10;
nom prenom genre
-ESCOURCHE BENEDICTE Feminin
-VICENTE LAURENCE Feminin
-NICOLAS ROGER Masculin
-TESSEREAU MARIE HELENE Feminin
-MOGADOR ALAIN Masculin
-CHAUSSEE ERIC DENIS Masculin
-MAILLOT GEORGES Masculin
-CAMILLE NADINE Feminin
-BRUYERES JEAN MARC Masculin
-LONES GERARD Masculin
+ABBADIE MONIQUE Feminin
+ABBAYE ANNICK Feminin
+ABBAYE GERALD Masculin
+ABBE KATIA Feminin
+ABBE MICHELE Feminin
+ABBE SOPHIE Feminin
+ABBEVILLE PASCAL Masculin
+ABEBERRY PATRICK Masculin
+ABEILLES RENE Masculin
+ABEL JEAN PIERRE Masculin
+set @@optimizer_scan_setup_cost=default;
#
# Another table
#
diff --git a/storage/connect/mysql-test/connect/r/mysql_index.result b/storage/connect/mysql-test/connect/r/mysql_index.result
index 54acc7be08d..b6c34add632 100644
--- a/storage/connect/mysql-test/connect/r/mysql_index.result
+++ b/storage/connect/mysql-test/connect/r/mysql_index.result
@@ -7,7 +7,7 @@ msg char(100) DEFAULT NULL,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO t1 VALUES(1,'Un'),(3,'Trois'),(5,'Cinq');
-INSERT INTO t1 VALUES(2,'Two'),(4,'Four'),(6,'Six');
+INSERT INTO t1 VALUES(2,'Two'),(4,'Four'),(6,'Six'), (7,'seven');
SELECT * FROM t1;
id msg
1 Un
@@ -16,6 +16,7 @@ id msg
2 Two
4 Four
6 Six
+7 seven
#
# Make local MYSQL table with indexed id column
#
@@ -35,6 +36,7 @@ id msg
2 Two
4 Four
6 Six
+7 seven
SELECT * FROM t2 WHERE id = 3;
id msg
3 Trois
@@ -49,12 +51,14 @@ SELECT * FROM t2 WHERE id > 4;
id msg
5 Cinq
6 Six
+7 seven
SELECT * FROM t2 WHERE id >= 3;
id msg
3 Trois
4 Four
5 Cinq
6 Six
+7 seven
SELECT * FROM t2 WHERE id < 3;
id msg
1 Un
@@ -64,6 +68,10 @@ id msg
1 Un
5 Cinq
6 Six
+7 seven
+explain SELECT * FROM t2 WHERE id <= 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where
SELECT * FROM t2 WHERE id <= 3;
id msg
1 Un
@@ -87,6 +95,7 @@ id msg
4 Four
5 Cinq
6 Six
+7 seven
UPDATE t2 SET msg = 'Five' WHERE id = 5;
Warnings:
Note 1105 t1: 1 affected rows
@@ -98,6 +107,7 @@ id msg
2 Two
4 Four
6 Six
+7 seven
DELETE FROM t2 WHERE id = 4;
Warnings:
Note 1105 t1: 1 affected rows
@@ -108,6 +118,7 @@ id msg
5 Five
2 Two
6 Six
+7 seven
DROP TABLE t2;
DROP TABLE t1;
#
diff --git a/storage/connect/mysql-test/connect/t/index.test b/storage/connect/mysql-test/connect/t/index.test
index 47bfbae7680..546d5184e9f 100644
--- a/storage/connect/mysql-test/connect/t/index.test
+++ b/storage/connect/mysql-test/connect/t/index.test
@@ -57,8 +57,11 @@ create table t2
genre CHAR(8) NOT NULL
) ENGINE=CONNECT TABLE_TYPE=CSV FILE_NAME='sexe.csv' SEP_CHAR=';' ENDING=2;
SELECT * FROM t2;
-SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 LIMIT 10;
-
+--echo # t2 has only 3 rows. Force eq_ref by increasing table scan cost!
+set @@optimizer_scan_setup_cost=10000;
+explain SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 order by nom,prenom LIMIT 10;
+SELECT nom, prenom, genre FROM t1 NATURAL JOIN t2 order by nom,prenom LIMIT 10;
+set @@optimizer_scan_setup_cost=default;
--echo #
--echo # Another table
--echo #
diff --git a/storage/connect/mysql-test/connect/t/mysql_index.test b/storage/connect/mysql-test/connect/t/mysql_index.test
index cb4a332cdf8..a70ea3fd6f9 100644
--- a/storage/connect/mysql-test/connect/t/mysql_index.test
+++ b/storage/connect/mysql-test/connect/t/mysql_index.test
@@ -30,7 +30,7 @@ CREATE TABLE t1 (
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO t1 VALUES(1,'Un'),(3,'Trois'),(5,'Cinq');
-INSERT INTO t1 VALUES(2,'Two'),(4,'Four'),(6,'Six');
+INSERT INTO t1 VALUES(2,'Two'),(4,'Four'),(6,'Six'), (7,'seven');
SELECT * FROM t1;
--echo #
@@ -54,6 +54,7 @@ SELECT * FROM t2 WHERE id > 4;
SELECT * FROM t2 WHERE id >= 3;
SELECT * FROM t2 WHERE id < 3;
SELECT * FROM t2 WHERE id < 2 OR id > 4;
+explain SELECT * FROM t2 WHERE id <= 3;
SELECT * FROM t2 WHERE id <= 3;
SELECT * FROM t2 WHERE id BETWEEN 3 AND 5;
SELECT * FROM t2 WHERE id > 2 AND id < 6;
diff --git a/storage/connect/tabext.cpp b/storage/connect/tabext.cpp
index 96a9f70e4a3..6ece7115ea5 100644
--- a/storage/connect/tabext.cpp
+++ b/storage/connect/tabext.cpp
@@ -466,7 +466,7 @@ bool TDBEXT::MakeSQL(PGLOBAL g, bool cnt)
if (Quote) {
// Tabname can have both database and table identifiers, we need to parse
- if (res= strstr(buf, "."))
+ if ((res= strstr(buf, ".")))
{
// Parse schema
my_len= res - buf + 1;
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index 043183444da..856bb789320 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -124,7 +124,12 @@ public:
/*
Called in test_quick_select to determine if indexes should be used.
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
+  virtual IO_AND_CPU_COST scan_time()
+  {
+    /*
+      First component: saved data file length in units of IO_SIZE.
+      NOTE(review): the cast to double is applied before the division, so
+      this value is fractional and "+ IO_SIZE-1" does not round it up to
+      whole blocks - confirm that this is intended.
+    */
+    const double file_blocks=
+      (double) (share->saved_data_file_length + IO_SIZE-1) / IO_SIZE;
+    /* Second component: ROW_NEXT_FIND_COST per (records + deleted) row */
+    return { file_blocks,
+             (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
+  }
/* The next method will never be called */
virtual bool fast_key_read() { return 1;}
/*
diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h
index 5d067f7cda9..78b07ed5d9f 100644
--- a/storage/example/ha_example.h
+++ b/storage/example/ha_example.h
@@ -148,15 +148,40 @@ public:
uint max_supported_key_length() const { return 0; }
/** @brief
- Called in test_quick_select to determine if indexes should be used.
+ Called in test_quick_select to determine cost of table scan
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
+  virtual IO_AND_CPU_COST scan_time()
+  {
+    IO_AND_CPU_COST cost;
+    /*
+      0 blocks, DISK_READ_COST per row.
+      No blocks are read, so the IO cost is 0. The per-row cost is CPU
+      cost, which is also how rnd_pos_time() accounts for it.
+    */
+    cost.io= 0;
+    cost.cpu= (double) (stats.records+stats.deleted) * DISK_READ_COST;
+    return cost;
+  }
/** @brief
This method will never be called if you do not implement indexes.
*/
- virtual double read_time(uint, uint, ha_rows rows)
- { return (double) rows / 20.0+1; }
+  virtual IO_AND_CPU_COST keyread_time(uint, ulong, ha_rows rows,
+                                       ulonglong blocks)
+  {
+    /* IO: DISK_READ_COST per block; CPU: 0.001 per row */
+    IO_AND_CPU_COST estimate;
+    estimate.cpu= (double) rows * 0.001;
+    estimate.io= blocks * DISK_READ_COST;
+    return estimate;
+  }
+
+ /** @brief
+ Cost of fetching 'rows' records through rnd_pos()
+ */
+  virtual IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+  {
+    /* No blocks are read; each row adds DISK_READ_COST of CPU cost */
+    IO_AND_CPU_COST estimate;
+    estimate.cpu= (double) rows * DISK_READ_COST;
+    estimate.io= 0;
+    return estimate;
+  }
/*
Everything below are methods that we implement in ha_example.cc.
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 25b12de3cd5..efb598bf91e 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -460,6 +460,20 @@ static void init_federated_psi_keys(void)
#endif /* HAVE_PSI_INTERFACE */
/*
+ Federated doesn't need costs.disk_read_ratio as everything is on a
+ remote server and nothing is cached locally
+*/
+
+static void federated_update_optimizer_costs(OPTIMIZER_COSTS *costs) // Installed as federated_hton->update_optimizer_costs in federated_db_init()
+{
+ /*
+ Setting disk_read_ratio to 1.0 ensures we are using the costs
+ from rnd_pos_time() and scan_time()
+ */
+ costs->disk_read_ratio= 1.0;
+}
+
+/*
Initialize the federated handler.
SYNOPSIS
@@ -485,6 +499,7 @@ int federated_db_init(void *p)
federated_hton->rollback= federated_rollback;
federated_hton->create= federated_create_handler;
federated_hton->drop_table= [](handlerton *, const char*) { return -1; };
+ federated_hton->update_optimizer_costs= federated_update_optimizer_costs;
federated_hton->flags= HTON_ALTER_NOT_SUPPORTED | HTON_NO_PARTITION;
/*
@@ -909,7 +924,6 @@ ha_federated::ha_federated(handlerton *hton,
bzero(&bulk_insert, sizeof(bulk_insert));
}
-
/*
Convert MySQL result set row to handler internal format
@@ -2879,11 +2893,11 @@ int ha_federated::info(uint flag)
&error);
/*
- size of IO operations (This is based on a good guess, no high science
- involved)
+ Size of IO operations. This is used to calculate time to scan a table.
+ See handler.cc::keyread_time
*/
if (flag & HA_STATUS_CONST)
- stats.block_size= 4096;
+ stats.block_size= 1500; // Typical size of an TCP packet
}
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index fe729f08413..b5ee49755cb 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -180,23 +180,26 @@ public:
The reason for "records * 1000" is that such a large number forces
this to use indexes "
*/
- double scan_time()
+
+ IO_AND_CPU_COST scan_time()
{
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
- return (double)(stats.records*1000);
+ return
+ {
+ 0,
+ (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST+
+ 1000,
+ };
}
- /*
- The next method will never be called if you do not implement indexes.
- */
- double read_time(uint index, uint ranges, ha_rows rows)
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
{
- /*
- Per Brian, this number is bugus, but this method must be implemented,
- and at a later date, he intends to document this issue for handler code
- */
- return (double) rows / 20.0+1;
+ return {0, (double) (ranges + rows) * DISK_READ_COST };
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ {
+ return {0, (double) rows * DISK_READ_COST };
}
-
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/*
Everything below are methods that we implment in ha_federated.cc.
@@ -240,16 +243,11 @@ public:
void position(const uchar *record); //required
/*
A ref is a pointer inside a local buffer. It is not comparable to
- other ref's. This is never called as HA_NON_COMPARABLE_ROWID is set.
+ other ref's.
*/
int cmp_ref(const uchar *ref1, const uchar *ref2)
{
-#ifdef NOT_YET
- DBUG_ASSERT(0);
- return 0;
-#else
- return handler::cmp_ref(ref1,ref2); /* Works if table scan is used */
-#endif
+ return handler::cmp_ref(ref1,ref2); /* Works if table scan is used */
}
int info(uint); //required
int extra(ha_extra_function operation);
@@ -285,4 +283,3 @@ public:
int execute_simple_query(const char *query, int len);
int reset(void);
};
-
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index 598886b8915..62a71aa6db6 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -412,6 +412,20 @@ static select_handler*
create_federatedx_select_handler(THD* thd, SELECT_LEX *sel);
/*
+ Federated doesn't need costs.disk_read_ratio as everything is on a remote
+ server and nothing is cached locally
+*/
+
+static void federatedx_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+  /*
+    Setting disk_read_ratio to 1.0 ensures we are using the costs
+    from rnd_pos_time() and scan_time().
+    The value was 0.0 here, which contradicted this comment and the
+    equivalent hook in the federated engine; 1.0 matches both.
+  */
+  costs->disk_read_ratio= 1.0;
+}
+
+/*
Initialize the federatedx handler.
SYNOPSIS
@@ -443,6 +457,7 @@ int federatedx_db_init(void *p)
federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED;
federatedx_hton->create_derived= create_federatedx_derived_handler;
federatedx_hton->create_select= create_federatedx_select_handler;
+ federatedx_hton->update_optimizer_costs= federatedx_update_optimizer_costs;
if (mysql_mutex_init(fe_key_mutex_federatedx,
&federatedx_mutex, MY_MUTEX_INIT_FAST))
@@ -3098,11 +3113,11 @@ int ha_federatedx::info(uint flag)
if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST))
{
/*
- size of IO operations (This is based on a good guess, no high science
- involved)
+ Size of IO operations. This is used to calculate time to scan a table.
+ See handler.cc::keyread_time
*/
if (flag & HA_STATUS_CONST)
- stats.block_size= 4096;
+ stats.block_size= 1500; // Typical size of an TCP packet
if ((*iop)->table_metadata(&stats, share->table_name,
(uint)share->table_name_length, flag))
diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h
index 3573c658b11..a67fe1efa8f 100644
--- a/storage/federatedx/ha_federatedx.h
+++ b/storage/federatedx/ha_federatedx.h
@@ -222,7 +222,6 @@ public:
virtual int seek_position(FEDERATEDX_IO_RESULT **io_result,
const void *ref)=0;
virtual void set_thd(void *thd) { }
-
};
@@ -365,29 +364,31 @@ public:
Talk to Kostja about this - how to get the
number of rows * ...
disk scan time on other side (block size, size of the row) + network time ...
- The reason for "records * 1000" is that such a large number forces
- this to use indexes "
+ The reason for "1000" is that such a large number forces this to use indexes.
*/
- double scan_time()
+ IO_AND_CPU_COST scan_time()
{
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
- return (double)(stats.records*1000);
+ return
+ {
+ 0,
+ (double) (stats.mean_rec_length * stats.records)/8192 * DISK_READ_COST+
+ 1000,
+ };
}
- /*
- The next method will never be called if you do not implement indexes.
- */
- double read_time(uint index, uint ranges, ha_rows rows)
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+ {
+ /*
+ Charge one DISK_READ_COST for every range and for every row.
+ 'index' and 'blocks' are not used in the estimate.
+ */
+ const double accesses= (double) (ranges + rows);
+ return {0, accesses * DISK_READ_COST };
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
- /*
- Per Brian, this number is bugus, but this method must be implemented,
- and at a later date, he intends to document this issue for handler code
- */
- return (double) rows / 20.0+1;
+ return {0, (double) rows * DISK_READ_COST };
}
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/*
- Everything below are methods that we implment in ha_federatedx.cc.
+ Everything below are methods that we implement in ha_federatedx.cc.
Most of these methods are not obligatory, skip them and
MySQL will treat them as not implemented
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 5f7f0c1efa0..cc7dc79e508 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -42,6 +42,28 @@ static int heap_drop_table(handlerton *hton, const char *path)
return error == ENOENT ? -1 : error;
}
+/* See optimizer_costs.txt for how the following values were calculated */
+#define HEAP_ROW_NEXT_FIND_COST 8.0166e-06 // For table scan
+#define BTREE_KEY_NEXT_FIND_COST 0.00007739 // For binary tree scan
+#define HEAP_LOOKUP_COST 0.00016097 // Heap lookup cost
+
+static void heap_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ Engine specific optimizer costs for heap tables.
+ A lot of values are 0 as heap supports all needed xxx_time() functions
+ (scan_time(), keyread_time() and rnd_pos_time()).
+ Each cost member is assigned exactly once.
+ */
+ costs->disk_read_cost= 0; // All data in memory
+ costs->disk_read_ratio= 0.0; // All data in memory
+ costs->key_copy_cost= 0; // Set in keyread_time()
+ costs->row_copy_cost= 2.334e-06; // This is small as it's just a memcpy
+ costs->row_lookup_cost= 0; // Direct pointer
+ costs->row_next_find_cost= 0;
+ costs->key_lookup_cost= 0;
+ costs->key_next_find_cost= 0;
+ costs->index_block_copy_cost= 0;
+}
+
int heap_init(void *p)
{
handlerton *heap_hton;
@@ -53,6 +75,7 @@ int heap_init(void *p)
heap_hton->create= heap_create_handler;
heap_hton->panic= heap_panic;
heap_hton->drop_table= heap_drop_table;
+ heap_hton->update_optimizer_costs= heap_update_optimizer_costs;
heap_hton->flags= HTON_CAN_RECREATE;
return 0;
@@ -73,7 +96,8 @@ static handler *heap_create_handler(handlerton *hton,
ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0),
internal_table(0)
-{}
+{
+}
/*
Hash index statistics is updated (copied from HP_KEYDEF::hash_buckets to
@@ -228,6 +252,41 @@ void ha_heap::update_key_stats()
}
+IO_AND_CPU_COST ha_heap::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ /*
+ Estimate the cost of reading 'rows' key entries through 'ranges' range
+ lookups on index 'index'. Only the second member of the returned cost is
+ filled in; the first one is always 0. 'blocks' is not used.
+ */
+ KEY *key=table->key_info+index;
+ /*
+ Entries read after the first one of each range. Guard the subtraction
+ so that rows < ranges cannot wrap around to a huge unsigned value.
+ */
+ double next_rows= rows > ranges ? (double) (rows - ranges) : 0.0;
+ if (key->algorithm == HA_KEY_ALG_BTREE)
+ {
+ /* Cost of one tree lookup; multiplied by 'ranges' exactly once below */
+ double lookup_cost= costs->key_cmp_cost * log2(stats.records+1);
+ return {0, ranges * lookup_cost + next_rows * BTREE_KEY_NEXT_FIND_COST };
+ }
+ else
+ {
+ /* NOTE(review): non-BTREE keys also use BTREE_KEY_NEXT_FIND_COST - confirm */
+ return {0, (ranges * HEAP_LOOKUP_COST +
+ next_rows * BTREE_KEY_NEXT_FIND_COST) };
+ }
+}
+
+
+IO_AND_CPU_COST ha_heap::scan_time()
+{
+ /*
+ The estimate is proportional to the number of live plus deleted rows;
+ only the second member of the returned cost is filled in.
+ */
+ const double row_slots= (double) (stats.records+stats.deleted);
+ return {0, row_slots * HEAP_ROW_NEXT_FIND_COST };
+}
+
+
+IO_AND_CPU_COST ha_heap::rnd_pos_time(ha_rows rows)
+{
+ /*
+ A row position is a direct pointer to the block, so fetching a row by
+ position is almost instant in practice and no cost is charged here,
+ whatever 'rows' is.
+ Note that ha_rnd_pos_time() will add ROW_COPY_COST to this result
+ */
+ IO_AND_CPU_COST no_cost= { 0, 0 };
+ return no_cost;
+}
+
+
int ha_heap::write_row(const uchar * buf)
{
int res;
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 18e0d1a92d5..eed91176136 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -37,15 +37,15 @@ class ha_heap final : public handler
public:
ha_heap(handlerton *hton, TABLE_SHARE *table);
~ha_heap() = default;
- handler *clone(const char *name, MEM_ROOT *mem_root);
- const char *index_type(uint inx)
+ handler *clone(const char *name, MEM_ROOT *mem_root) override;
+ const char *index_type(uint inx) override
{
return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ?
"BTREE" : "HASH");
}
/* Rows also use a fixed-size format */
- enum row_type get_row_type() const { return ROW_TYPE_FIXED; }
- ulonglong table_flags() const
+ enum row_type get_row_type() const override { return ROW_TYPE_FIXED; }
+ ulonglong table_flags() const override
{
return (HA_FAST_KEY_READ | HA_NO_BLOBS | HA_NULL_IN_KEY |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
@@ -53,73 +53,73 @@ public:
HA_REC_NOT_IN_SEQ | HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS |
HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT | HA_CAN_HASH_KEYS);
}
- ulong index_flags(uint inx, uint part, bool all_parts) const
+ ulong index_flags(uint inx, uint part, bool all_parts) const override
{
return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ?
HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE :
HA_ONLY_WHOLE_INDEX | HA_KEY_SCAN_NOT_ROR);
}
- const key_map *keys_to_use_for_scanning() { return &btree_keys; }
- uint max_supported_keys() const { return MAX_KEY; }
- uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; }
- double scan_time()
- { return (double) (stats.records+stats.deleted) / 20.0+10; }
- double read_time(uint index, uint ranges, ha_rows rows)
- { return (double) (rows +1)/ 20.0; }
- double keyread_time(uint index, uint ranges, ha_rows rows)
- { return (double) (rows + ranges) / 20.0 ; }
- double avg_io_cost()
- { return 0.05; } /* 1/20 */
- int open(const char *name, int mode, uint test_if_locked);
- int close(void);
- void set_keys_for_scanning(void);
- int write_row(const uchar * buf);
- int update_row(const uchar * old_data, const uchar * new_data);
- int delete_row(const uchar * buf);
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
+ const key_map *keys_to_use_for_scanning() override { return &btree_keys; }
+ uint max_supported_keys() const override { return MAX_KEY; }
+ uint max_supported_key_part_length() const override { return MAX_KEY_LENGTH; }
+ IO_AND_CPU_COST scan_time() override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
+ /* 0 for avg_io_cost ensures that there are no read-block calculations */
+
+ int open(const char *name, int mode, uint test_if_locked) override;
+ int close(void) override;
+ int write_row(const uchar * buf) override;
+ int update_row(const uchar * old_data, const uchar * new_data) override;
+ int delete_row(const uchar * buf) override;
+ void get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values) override;
int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map,
- enum ha_rkey_function find_flag);
- int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map);
+ enum ha_rkey_function find_flag) override;
+ int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map)
+ override;
int index_read_idx_map(uchar * buf, uint index, const uchar * key,
key_part_map keypart_map,
- enum ha_rkey_function find_flag);
- int index_next(uchar * buf);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
- int rnd_init(bool scan);
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
- void position(const uchar *record);
- int can_continue_handler_scan();
- int info(uint);
- int extra(enum ha_extra_function operation);
- int reset();
- int external_lock(THD *thd, int lock_type);
- int delete_all_rows(void);
- int reset_auto_increment(ulonglong value);
- int disable_indexes(uint mode);
- int enable_indexes(uint mode);
- int indexes_are_disabled(void);
+ enum ha_rkey_function find_flag) override;
+ int index_next(uchar * buf) override;
+ int index_prev(uchar * buf) override;
+ int index_first(uchar * buf) override;
+ int index_last(uchar * buf) override;
+ int rnd_init(bool scan) override;
+ int rnd_next(uchar *buf) override;
+ int rnd_pos(uchar * buf, uchar *pos) override;
+ void position(const uchar *record) override;
+ int can_continue_handler_scan() override;
+ int info(uint) override;
+ int extra(enum ha_extra_function operation) override;
+ int reset() override;
+ int external_lock(THD *thd, int lock_type) override;
+ int delete_all_rows(void) override;
+ int reset_auto_increment(ulonglong value) override;
+ int disable_indexes(uint mode) override;
+ int enable_indexes(uint mode) override;
+ int indexes_are_disabled(void) override;
ha_rows records_in_range(uint inx, const key_range *start_key,
- const key_range *end_key, page_range *pages);
- int delete_table(const char *from);
- void drop_table(const char *name);
- int rename_table(const char * from, const char * to);
- int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info);
- void update_create_info(HA_CREATE_INFO *create_info);
+ const key_range *end_key, page_range *pages) override;
+ int delete_table(const char *from) override;
+ void drop_table(const char *name) override;
+ int rename_table(const char * from, const char * to) override;
+ int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) override;
+ void update_create_info(HA_CREATE_INFO *create_info) override;
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- int cmp_ref(const uchar *ref1, const uchar *ref2)
+ enum thr_lock_type lock_type) override;
+ int cmp_ref(const uchar *ref1, const uchar *ref2) override
{
return memcmp(ref1, ref2, sizeof(HEAP_PTR));
}
- bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
- int find_unique_row(uchar *record, uint unique_idx);
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes)
+ override;
+ int find_unique_row(uchar *record, uint unique_idx) override;
private:
void update_key_stats();
+ void set_keys_for_scanning(void);
};
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index a61b762f58b..3b1a285104c 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,6 +1,6 @@
# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2014, 2022, MariaDB Corporation.
+# Copyright (c) 2014, 2023, MariaDB Corporation.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -264,7 +264,6 @@ SET(INNOBASE_SOURCES
include/handler0alter.h
include/hash0hash.h
include/ibuf0ibuf.h
- include/ibuf0ibuf.inl
include/lock0iter.h
include/lock0lock.h
include/lock0lock.inl
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index f28d3929569..9b69fde0408 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -37,7 +37,6 @@ Created 6/2/1994 Heikki Tuuri
#include "btr0defragment.h"
#include "rem0cmp.h"
#include "lock0lock.h"
-#include "ibuf0ibuf.h"
#include "trx0trx.h"
#include "srv0mon.h"
#include "gis0geo.h"
@@ -181,9 +180,8 @@ we allocate pages for the non-leaf levels of the tree.
@param block B-tree root page
@param space tablespace
@return whether the segment header is valid */
-static bool btr_root_fseg_validate(ulint offset,
- const buf_block_t &block,
- const fil_space_t &space)
+bool btr_root_fseg_validate(ulint offset, const buf_block_t &block,
+ const fil_space_t &space)
{
ut_ad(block.page.id().space() == space.id);
const uint16_t hdr= mach_read_from_2(offset + FSEG_HDR_OFFSET +
@@ -213,12 +211,11 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index)
@param[in] index index tree
@param[in] page page number
@param[in] mode latch mode
-@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
- uint32_t page, ulint mode, bool merge,
+ uint32_t page, ulint mode,
mtr_t *mtr, dberr_t *err)
{
dberr_t local_err;
@@ -227,7 +224,7 @@ buf_block_t *btr_block_get(const dict_index_t &index,
buf_block_t *block=
buf_page_get_gen(page_id_t{index.table->space->id, page},
index.table->space->zip_size(), mode, nullptr, BUF_GET,
- mtr, err, merge && !index.is_clust());
+ mtr, err);
ut_ad(!block == (*err != DB_SUCCESS));
if (UNIV_LIKELY(block != nullptr))
@@ -276,7 +273,7 @@ btr_root_block_get(
block=
buf_page_get_gen(page_id_t{index->table->space->id, index->page},
index->table->space->zip_size(), mode, guess, BUF_GET,
- mtr, err, false);
+ mtr, err);
ut_ad(!block == (*err != DB_SUCCESS));
if (UNIV_LIKELY(block != nullptr))
@@ -290,7 +287,6 @@ btr_root_block_get(
*err= DB_PAGE_CORRUPTED;
block= nullptr;
}
- else if (index->is_ibuf());
else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
@@ -526,47 +522,16 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index,
}
ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
- return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages!
-@return new allocated block, x-latched */
-static
-buf_block_t*
-btr_page_alloc_for_ibuf(
-/*====================*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr, /*!< in: mtr */
- dberr_t* err) /*!< out: error code */
-{
- buf_block_t *root= btr_get_latched_root(*index, mtr);
- if (UNIV_UNLIKELY(!root))
- return root;
- buf_block_t *new_block=
- buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
- mach_read_from_4(PAGE_HEADER +
- PAGE_BTR_IBUF_FREE_LIST +
- FLST_FIRST + FIL_ADDR_PAGE +
- root->page.frame)),
- 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err);
- if (new_block)
- *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
- PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
- ut_d(if (*err == DB_SUCCESS)
- flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- return new_block;
+ return btr_block_get(index, id.page_no(), rw_latch, mtr, err);
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
@retval NULL if no page could be allocated */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
+MY_ATTRIBUTE((nonnull, warn_unused_result))
buf_block_t*
-btr_page_alloc_low(
-/*===============*/
+btr_page_alloc(
dict_index_t* index, /*!< in: index */
uint32_t hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
@@ -580,6 +545,8 @@ btr_page_alloc_low(
page should be initialized. */
dberr_t* err) /*!< out: error code */
{
+ ut_ad(level < BTR_MAX_NODE_LEVEL);
+
const auto savepoint= mtr->get_savepoint();
buf_block_t *root= btr_root_block_get(index, RW_NO_LATCH, mtr, err);
if (UNIV_UNLIKELY(!root))
@@ -607,54 +574,6 @@ btr_page_alloc_low(
true, mtr, init_mtr, err);
}
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@retval NULL if no page could be allocated */
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index */
- uint32_t hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr, /*!< in/out: mini-transaction
- for x-latching and initializing
- the page */
- dberr_t* err) /*!< out: error code */
-{
- ut_ad(level < BTR_MAX_NODE_LEVEL);
- return index->is_ibuf()
- ? btr_page_alloc_for_ibuf(index, mtr, err)
- : btr_page_alloc_low(index, hint_page_no, file_direction, level,
- mtr, init_mtr, err);
-}
-
-/**************************************************************//**
-Frees a page used in an ibuf tree. Puts the page to the free list of the
-ibuf tree. */
-static
-dberr_t
-btr_page_free_for_ibuf(
-/*===================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
- buf_block_t *root= btr_get_latched_root(*index, mtr);
- dberr_t err=
- flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
- ut_d(if (err == DB_SUCCESS)
- flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- return err;
-}
-
/** Free an index page.
@param[in,out] index index tree
@param[in,out] block block to be freed
@@ -687,9 +606,6 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
The page will be freed, so previous changes to it by this
mini-transaction should not matter. */
- if (index->is_ibuf())
- return btr_page_free_for_ibuf(index, block, mtr);
-
fil_space_t *space= index->table->space;
dberr_t err;
@@ -772,8 +688,7 @@ btr_node_ptr_get_child(
return btr_block_get(
*index, btr_node_ptr_get_child_page_no(node_ptr, offsets),
- RW_SX_LATCH, btr_page_get_level(page_align(node_ptr)) == 1,
- mtr, err);
+ RW_SX_LATCH, mtr, err);
}
MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
@@ -1036,77 +951,32 @@ btr_create(
mtr_t* mtr,
dberr_t* err)
{
- buf_block_t* block;
-
ut_ad(mtr->is_named_space(space));
ut_ad(index_id != BTR_FREED_INDEX_ID);
ut_ad(index || space == fil_system.sys_space);
- /* Create the two new segments (one, in the case of an ibuf tree) for
- the index tree; the segment headers are put on the allocated root page
- (for an ibuf tree, not in the root, but on a separate ibuf header
- page) */
-
- if (UNIV_UNLIKELY(type & DICT_IBUF)) {
- /* Allocate first the ibuf header page */
- buf_block_t* ibuf_hdr_block = fseg_create(
- space, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr, err);
-
- if (ibuf_hdr_block == NULL) {
- return(FIL_NULL);
- }
-
- ut_ad(ibuf_hdr_block->page.id().page_no()
- == IBUF_HEADER_PAGE_NO);
- /* Allocate then the next page to the segment: it will be the
- tree root page */
-
- block = fseg_alloc_free_page_general(
- buf_block_get_frame(ibuf_hdr_block)
- + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- IBUF_TREE_ROOT_PAGE_NO,
- FSP_UP, false, mtr, mtr, err);
-
- if (block == NULL) {
- return(FIL_NULL);
- }
-
- ut_ad(block->page.id() == page_id_t(0,IBUF_TREE_ROOT_PAGE_NO));
+ /* Create the two new segments for the index tree;
+ the segment headers are put on the allocated root page */
- flst_init(block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
- } else {
- block = fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_TOP,
- mtr, err);
+ buf_block_t *block = fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_TOP,
+ mtr, err);
- if (block == NULL) {
- return(FIL_NULL);
- }
+ if (!block) {
+ return FIL_NULL;
+ }
- if (!fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr,
- err, false, block)) {
- /* Not enough space for new segment, free root
- segment before return. */
- btr_free_root(block, *space, mtr);
- return(FIL_NULL);
- }
+ if (!fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr,
+ err, false, block)) {
+ /* Not enough space for new segment, free root
+ segment before return. */
+ btr_free_root(block, *space, mtr);
+ return FIL_NULL;
}
ut_ad(!page_has_siblings(block->page.frame));
btr_root_page_init(block, index_id, index, mtr);
- /* We reset the free bits for the page in a separate
- mini-transaction to allow creation of several trees in the
- same mtr, otherwise the latch on a bitmap page would prevent
- it because of the latching order.
-
- Note: Insert Buffering is disabled for temporary tables given that
- most temporary tables are smaller in size and short-lived. */
- if (!(type & DICT_CLUSTERED)
- && (!index || !index->table->is_temporary())) {
- ibuf_reset_free_bits(block);
- }
-
/* In the following assertion we test that two records of maximum
allowed size fit on the root page: this fact is needed to ensure
correctness of split algorithms */
@@ -1258,7 +1128,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
{
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
RW_X_LATCH, nullptr, BUF_GET, &mtr,
- nullptr, false))
+ nullptr))
{
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO);
@@ -1429,18 +1299,18 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, mtr_t *mtr)
if (page_get_max_trx_id(block->page.frame))
/* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
clustered index root pages. */
- ut_ad(dict_index_is_sec_or_ibuf(cursor->index)
+ ut_ad(!cursor->index->is_primary()
? page_is_leaf(block->page.frame)
: block->page.id().page_no() == cursor->index->page);
else
/* PAGE_MAX_TRX_ID is unused in clustered index pages (other than
the root where it is repurposed as PAGE_ROOT_AUTO_INC), non-leaf
pages, and in temporary tables. It was always zero-initialized in
- page_create(). PAGE_MAX_TRX_ID must be nonzero on
- dict_index_is_sec_or_ibuf() leaf pages. */
+ page_create(). PAGE_MAX_TRX_ID must be nonzero on secondary index
+ leaf pages. */
ut_ad(cursor->index->table->is_temporary() ||
!page_is_leaf(block->page.frame) ||
- !dict_index_is_sec_or_ibuf(cursor->index));
+ cursor->index->is_primary());
#endif
const uint16_t data_size1= page_get_data_size(old->page.frame);
@@ -1640,15 +1510,7 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, mtr_t *mtr)
return DB_SUCCESS;
}
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
+/** Reorganize an index page.
@return error code
@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
dberr_t
@@ -1667,15 +1529,7 @@ btr_page_reorganize_block(
return btr_page_reorganize_low(&cur, mtr);
}
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
+/** Reorganize an index page.
@param cursor page cursor
@param mtr mini-transaction
@return error code
@@ -1901,6 +1755,7 @@ btr_root_raise_and_insert(
ut_ad(!page_is_empty(root->page.frame));
index = btr_cur_get_index(cursor);
ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
+ ut_ad(!index->is_spatial());
#ifdef UNIV_ZIP_DEBUG
ut_a(!root_page_zip
|| page_zip_validate(root_page_zip, root->page.frame, index));
@@ -1916,12 +1771,11 @@ btr_root_raise_and_insert(
return nullptr;
}
- if (index->is_ibuf()) {
- } else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
- *root, *index->table->space)
- || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
- *root, *index->table->space)) {
- return nullptr;
+ if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
+ *root, *index->table->space)
+ || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
+ *root, *index->table->space)) {
+ return nullptr;
}
/* Allocate a new page to the tree. Root splitting is done by first
@@ -1987,18 +1841,12 @@ btr_root_raise_and_insert(
page_get_infimum_rec(root->page.frame));
}
- /* Move any existing predicate locks */
- if (dict_index_is_spatial(index)) {
- lock_prdt_rec_move(new_block, root_id);
- } else {
- btr_search_move_or_delete_hash_entries(
- new_block, root);
- }
+ btr_search_move_or_delete_hash_entries(new_block, root);
}
constexpr uint16_t max_trx_id = PAGE_HEADER + PAGE_MAX_TRX_ID;
- if (dict_index_is_sec_or_ibuf(index)) {
- /* In secondary indexes and the change buffer,
+ if (!index->is_primary()) {
+ /* In secondary indexes,
PAGE_MAX_TRX_ID can be reset on the root page, because
the field only matters on leaf pages, and the root no
longer is a leaf page. (Older versions of InnoDB did
@@ -2048,16 +1896,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
- if (dict_index_is_spatial(index)) {
- rtr_mbr_t new_mbr;
-
- rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
- node_ptr = rtr_index_build_node_ptr(
- index, &new_mbr, rec, new_page_no, *heap);
- } else {
- node_ptr = dict_index_build_node_ptr(
- index, rec, new_page_no, *heap, level);
- }
+ node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, *heap,
+ level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -2090,13 +1930,6 @@ btr_root_raise_and_insert(
to new_block at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
- /* We play safe and reset the free bits for the new page */
-
- if (!dict_index_is_clust(index)
- && !index->table->is_temporary()) {
- ibuf_reset_free_bits(new_block);
- }
-
page_cursor->block = new_block;
page_cursor->index = index;
@@ -2462,10 +2295,9 @@ btr_insert_on_non_leaf_level(
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
- err = rtr_search_to_nth_level(level, tuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_CONT_MODIFY_TREE,
- &cursor, mtr);
+ err = rtr_search_to_nth_level(&cursor, nullptr, tuple,
+ BTR_CONT_MODIFY_TREE, mtr,
+ PAGE_CUR_RTREE_INSERT, level);
} else {
err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH,
&cursor, mtr);
@@ -2586,7 +2418,7 @@ btr_attach_half_pages(
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
prev_block = btr_block_get(*index, prev_page_no,
- RW_X_LATCH, !level, mtr);
+ RW_X_LATCH, mtr);
}
#endif
}
@@ -2598,7 +2430,7 @@ btr_attach_half_pages(
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
next_block = btr_block_get(*index, next_page_no,
- RW_X_LATCH, !level, mtr);
+ RW_X_LATCH, mtr);
}
#endif
}
@@ -2746,10 +2578,9 @@ btr_insert_into_right_sibling(
page_t* next_page;
btr_cur_t next_father_cursor;
rec_t* rec = nullptr;
- ulint max_size;
next_block = btr_block_get(*cursor->index(), next_page_no, RW_X_LATCH,
- page_is_leaf(page), mtr);
+ mtr);
if (UNIV_UNLIKELY(!next_block)) {
return nullptr;
}
@@ -2772,8 +2603,6 @@ btr_insert_into_right_sibling(
return nullptr;
}
- max_size = page_get_max_insert_size_after_reorganize(next_page, 1);
-
/* Extends gap lock for the next page */
if (is_leaf && cursor->index()->has_locking()) {
lock_update_node_pointer(block, next_block);
@@ -2783,15 +2612,6 @@ btr_insert_into_right_sibling(
n_ext, mtr);
if (!rec) {
- if (is_leaf
- && next_block->page.zip.ssize
- && !dict_index_is_clust(cursor->index())
- && !cursor->index()->table->is_temporary()) {
- /* Reset the IBUF_BITMAP_FREE bits, because
- page_cur_tuple_insert() will have attempted page
- reorganize before failing. */
- ibuf_reset_free_bits(next_block);
- }
return nullptr;
}
@@ -2829,34 +2649,12 @@ btr_insert_into_right_sibling(
}
ut_ad(rec_offs_validate(rec, cursor->index(), *offsets));
-
- if (is_leaf
- && !dict_index_is_clust(cursor->index())
- && !cursor->index()->table->is_temporary()) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. */
-
- if (next_block->page.zip.ssize) {
- ibuf_update_free_bits_zip(next_block, mtr);
- } else {
- ibuf_update_free_bits_if_full(
- next_block, max_size,
- rec_offs_size(*offsets) + PAGE_DIR_SLOT_SIZE);
- }
- }
-
return(rec);
}
/*************************************************************//**
Moves record list end to another page. Moved records include
split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return error code */
static
dberr_t
@@ -2912,12 +2710,6 @@ page_move_rec_list_end(
/*************************************************************//**
Moves record list start to another page. Moved records do not include
split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return error code */
static
dberr_t
@@ -2975,15 +2767,10 @@ btr_page_split_and_insert(
ut_ad(*err == DB_SUCCESS);
ut_ad(dtuple_check_typed(tuple));
+ ut_ad(!cursor->index()->is_spatial());
buf_pool.pages_split++;
- if (cursor->index()->is_spatial()) {
- /* Split rtree page and update parent */
- return rtr_page_split_and_insert(flags, cursor, offsets, heap,
- tuple, n_ext, mtr, err);
- }
-
if (!*heap) {
*heap = mem_heap_create(1024);
}
@@ -3348,13 +3135,6 @@ insert_empty:
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
insert_failed:
- /* We play safe and reset the free bits for new_page */
- if (!dict_index_is_clust(page_cursor->index)
- && !page_cursor->index->table->is_temporary()) {
- ibuf_reset_free_bits(new_block);
- ibuf_reset_free_bits(block);
- }
-
n_iterations++;
ut_ad(n_iterations < 2
|| buf_block_get_page_zip(insert_block));
@@ -3364,17 +3144,6 @@ insert_failed:
}
func_exit:
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
-
- if (!dict_index_is_clust(page_cursor->index)
- && !page_cursor->index->table->is_temporary()
- && page_is_leaf(page)) {
-
- ibuf_update_free_bits_for_two_pages_low(
- left_block, right_block, mtr);
- }
-
ut_ad(page_validate(buf_block_get_frame(left_block),
page_cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block),
@@ -3410,8 +3179,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
if (!prev)
{
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
- prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
- page_is_leaf(block.page.frame), mtr, &err);
+ prev= btr_block_get(index, id.page_no(), RW_X_LATCH, mtr, &err);
if (UNIV_UNLIKELY(!prev))
return err;
}
@@ -3426,8 +3194,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
if (!next)
{
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
- next= btr_block_get(index, id.page_no(), RW_X_LATCH,
- page_is_leaf(block.page.frame), mtr, &err);
+ next= btr_block_get(index, id.page_no(), RW_X_LATCH, mtr, &err);
if (UNIV_UNLIKELY(!next))
return err;
}
@@ -3452,6 +3219,7 @@ btr_lift_page_up(
must not be empty: use
btr_discard_only_page_on_level if the last
record from the page should be removed */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
dberr_t* err) /*!< out: error code */
{
@@ -3486,7 +3254,8 @@ btr_lift_page_up(
if (index->is_spatial()) {
offsets = rtr_page_get_father_block(
- nullptr, heap, mtr, nullptr, &cursor);
+ nullptr, heap, nullptr, &cursor,
+ thr, mtr);
} else {
offsets = btr_page_get_father_block(offsets, heap,
mtr, &cursor);
@@ -3507,7 +3276,8 @@ btr_lift_page_up(
if (index->is_spatial()) {
offsets = rtr_page_get_father_block(
- nullptr, heap, mtr, nullptr, &cursor);
+ nullptr, heap, nullptr, &cursor, thr,
+ mtr);
} else {
offsets = btr_page_get_father_block(offsets,
heap,
@@ -3638,13 +3408,8 @@ copied:
/* Free the file page */
btr_page_free(index, block, mtr);
- /* We play it safe and reset the free bits for the father */
- if (!dict_index_is_clust(index)
- && !index->table->is_temporary()) {
- ibuf_reset_free_bits(father_block);
- }
ut_ad(page_validate(father_block->page.frame, index));
- ut_ad(btr_check_node_ptr(index, father_block, mtr));
+ ut_ad(btr_check_node_ptr(index, father_block, thr, mtr));
return(lift_father_up ? block_orig : father_block);
}
@@ -3711,8 +3476,10 @@ btr_compress(
father_cursor.page_cur.block = block;
if (index->is_spatial()) {
+ ut_ad(cursor->rtr_info);
offsets = rtr_page_get_father_block(
- NULL, heap, mtr, cursor, &father_cursor);
+ nullptr, heap, cursor, &father_cursor,
+ cursor->rtr_info->thr, mtr);
ut_ad(cursor->page_cur.block->page.id() == block->page.id());
rec_t* my_rec = father_cursor.page_cur.rec;
@@ -3722,10 +3489,10 @@ btr_compress(
ib::info() << "father positioned on page "
<< page_no << "instead of "
<< block->page.id().page_no();
- offsets = btr_page_get_father_block(
- NULL, heap, mtr, &father_cursor);
+ goto get_offsets;
}
} else {
+get_offsets:
offsets = btr_page_get_father_block(
NULL, heap, mtr, &father_cursor);
}
@@ -3735,14 +3502,7 @@ btr_compress(
if (UNIV_UNLIKELY(!nth_rec || nth_rec == ULINT_UNDEFINED)) {
corrupted:
err = DB_CORRUPTION;
- err_exit:
- /* We play it safe and reset the free bits. */
- if (merge_block && merge_block->zip_size()
- && page_is_leaf(merge_block->page.frame)
- && !index->is_clust()) {
- ibuf_reset_free_bits(merge_block);
- }
- goto func_exit;
+ goto err_exit;
}
}
@@ -3750,7 +3510,10 @@ btr_compress(
/* The page is the only one on the level, lift the records
to the father */
- merge_block = btr_lift_page_up(index, block, mtr, &err);
+ merge_block = btr_lift_page_up(index, block,
+ cursor->rtr_info
+ ? cursor->rtr_info->thr
+ : nullptr, mtr, &err);
success:
if (adjust) {
ut_ad(nth_rec > 0);
@@ -3765,7 +3528,7 @@ success:
}
MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL);
-func_exit:
+err_exit:
mem_heap_free(heap);
DBUG_RETURN(err);
}
@@ -4065,49 +3828,6 @@ cannot_merge:
}
}
- if (!dict_index_is_clust(index)
- && !index->table->is_temporary()
- && page_is_leaf(merge_page)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. This has to be done in a
- separate mini-transaction that is committed before the
- main mini-transaction. We cannot update the insert
- buffer bitmap in this mini-transaction, because
- btr_compress() can be invoked recursively without
- committing the mini-transaction in between. Since
- insert buffer bitmap pages have a lower rank than
- B-tree pages, we must not access other pages in the
- same mini-transaction after accessing an insert buffer
- bitmap page. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (merge_block->zip_size()) {
- /* Because the free bits may be incremented
- and we cannot update the insert buffer bitmap
- in the same mini-transaction, the only safe
- thing we can do here is the pessimistic
- approach: reset the free bits. */
- ibuf_reset_free_bits(merge_block);
- } else {
- /* On uncompressed pages, the free bits will
- never increase here. Thus, it is safe to
- write the bits accurately in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(merge_block,
- srv_page_size,
- ULINT_UNDEFINED);
- }
- }
-
ut_ad(page_validate(merge_page, index));
#ifdef UNIV_ZIP_DEBUG
ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
@@ -4122,7 +3842,10 @@ cannot_merge:
err = btr_page_free(index, block, mtr);
if (err == DB_SUCCESS) {
ut_ad(leftmost_child
- || btr_check_node_ptr(index, merge_block, mtr));
+ || btr_check_node_ptr(index, merge_block,
+ cursor->rtr_info
+ ? cursor->rtr_info->thr
+ : nullptr, mtr));
goto success;
} else {
goto err_exit;
@@ -4139,11 +3862,13 @@ static
void
btr_discard_only_page_on_level(
/*===========================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level */
+ btr_cur_t* cur, /*!< in: cursor on a page which is the
+ only on its level */
mtr_t* mtr) /*!< in: mtr */
{
- ulint page_level = 0;
+ dict_index_t* index = cur->index();
+ buf_block_t* block = btr_cur_get_block(cur);
+ ulint page_level = 0;
ut_ad(!index->is_dummy);
@@ -4174,7 +3899,8 @@ btr_discard_only_page_on_level(
if (index->is_spatial()) {
/* Check any concurrent search having this page */
rtr_check_discard_page(index, NULL, block);
- if (!rtr_page_get_father(mtr, nullptr, &cursor)) {
+ if (!rtr_page_get_father(mtr, nullptr, &cursor,
+ cur->rtr_info->thr)) {
return;
}
} else {
@@ -4240,9 +3966,6 @@ btr_discard_only_page_on_level(
index->clear_instant_add();
}
} else if (!index->table->is_temporary()) {
- /* We play it safe and reset the free bits for the root */
- ibuf_reset_free_bits(block);
-
ut_a(max_trx_id);
page_set_max_trx_id(block,
buf_block_get_page_zip(block),
@@ -4279,7 +4002,8 @@ btr_discard_page(
MONITOR_INC(MONITOR_INDEX_DISCARD);
if (index->is_spatial()
- ? !rtr_page_get_father(mtr, cursor, &parent_cursor)
+ ? !rtr_page_get_father(mtr, cursor, &parent_cursor,
+ cursor->rtr_info->thr)
: !btr_page_get_father(mtr, &parent_cursor)) {
return DB_CORRUPTION;
}
@@ -4353,7 +4077,7 @@ btr_discard_page(
return DB_CORRUPTION;
}
} else {
- btr_discard_only_page_on_level(index, block, mtr);
+ btr_discard_only_page_on_level(cursor, mtr);
return DB_SUCCESS;
}
@@ -4408,14 +4132,20 @@ btr_discard_page(
If the merge_block's parent block is not same,
we cannot use btr_check_node_ptr() */
ut_ad(parent_is_different
- || btr_check_node_ptr(index, merge_block, mtr));
+ || btr_check_node_ptr(index, merge_block,
+ cursor->rtr_info
+ ? cursor->rtr_info->thr
+ : nullptr, mtr));
if (btr_cur_get_block(&parent_cursor)->page.id().page_no()
== index->page
&& !page_has_siblings(btr_cur_get_page(&parent_cursor))
&& page_get_n_recs(btr_cur_get_page(&parent_cursor))
== 1) {
- btr_lift_page_up(index, merge_block, mtr, &err);
+ btr_lift_page_up(index, merge_block,
+ cursor->rtr_info
+ ? cursor->rtr_info->thr
+ : nullptr, mtr, &err);
}
}
@@ -4434,13 +4164,6 @@ btr_print_size(
fseg_header_t* seg;
mtr_t mtr;
- if (dict_index_is_ibuf(index)) {
- fputs("Sorry, cannot print info of an ibuf tree:"
- " use ibuf functions\n", stderr);
-
- return;
- }
-
mtr_start(&mtr);
root = btr_root_get(index, &mtr);
@@ -4450,13 +4173,10 @@ btr_print_size(
fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
fseg_print(seg, &mtr);
- if (!dict_index_is_ibuf(index)) {
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+ seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
- }
+ fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
+ fseg_print(seg, &mtr);
mtr_commit(&mtr);
}
@@ -4567,6 +4287,7 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
mem_heap_t* heap;
@@ -4588,8 +4309,8 @@ btr_check_node_ptr(
heap = mem_heap_create(256);
if (dict_index_is_spatial(index)) {
- offsets = rtr_page_get_father_block(NULL, heap, mtr,
- NULL, &cursor);
+ offsets = rtr_page_get_father_block(NULL, heap,
+ NULL, &cursor, thr, mtr);
} else {
offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor);
}
@@ -4664,14 +4385,6 @@ btr_index_rec_validate(
ut_ad(index->n_core_fields);
- if (index->is_ibuf()) {
- /* The insert buffer index tree can contain records from any
- other index: we cannot check the number of fields or
- their length */
-
- return(TRUE);
- }
-
#ifdef VIRTUAL_INDEX_DEBUG
if (dict_index_has_virtual(index)) {
fprintf(stderr, "index name is %s\n", index->name());
@@ -4999,8 +4712,7 @@ corrupted:
mtr.release_last_page();
block = btr_block_get(*index, left_page_no,
- RW_SX_LATCH, false,
- &mtr, &err);
+ RW_SX_LATCH, &mtr, &err);
if (!block) {
goto invalid_page;
}
@@ -5071,7 +4783,7 @@ func_exit:
const rec_t* right_rec;
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
- !level, &mtr, &err);
+ &mtr, &err);
if (!right_block) {
btr_validate_report1(index, level, block);
fputs("InnoDB: broken FIL_PAGE_NEXT link\n", stderr);
@@ -5324,7 +5036,7 @@ node_ptr_fails:
mtr.start();
block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
- !level, &mtr, &err);
+ &mtr, &err);
goto loop;
}
@@ -5391,8 +5103,7 @@ error:
index = btr_cur_get_index(cursor);
page = btr_cur_get_page(cursor);
- mblock = btr_block_get(*index, page_no, RW_X_LATCH, page_is_leaf(page),
- mtr);
+ mblock = btr_block_get(*index, page_no, RW_X_LATCH, mtr);
if (!mblock) {
goto error;
}
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
index 013cd13102c..3c5b4b293f2 100644
--- a/storage/innobase/btr/btr0bulk.cc
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,6 @@ Created 03/11/2014 Shaohua Wang
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0pcur.h"
-#include "ibuf0ibuf.h"
#include "page0page.h"
#include "trx0trx.h"
@@ -107,7 +106,7 @@ oom:
}
} else {
new_block = btr_block_get(*m_index, m_page_no, RW_X_LATCH,
- false, &m_mtr);
+ &m_mtr);
if (!new_block) {
m_mtr.commit();
return(DB_CORRUPTION);
@@ -122,7 +121,7 @@ oom:
m_page_zip = buf_block_get_page_zip(new_block);
- if (!m_level && dict_index_is_sec_or_ibuf(m_index)) {
+ if (!m_level && !m_index->is_primary()) {
page_update_max_trx_id(new_block, m_page_zip, m_trx_id,
&m_mtr);
}
@@ -563,9 +562,6 @@ inline void PageBulk::finish()
void PageBulk::commit(bool success)
{
finish();
- if (success && !m_index->is_clust() && page_is_leaf(m_page))
- ibuf_set_bitmap_for_bulk_load(m_block, &m_mtr,
- innobase_fill_factor == 100);
m_mtr.commit();
}
@@ -1194,7 +1190,7 @@ BtrBulk::finish(dberr_t err)
ut_ad(last_page_no != FIL_NULL);
last_block = btr_block_get(*m_index, last_page_no, RW_X_LATCH,
- false, &mtr);
+ &mtr);
if (!last_block) {
err = DB_CORRUPTION;
err_exit:
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 71177e228ec..74db3fa3d8f 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -61,7 +61,6 @@ Created 10/16/1994 Heikki Tuuri
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "zlib.h"
#include "srv0start.h"
@@ -73,15 +72,6 @@ Created 10/16/1994 Heikki Tuuri
#endif /* WITH_WSREP */
#include "log.h"
-/** Buffered B-tree operation types, introduced as part of delete buffering. */
-enum btr_op_t {
- BTR_NO_OP = 0, /*!< Not buffered */
- BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
- BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
- BTR_DELETE_OP, /*!< Purge a delete-marked record */
- BTR_DELMARK_OP /*!< Mark a record for deletion */
-};
-
/** Modification types for the B-tree operation.
Note that the order must be DELETE, BOTH, INSERT !!
*/
@@ -197,10 +187,14 @@ when loading a table definition.
static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
{
ut_ad(index->is_primary());
- ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
- ut_ad(index->table->supports_instant());
ut_ad(index->table->is_readable());
+ if (!index->table->supports_instant()) {
+ return DB_SUCCESS;
+ }
+
+ ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
+
dberr_t err;
const fil_space_t* space = index->table->space;
if (!space) {
@@ -467,17 +461,25 @@ when loading a table definition.
@param[in,out] table table definition from the data dictionary
@return error code
@retval DB_SUCCESS if no error occurred */
-dberr_t
-btr_cur_instant_init(dict_table_t* table)
+dberr_t btr_cur_instant_init(dict_table_t *table)
{
- mtr_t mtr;
- dict_index_t* index = dict_table_get_first_index(table);
- mtr.start();
- dberr_t err = index
- ? btr_cur_instant_init_low(index, &mtr)
- : DB_CORRUPTION;
- mtr.commit();
- return(err);
+ mtr_t mtr;
+ dict_index_t *index= dict_table_get_first_index(table);
+ mtr.start();
+ dberr_t err = index ? btr_cur_instant_init_low(index, &mtr) : DB_CORRUPTION;
+ mtr.commit();
+ if (err == DB_SUCCESS && index->is_gen_clust())
+ {
+ btr_cur_t cur;
+ mtr.start();
+ err= cur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr);
+ if (err != DB_SUCCESS);
+ else if (const rec_t *rec= page_rec_get_prev(btr_cur_get_rec(&cur)))
+ if (page_rec_is_user_rec(rec))
+ table->row_id= mach_read_from_6(rec);
+ mtr.commit();
+ }
+ return(err);
}
/** Initialize the n_core_null_bytes on first access to a clustered
@@ -783,20 +785,6 @@ static bool btr_cur_need_opposite_intention(const page_t *page,
@return maximum size of a node pointer record in bytes */
static ulint btr_node_ptr_max_size(const dict_index_t* index)
{
- if (dict_index_is_ibuf(index)) {
- /* cannot estimate accurately */
- /* This is universal index for change buffer.
- The max size of the entry is about max key length * 2.
- (index key + primary key to be inserted to the index)
- (The max key length is UNIV_PAGE_SIZE / 16 * 3 at
- ha_innobase::max_supported_key_length(),
- considering MAX_KEY_LENGTH = 3072 at MySQL imposes
- the 3500 historical InnoDB value for 16K page size case.)
- For the universal index, node_ptr contains most of the entry.
- And 512 is enough to contain ibuf columns and meta-data */
- return srv_page_size / 8 * 3 + 512;
- }
-
/* Each record has page_no, length of page_no and header. */
ulint comp = dict_table_is_comp(index->table);
ulint rec_max_size = comp
@@ -935,11 +923,9 @@ static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)
dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode, mtr_t *mtr)
{
- ut_ad(index()->is_btree() || index()->is_ibuf());
- ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
+ ut_ad(index()->is_btree());
buf_block_t *guess;
- btr_op_t btr_op;
btr_intention_t lock_intention;
bool detected_same_key_root= false;
@@ -967,34 +953,6 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK |
MTR_MEMO_X_LOCK));
- /* These flags are mutually exclusive, they are lumped together
- with the latch mode for historical reasons. It's possible for
- none of the flags to be set. */
- switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
- default:
- btr_op= BTR_NO_OP;
- break;
- case BTR_INSERT:
- btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE)
- ? BTR_INSERT_IGNORE_UNIQUE_OP
- : BTR_INSERT_OP;
- break;
- case BTR_DELETE:
- btr_op= BTR_DELETE_OP;
- ut_a(purge_node);
- break;
- case BTR_DELETE_MARK:
- btr_op= BTR_DELMARK_OP;
- break;
- }
-
- /* Operations on the insert buffer tree cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf());
- /* Operations on the clustered index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !index()->is_clust());
- /* Operations on the temporary table(indexes) cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary());
-
const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
@@ -1016,7 +974,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
# ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
# endif
- bool ahi_enabled= btr_search_enabled && !index()->is_ibuf();
+ bool ahi_enabled= btr_search_enabled;
/* We do a dirty read of btr_search_enabled below,
and btr_search_guess_on_hash() will have to check it again. */
if (!ahi_enabled);
@@ -1094,84 +1052,19 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
up_bytes= 0;
low_match= 0;
low_bytes= 0;
- ulint buf_mode= BUF_GET;
search_loop:
dberr_t err;
auto block_savepoint= mtr->get_savepoint();
buf_block_t *block=
- buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr,
- &err, height == 0 && !index()->is_clust());
+ buf_page_get_gen(page_id, zip_size, rw_latch, guess, BUF_GET, mtr, &err);
if (!block)
{
- switch (err) {
- case DB_DECRYPTION_FAILED:
+ if (err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index());
- /* fall through */
- default:
- func_exit:
- if (UNIV_LIKELY_NULL(heap))
- mem_heap_free(heap);
- return err;
- case DB_SUCCESS:
- /* This must be a search to perform an insert, delete mark, or delete;
- try using the change buffer */
- ut_ad(height == 0);
- ut_ad(thr);
- break;
- }
-
- switch (btr_op) {
- default:
- MY_ASSERT_UNREACHABLE();
- break;
- case BTR_INSERT_OP:
- case BTR_INSERT_IGNORE_UNIQUE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr))
- {
- flag= BTR_CUR_INSERT_TO_IBUF;
- goto func_exit;
- }
- break;
-
- case BTR_DELMARK_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index(), page_id, zip_size, thr))
- {
- flag = BTR_CUR_DEL_MARK_IBUF;
- goto func_exit;
- }
-
- break;
-
- case BTR_DELETE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
- auto& chain = buf_pool.page_hash.cell_get(page_id.fold());
-
- if (!row_purge_poss_sec(purge_node, index(), tuple))
- /* The record cannot be purged yet. */
- flag= BTR_CUR_DELETE_REF;
- else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(),
- page_id, zip_size, thr))
- /* The purge was buffered. */
- flag= BTR_CUR_DELETE_IBUF;
- else
- {
- /* The purge could not be buffered. */
- buf_pool.watch_unset(page_id, chain);
- break;
- }
-
- buf_pool.watch_unset(page_id, chain);
- goto func_exit;
- }
-
- /* Change buffering did not succeed, we must read the page. */
- buf_mode= BUF_GET;
- goto search_loop;
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
}
if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() ||
@@ -1303,22 +1196,18 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
rw_latch= rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH));
if (page_has_prev(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_prev(block->page.frame),
- rw_latch, false, mtr, &err))
+ rw_latch, mtr, &err))
goto func_exit;
mtr->upgrade_buffer_fix(block_savepoint, rw_latch);
if (page_has_next(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_next(block->page.frame),
- rw_latch, false, mtr, &err))
+ rw_latch, mtr, &err))
goto func_exit;
}
goto release_tree;
case BTR_SEARCH_LEAF:
case BTR_MODIFY_LEAF:
- if (rw_latch == RW_NO_LATCH)
- {
- ut_ad(index()->is_ibuf());
- mtr->upgrade_buffer_fix(block_savepoint, rw_lock_type_t(latch_mode));
- }
+ ut_ad(rw_latch == rw_lock_type_t(latch_mode));
if (!latch_by_caller)
{
release_tree:
@@ -1336,12 +1225,12 @@ release_tree:
/* x-latch also siblings from left to right */
if (page_has_prev(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_prev(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
goto func_exit;
mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH);
if (page_has_next(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_next(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
goto func_exit;
if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
node_ptr_max_size, compress_limit,
@@ -1476,7 +1365,7 @@ release_tree:
case BTR_MODIFY_ROOT_AND_LEAF:
rw_latch= RW_X_LATCH;
break;
- case BTR_MODIFY_PREV: /* ibuf_insert() or btr_pcur_move_to_prev() */
+ case BTR_MODIFY_PREV: /* btr_pcur_move_to_prev() */
case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
@@ -1488,7 +1377,7 @@ release_tree:
of the current page. */
buf_block_t *left= btr_block_get(*index(),
btr_page_get_prev(block->page.frame),
- RW_NO_LATCH, false, mtr, &err);
+ RW_NO_LATCH, mtr, &err);
if (UNIV_UNLIKELY(!left))
goto func_exit;
ut_ad(block_savepoint + 2 == mtr->get_savepoint());
@@ -1520,16 +1409,7 @@ release_tree:
goto leaf_with_no_latch;
case BTR_MODIFY_LEAF:
case BTR_SEARCH_LEAF:
- if (index()->is_ibuf())
- goto leaf_with_no_latch;
rw_latch= rw_lock_type_t(latch_mode);
- if (btr_op != BTR_NO_OP &&
- ibuf_should_try(index(), btr_op != BTR_INSERT_OP))
- /* Try to buffer the operation if the leaf page
- is not in the buffer pool. */
- buf_mode= btr_op == BTR_DELETE_OP
- ? BUF_GET_IF_IN_POOL_OR_WATCH
- : BUF_GET_IF_IN_POOL;
break;
case BTR_MODIFY_TREE:
ut_ad(rw_latch == RW_X_LATCH);
@@ -1568,8 +1448,7 @@ ATTRIBUTE_COLD
dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
{
- ut_ad(index()->is_btree() || index()->is_ibuf());
- ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
+ ut_ad(index()->is_btree());
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
@@ -1649,7 +1528,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
const auto block_savepoint= mtr->get_savepoint();
block=
buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET,
- mtr, &err, !--height && !index()->is_clust());
+ mtr, &err);
if (!block)
{
@@ -1664,12 +1543,12 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
!fil_page_index_page_check(block->page.frame))
goto corrupted;
- if (height != btr_page_get_level(block->page.frame))
+ if (--height != btr_page_get_level(block->page.frame))
goto corrupted;
if (page_has_prev(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_prev(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
goto func_exit;
mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH);
#ifdef UNIV_ZIP_DEBUG
@@ -1678,7 +1557,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
#endif /* UNIV_ZIP_DEBUG */
if (page_has_next(block->page.frame) &&
!btr_block_get(*index(), btr_page_get_next(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
goto func_exit;
goto search_loop;
}
@@ -1708,14 +1587,14 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
{
dict_index_t *const index= cursor->index();
- ut_ad(index->is_btree() || index->is_ibuf());
+ ut_ad(index->is_btree());
mem_heap_t *heap= nullptr;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs *offsets= offsets_;
rec_offs_init(offsets_);
ut_ad(level);
ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(index->is_ibuf() ? ibuf_inside(mtr) : index->is_btree());
+ ut_ad(index->is_btree());
ut_ad(dtuple_check_typed(tuple));
ut_ad(index->page != FIL_NULL);
@@ -1860,7 +1739,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
/* This function doesn't need to lock left page of the leaf page */
static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), "");
static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), "");
- latch_mode= btr_latch_mode(latch_mode & ~4);
+ latch_mode= btr_latch_mode(latch_mode & (RW_S_LATCH | RW_X_LATCH));
ut_ad(!latch_by_caller ||
mtr->memo_contains_flagged(&index->lock,
MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK));
@@ -1891,9 +1770,7 @@ index_locked:
const rw_lock_type_t rw_latch= height && latch_mode != BTR_MODIFY_TREE
? upper_rw_latch
: RW_NO_LATCH;
- buf_block_t* block=
- btr_block_get(*index, page, rw_latch, !height && !index->is_clust(), mtr,
- &err);
+ buf_block_t* block= btr_block_get(*index, page, rw_latch, mtr, &err);
ut_ad(!block == (err != DB_SUCCESS));
@@ -1940,12 +1817,12 @@ index_locked:
/* x-latch also siblings from left to right */
if (page_has_prev(block->page.frame) &&
!btr_block_get(*index, btr_page_get_prev(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
break;
mtr->upgrade_buffer_fix(leaf_savepoint - 1, RW_X_LATCH);
if (page_has_next(block->page.frame) &&
!btr_block_get(*index, btr_page_get_next(block->page.frame),
- RW_X_LATCH, false, mtr, &err))
+ RW_X_LATCH, mtr, &err))
break;
if (!index->lock.have_x() &&
@@ -2065,11 +1942,6 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to inserted record if succeed, else NULL */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
rec_t*
@@ -2238,9 +2110,6 @@ static void btr_cur_prefetch_siblings(const buf_block_t *block,
{
ut_ad(page_is_leaf(block->page.frame));
- if (index->is_ibuf())
- return;
-
const page_t *page= block->page.frame;
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV));
uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
@@ -2475,14 +2344,6 @@ fail_err:
if (*rec) {
} else if (block->page.zip.data) {
ut_ad(!index->table->is_temporary());
- /* Reset the IBUF_BITMAP_FREE bits, because
- page_cur_tuple_insert() will have attempted page
- reorganize before failing. */
- if (leaf
- && !dict_index_is_clust(index)) {
- ibuf_reset_free_bits(block);
- }
-
goto fail;
} else {
ut_ad(!reorg);
@@ -2523,34 +2384,6 @@ fail_err:
lock_update_insert(block, *rec);
}
- if (leaf
- && !dict_index_is_clust(index)
- && !index->table->is_temporary()) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (block->page.zip.data) {
- /* Update the bits in the same mini-transaction. */
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- /* Decrement the bits in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(
- block, max_size,
- rec_size + PAGE_DIR_SLOT_SIZE);
- }
- }
-
*big_rec = big_rec_vec;
return(DB_SUCCESS);
@@ -2621,12 +2454,10 @@ btr_cur_pessimistic_insert(
the index tree, so that the insert will not fail because of
lack of space */
- if (!index->is_ibuf()
- && (err = fsp_reserve_free_extents(&n_reserved, index->table->space,
- uint32_t(cursor->tree_height / 16
- + 3),
- FSP_NORMAL, mtr))
- != DB_SUCCESS) {
+ err = fsp_reserve_free_extents(&n_reserved, index->table->space,
+ uint32_t(cursor->tree_height / 16 + 3),
+ FSP_NORMAL, mtr);
+ if (err != DB_SUCCESS) {
return err;
}
@@ -2658,11 +2489,21 @@ btr_cur_pessimistic_insert(
}
}
- *rec = index->page == btr_cur_get_block(cursor)->page.id().page_no()
- ? btr_root_raise_and_insert(flags, cursor, offsets, heap,
- entry, n_ext, mtr, &err)
- : btr_page_split_and_insert(flags, cursor, offsets, heap,
- entry, n_ext, mtr, &err);
+ if (index->page == btr_cur_get_block(cursor)->page.id().page_no()) {
+ *rec = index->is_spatial()
+ ? rtr_root_raise_and_insert(flags, cursor, offsets,
+ heap, entry, n_ext, mtr,
+ &err, thr)
+ : btr_root_raise_and_insert(flags, cursor, offsets,
+ heap, entry, n_ext, mtr,
+ &err);
+ } else if (index->is_spatial()) {
+ *rec = rtr_page_split_and_insert(flags, cursor, offsets, heap,
+ entry, n_ext, mtr, &err, thr);
+ } else {
+ *rec = btr_page_split_and_insert(flags, cursor, offsets, heap,
+ entry, n_ext, mtr, &err);
+ }
if (!*rec) {
goto func_exit;
@@ -2906,14 +2747,8 @@ static dberr_t btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec,
See if there is enough place in the page modification log to log
an update-in-place.
-@retval false if out of space; IBUF_BITMAP_FREE will be reset
-outside mtr if the page was recompressed
-@retval true if enough place;
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
-a secondary index leaf page. This has to be done either within the
-same mini-transaction, or by invoking ibuf_reset_free_bits() before
-mtr_commit(mtr). */
+@retval false if out of space
+@retval true if enough place */
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -2934,7 +2769,6 @@ btr_cur_update_alloc_zip_func(
const page_t* page = page_cur_get_page(cursor);
ut_ad(page_zip == page_cur_get_page_zip(cursor));
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
if (page_zip_available(page_zip, dict_index_is_clust(index),
@@ -2958,26 +2792,8 @@ btr_cur_update_alloc_zip_func(
rec_offs_make_valid(page_cur_get_rec(cursor), index,
page_is_leaf(page), offsets);
- /* After recompressing a page, we must make sure that the free
- bits in the insert buffer bitmap will not exceed the free
- space on the page. Because this function will not attempt
- recompression unless page_zip_available() fails above, it is
- safe to reset the free bits if page_zip_available() fails
- again, below. The free bits can safely be reset in a separate
- mini-transaction. If page_zip_available() succeeds below, we
- can be sure that the btr_page_reorganize() above did not reduce
- the free space available on the page. */
-
- if (page_zip_available(page_zip, dict_index_is_clust(index),
- length, create)) {
- return true;
- }
- }
-
- if (!dict_index_is_clust(index)
- && !index->table->is_temporary()
- && page_is_leaf(page)) {
- ibuf_reset_free_bits(page_cur_get_block(cursor));
+ return page_zip_available(page_zip, dict_index_is_clust(index),
+ length, create);
}
return(false);
@@ -3126,7 +2942,7 @@ We assume here that the ordering fields of the record do not change.
@return locking or undo log related error code, or
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
+on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -3146,7 +2962,6 @@ btr_cur_update_in_place(
further pages */
{
dict_index_t* index;
- dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr = 0;
ulint was_delete_marked;
@@ -3154,17 +2969,14 @@ btr_cur_update_in_place(
ut_ad(page_is_leaf(cursor->page_cur.block->page.frame));
rec = btr_cur_get_rec(cursor);
index = cursor->index();
- ut_ad(!index->is_ibuf());
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
- || dict_index_is_clust(index));
+ || index->is_primary());
ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
+ || (flags & ulint(~BTR_KEEP_POS_FLAG))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor)));
@@ -3194,22 +3006,17 @@ btr_cur_update_in_place(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
- update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
- goto func_exit;
+ if (dberr_t err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
+ update, cmpl_info,
+ thr, mtr, &roll_ptr)) {
+ return err;
}
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- err = btr_cur_upd_rec_sys(block, rec, index, offsets,
- thr_get_trx(thr), roll_ptr, mtr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- goto func_exit;
- }
+ if (flags & BTR_KEEP_SYS_FLAG) {
+ } else if (dberr_t err = btr_cur_upd_rec_sys(block, rec, index, offsets,
+ thr_get_trx(thr),
+ roll_ptr, mtr)) {
+ return err;
}
was_delete_marked = rec_get_deleted_flag(
@@ -3267,19 +3074,7 @@ btr_cur_update_in_place(
btr_cur_unmark_extern_fields(block, rec, index, offsets, mtr);
}
- ut_ad(err == DB_SUCCESS);
-
-func_exit:
- if (page_zip
- && !(flags & BTR_KEEP_IBUF_BITMAP)
- && !dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
- /* Update the free bits in the insert buffer. */
- ut_ad(!index->table->is_temporary());
- ibuf_update_free_bits_zip(block, mtr);
- }
-
- return(err);
+ return DB_SUCCESS;
}
/** Trim a metadata record during the rollback of instant ALTER TABLE.
@@ -3423,7 +3218,7 @@ fields of the record do not change.
@retval DB_OVERFLOW if the updated record does not fit
@retval DB_UNDERFLOW if the page would become too empty
@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
+on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_optimistic_update(
/*======================*/
@@ -3454,7 +3249,6 @@ btr_cur_optimistic_update(
ulint max_size;
ulint new_rec_size;
ulint old_rec_size;
- ulint max_ins_size = 0;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
ulint i;
@@ -3463,19 +3257,16 @@ btr_cur_optimistic_update(
page = buf_block_get_frame(block);
rec = btr_cur_get_rec(cursor);
index = cursor->index();
- ut_ad(index->has_locking());
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page updates */
ut_ad(page_is_leaf(page));
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
|| dict_index_is_clust(index));
ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ulint(~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)))
+ || (flags & ulint(~BTR_KEEP_POS_FLAG))
== (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
ut_ad(fil_page_index_page_check(page));
@@ -3504,7 +3295,6 @@ btr_cur_optimistic_update(
if (rec_offs_any_extern(*offsets)) {
any_extern:
- ut_ad(!index->is_ibuf());
/* Externally stored fields are treated in pessimistic
update */
@@ -3585,9 +3375,6 @@ any_extern:
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
err = DB_OVERFLOW;
goto func_exit;
}
@@ -3595,10 +3382,6 @@ any_extern:
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
< BTR_CUR_PAGE_COMPRESS_LIMIT(index))) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
-
/* The page would become too empty */
err = DB_UNDERFLOW;
goto func_exit;
@@ -3611,19 +3394,9 @@ any_extern:
: (old_rec_size
+ page_get_max_insert_size_after_reorganize(page, 1));
- if (!page_zip) {
- max_ins_size = page_get_max_insert_size_after_reorganize(
- page, 1);
- }
-
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
-
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
-
/* There was not enough space, or it did not pay to
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
@@ -3637,9 +3410,6 @@ any_extern:
update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
goto func_exit;
}
@@ -3695,22 +3465,11 @@ any_extern:
ut_ad(err == DB_SUCCESS);
if (!page_cur_move_to_next(page_cursor)) {
corrupted:
- err = DB_CORRUPTION;
- }
-
-func_exit:
- if (!(flags & BTR_KEEP_IBUF_BITMAP)
- && !dict_index_is_clust(index)) {
- /* Update the free bits in the insert buffer. */
- if (page_zip) {
- ut_ad(!index->table->is_temporary());
- ibuf_update_free_bits_zip(block, mtr);
- } else if (!index->table->is_temporary()) {
- ibuf_update_free_bits_low(block, max_ins_size, mtr);
- }
+ return DB_CORRUPTION;
}
if (err != DB_SUCCESS) {
+func_exit:
/* prefetch siblings of the leaf for the pessimistic
operation. */
btr_cur_prefetch_siblings(block, index);
@@ -3807,7 +3566,6 @@ btr_cur_pessimistic_update(
big_rec_t* dummy_big_rec;
dict_index_t* index;
buf_block_t* block;
- page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
dberr_t err;
@@ -3820,20 +3578,19 @@ btr_cur_pessimistic_update(
*big_rec = NULL;
block = btr_cur_get_block(cursor);
- page_zip = buf_block_get_page_zip(block);
index = cursor->index();
- ut_ad(index->has_locking());
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
MTR_MEMO_SX_LOCK));
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
+#if defined UNIV_ZIP_DEBUG || defined UNIV_DEBUG
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+#endif
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip
|| page_zip_validate(page_zip, block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
ut_ad(!page_zip || !index->table->is_temporary());
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(trx_id > 0 || (flags & BTR_KEEP_SYS_FLAG)
|| index->table->is_temporary());
ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
@@ -3844,7 +3601,7 @@ btr_cur_pessimistic_update(
| BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
err = optim_err = btr_cur_optimistic_update(
- flags | BTR_KEEP_IBUF_BITMAP,
+ flags,
cursor, offsets, offsets_heap, update,
cmpl_info, thr, trx_id, mtr);
@@ -3855,18 +3612,6 @@ btr_cur_pessimistic_update(
break;
default:
err_exit:
- /* We suppressed this with BTR_KEEP_IBUF_BITMAP.
- For DB_ZIP_OVERFLOW, the IBUF_BITMAP_FREE bits were
- already reset by btr_cur_update_alloc_zip() if the
- page was recompressed. */
- if (page_zip
- && optim_err != DB_ZIP_OVERFLOW
- && !dict_index_is_clust(index)
- && page_is_leaf(block->page.frame)) {
- ut_ad(!index->table->is_temporary());
- ibuf_update_free_bits_zip(block, mtr);
- }
-
if (big_rec_vec != NULL) {
dtuple_big_rec_free(big_rec_vec);
}
@@ -3944,11 +3689,6 @@ btr_cur_pessimistic_update(
index->first_user_field())))) {
big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
-
- /* We cannot goto return_after_reservations,
- because we may need to update the
- IBUF_BITMAP_FREE bits, which was suppressed by
- BTR_KEEP_IBUF_BITMAP. */
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip
|| page_zip_validate(page_zip, block->page.frame,
@@ -3993,11 +3733,6 @@ btr_cur_pessimistic_update(
btr_cur_write_sys(new_entry, index, trx_id, roll_ptr);
}
- const ulint max_ins_size = page_zip
- ? 0
- : page_get_max_insert_size_after_reorganize(block->page.frame,
- 1);
-
if (UNIV_UNLIKELY(is_metadata)) {
ut_ad(new_entry->is_metadata());
ut_ad(index->is_instant());
@@ -4082,18 +3817,6 @@ btr_cur_pessimistic_update(
rec_offs_make_valid(page_cursor->rec, index,
true, *offsets);
}
- } else if (!dict_index_is_clust(index)
- && page_is_leaf(block->page.frame)) {
- /* Update the free bits in the insert buffer.
- This is the same block which was skipped by
- BTR_KEEP_IBUF_BITMAP. */
- if (page_zip) {
- ut_ad(!index->table->is_temporary());
- ibuf_update_free_bits_zip(block, mtr);
- } else if (!index->table->is_temporary()) {
- ibuf_update_free_bits_low(block, max_ins_size,
- mtr);
- }
}
#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
@@ -4114,16 +3837,7 @@ btr_cur_pessimistic_update(
of a badly-compressing record, it is possible for
btr_cur_optimistic_update() to return DB_UNDERFLOW and
btr_cur_insert_if_possible() to return FALSE. */
- ut_a(page_zip || optim_err != DB_UNDERFLOW);
-
- /* Out of space: reset the free bits.
- This is the same block which was skipped by
- BTR_KEEP_IBUF_BITMAP. */
- if (!dict_index_is_clust(index)
- && !index->table->is_temporary()
- && page_is_leaf(block->page.frame)) {
- ibuf_reset_free_bits(block);
- }
+ ut_ad(page_zip || optim_err != DB_UNDERFLOW);
}
if (big_rec_vec != NULL) {
@@ -4168,8 +3882,7 @@ btr_cur_pessimistic_update(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it not required
for MVCC. */
- if (dict_index_is_sec_or_ibuf(index)
- && !index->table->is_temporary()) {
+ if (!index->is_primary() && !index->table->is_temporary()) {
/* Update PAGE_MAX_TRX_ID in the index page header.
It was not updated by btr_cur_pessimistic_insert()
because of BTR_NO_LOCKING_FLAG. */
@@ -4480,9 +4193,6 @@ btr_cur_optimistic_delete(
}
{
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec))
& REC_INFO_MIN_REC_FLAG)) {
/* This should be rolling back instant ADD COLUMN.
@@ -4491,7 +4201,7 @@ btr_cur_optimistic_delete(
insert into SYS_COLUMNS is rolled back. */
ut_ad(cursor->index()->table->supports_instant());
ut_ad(cursor->index()->is_primary());
- ut_ad(!page_zip);
+ ut_ad(!buf_block_get_page_zip(block));
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
offsets, mtr);
/* We must empty the PAGE_FREE list, because
@@ -4509,40 +4219,8 @@ btr_cur_optimistic_delete(
btr_search_update_hash_on_delete(cursor);
}
- if (page_zip) {
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page,
- cursor->index()));
-#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page,
- cursor->index()));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* On compressed pages, the IBUF_BITMAP_FREE
- space is not affected by deleting (purging)
- records, because it is defined as the minimum
- of space available *without* reorganize, and
- space available in the modification log. */
- } else {
- const ulint max_ins
- = page_get_max_insert_size_after_reorganize(
- page, 1);
-
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- offsets, mtr);
-
- /* The change buffer does not handle inserts
- into non-leaf pages, into clustered indexes,
- or into the change buffer. */
- if (!cursor->index()->is_clust()
- && !cursor->index()->table->is_temporary()
- && !dict_index_is_ibuf(cursor->index())) {
- ibuf_update_free_bits_low(block, max_ins, mtr);
- }
- }
+ page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+ offsets, mtr);
}
func_exit:
@@ -4738,9 +4416,9 @@ discard_page:
goto err_exit;
}
- btr_cur_t cursor;
- cursor.page_cur.index = index;
- cursor.page_cur.block = block;
+ btr_cur_t cur;
+ cur.page_cur.index = index;
+ cur.page_cur.block = block;
if (!page_has_prev(page)) {
/* If we delete the leftmost node pointer on a
@@ -4756,16 +4434,17 @@ discard_page:
rec_offs* offsets;
ulint len;
- rtr_page_get_father_block(NULL, heap, mtr, NULL,
- &cursor);
- father_rec = btr_cur_get_rec(&cursor);
+ rtr_page_get_father_block(nullptr, heap, nullptr,
+ &cur,
+ cursor->rtr_info->thr, mtr);
+ father_rec = btr_cur_get_rec(&cur);
offsets = rec_get_offsets(father_rec, index, NULL,
0, ULINT_UNDEFINED, &heap);
rtr_read_mbr(rec_get_nth_field(
father_rec, offsets, 0, &len), &father_mbr);
- rtr_update_mbr_field(&cursor, offsets, NULL,
+ rtr_update_mbr_field(&cur, offsets, NULL,
page, &father_mbr, next_rec, mtr);
ut_d(parent_latched = true);
} else {
@@ -4773,12 +4452,12 @@ discard_page:
on a page, we have to change the parent node pointer
so that it is equal to the new leftmost node pointer
on the page */
- ret = btr_page_get_father(mtr, &cursor);
+ ret = btr_page_get_father(mtr, &cur);
if (!ret) {
*err = DB_CORRUPTION;
goto err_exit;
}
- *err = btr_cur_node_ptr_delete(&cursor, mtr);
+ *err = btr_cur_node_ptr_delete(&cur, mtr);
if (*err != DB_SUCCESS) {
got_err:
ret = FALSE;
@@ -4825,7 +4504,10 @@ got_err:
#endif /* UNIV_ZIP_DEBUG */
ut_ad(!parent_latched
- || btr_check_node_ptr(index, block, mtr));
+ || btr_check_node_ptr(index, block,
+ cursor->rtr_info
+ ? cursor->rtr_info->thr
+ : nullptr, mtr));
if (!ret && btr_cur_compress_recommendation(cursor, mtr)) {
if (UNIV_LIKELY(allow_merge)) {
@@ -4970,7 +4652,7 @@ public:
buf_block_t *parent_block= m_block;
ulint parent_savepoint= m_savepoint;
- m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level,
+ m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH,
&mtr, nullptr);
if (!m_block)
return false;
@@ -5191,8 +4873,7 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
savepoint= mtr.get_savepoint();
/* Fetch the page. */
- block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr,
- nullptr);
+ block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, &mtr, nullptr);
if (prev_block)
{
@@ -5502,6 +5183,7 @@ search_loop:
DBUG_EXECUTE_IF("bug14007649", DBUG_RETURN(n_rows););
+#ifdef NOT_USED
/* Do not estimate the number of rows in the range to over 1 / 2 of the
estimated rows in the whole table */
@@ -5516,6 +5198,10 @@ search_loop:
if (n_rows == 0)
n_rows= table_n_rows;
}
+#else
+ if (n_rows > table_n_rows)
+ n_rows= table_n_rows;
+#endif
DBUG_RETURN(n_rows);
@@ -5820,7 +5506,7 @@ struct btr_blob_log_check_t {
m_mtr, &err));
}
m_pcur->btr_cur.page_cur.block = btr_block_get(
- *index, page_no, RW_X_LATCH, false, m_mtr);
+ *index, page_no, RW_X_LATCH, m_mtr);
/* The page should not be evicted or corrupted while
we are holding a buffer-fix on it. */
m_pcur->btr_cur.page_cur.block->page.unfix();
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 642db0e9f1c..2f0b167f655 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -33,7 +33,6 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com
#include "dict0stats.h"
#include "dict0stats_bg.h"
#include "dict0defrag_bg.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "srv0start.h"
#include "mysqld.h"
@@ -394,20 +393,16 @@ btr_defragment_merge_pages(
// If max_ins_size >= move_size, we can move the records without
// reorganizing the page, otherwise we need to reorganize the page
// first to release more space.
- if (move_size > max_ins_size) {
- dberr_t err = btr_page_reorganize_block(page_zip_level,
- to_block, index, mtr);
- if (err != DB_SUCCESS) {
- if (!dict_index_is_clust(index)
- && page_is_leaf(to_page)) {
- ibuf_reset_free_bits(to_block);
- }
- // If reorganization fails, that means page is
- // not compressable. There's no point to try
- // merging into this page. Continue to the
- // next page.
- return err == DB_FAIL ? from_block : nullptr;
- }
+ if (move_size <= max_ins_size) {
+ } else if (dberr_t err = btr_page_reorganize_block(page_zip_level,
+ to_block, index,
+ mtr)) {
+ // If reorganization fails, that means page is
+ // not compressable. There's no point to try
+ // merging into this page. Continue to the
+ // next page.
+ return err == DB_FAIL ? from_block : nullptr;
+ } else {
ut_ad(page_validate(to_page, index));
max_ins_size = page_get_max_insert_size(to_page, n_recs);
if (max_ins_size < move_size) {
@@ -456,18 +451,6 @@ btr_defragment_merge_pages(
&& *max_data_size > new_data_size + move_size) {
*max_data_size = new_data_size + move_size;
}
- // Set ibuf free bits if necessary.
- if (!dict_index_is_clust(index)
- && page_is_leaf(to_page)) {
- if (zip_size) {
- ibuf_reset_free_bits(to_block);
- } else {
- ibuf_update_free_bits_if_full(
- to_block,
- srv_page_size,
- ULINT_UNDEFINED);
- }
- }
btr_cur_t parent;
parent.page_cur.index = index;
parent.page_cur.block = from_block;
@@ -590,8 +573,7 @@ btr_defragment_n_pages(
break;
}
- blocks[i] = btr_block_get(*index, page_no, RW_X_LATCH, true,
- mtr);
+ blocks[i] = btr_block_get(*index, page_no, RW_X_LATCH, mtr);
if (!blocks[i]) {
return nullptr;
}
@@ -606,7 +588,7 @@ btr_defragment_n_pages(
/* given page is the last page.
Lift the records to father. */
dberr_t err;
- btr_lift_page_up(index, block, mtr, &err);
+ btr_lift_page_up(index, block, nullptr, mtr, &err);
}
return NULL;
}
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 1dd26f8c467..c3309085c46 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -157,20 +157,14 @@ before_first:
cursor->rel_pos = BTR_PCUR_ON;
}
- if (index->is_ibuf()) {
- ut_ad(!index->table->not_redundant());
- cursor->old_n_fields = uint16_t(rec_get_n_fields_old(rec));
- } else {
- cursor->old_n_fields = static_cast<uint16>(
- dict_index_get_n_unique_in_tree(index));
- if (index->is_spatial() && !page_rec_is_leaf(rec)) {
- ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index)
- == DICT_INDEX_SPATIAL_NODEPTR_SIZE);
- /* For R-tree, we have to compare
- the child page numbers as well. */
- cursor->old_n_fields
- = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
- }
+ cursor->old_n_fields = static_cast<uint16>(
+ dict_index_get_n_unique_in_tree(index));
+ if (index->is_spatial() && !page_rec_is_leaf(rec)) {
+ ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index)
+ == DICT_INDEX_SPATIAL_NODEPTR_SIZE);
+ /* For R-tree, we have to compare
+ the child page numbers as well. */
+ cursor->old_n_fields = DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
}
cursor->old_n_core_fields = index->n_core_fields;
@@ -541,7 +535,7 @@ btr_pcur_move_to_next_page(
dberr_t err;
buf_block_t* next_block = btr_block_get(
*cursor->index(), next_page_no, cursor->latch_mode & ~12,
- page_is_leaf(page), mtr, &err);
+ mtr, &err);
if (UNIV_UNLIKELY(!next_block)) {
return err;
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index 300276ff3a6..eeb39545360 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -305,13 +305,6 @@ static void btr_search_info_update_hash(btr_search_t *info, btr_cur_t *cursor)
dict_index_t* index = cursor->index();
int cmp;
- if (dict_index_is_ibuf(index)) {
- /* So many deletes are performed on an insert buffer tree
- that we do not consider a hash index useful on it: */
-
- return;
- }
-
uint16_t n_unique = dict_index_get_n_unique_in_tree(index);
if (info->n_hash_potential == 0) {
@@ -712,7 +705,6 @@ btr_search_update_hash_ref(
ut_ad(block->page.id().space() == index->table->space_id);
ut_ad(index == cursor->index());
- ut_ad(!dict_index_is_ibuf(index));
auto part = btr_search_sys.get_part(*index);
part->latch.wr_lock(SRW_LOCK_CALL);
ut_ad(!block->index || block->index == index);
@@ -1057,7 +1049,7 @@ btr_search_guess_on_hash(
index_id_t index_id;
ut_ad(mtr->is_active());
- ut_ad(index->is_btree() || index->is_ibuf());
+ ut_ad(index->is_btree());
/* Note that, for efficiency, the struct info may not be protected by
any latch here! */
@@ -1267,7 +1259,6 @@ retry:
ut_ad(block->page.id().space() == index->table->space_id);
ut_a(index_id == index->id);
- ut_ad(!dict_index_is_ibuf(index));
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
@@ -1470,7 +1461,6 @@ btr_search_build_page_hash_index(
ut_ad(ahi_latch == &btr_search_sys.get_part(*index)->latch);
ut_ad(index);
ut_ad(block->page.id().space() == index->table->space_id);
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(page_is_leaf(block->page.frame));
ut_ad(block->page.lock.have_x() || block->page.lock.have_s());
@@ -1796,7 +1786,6 @@ void btr_search_update_hash_on_delete(btr_cur_t *cursor)
ut_ad(block->page.id().space() == index->table->space_id);
ut_a(index == cursor->index());
ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
- ut_ad(!dict_index_is_ibuf(index));
rec = btr_cur_get_rec(cursor);
@@ -1869,7 +1858,6 @@ void btr_search_update_hash_node_on_insert(btr_cur_t *cursor,
}
ut_a(cursor->index() == index);
- ut_ad(!dict_index_is_ibuf(index));
ahi_latch->wr_lock(SRW_LOCK_CALL);
if (!block->index || !btr_search_enabled) {
@@ -1962,7 +1950,6 @@ drop:
}
ut_a(index == cursor->index());
- ut_ad(!dict_index_is_ibuf(index));
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
@@ -2211,7 +2198,6 @@ btr_search_hash_table_validate(ulint hash_table_id)
invokes btr_search_drop_page_hash_index(). */
ut_a(block->page.state() == buf_page_t::REMOVE_HASH);
state_ok:
- ut_ad(!dict_index_is_ibuf(block->index));
ut_ad(block->page.id().space()
== block->index->table->space_id);
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index 85a698bc875..f43c6672a95 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2021, MariaDB Corporation.
+Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -454,7 +454,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru)
}
/* Try replacing an uncompressed page in the buffer pool. */
- block = buf_LRU_get_free_block(true);
+ block = buf_LRU_get_free_block(have_mutex);
if (lru) {
*lru = true;
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 5339f913496..4de8b4fd175 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -50,7 +50,6 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0dblwr.h"
#include "lock0lock.h"
#include "btr0sea.h"
-#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "log0log.h"
@@ -586,7 +585,7 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf,
DBUG_EXECUTE_IF(
"page_intermittent_checksum_mismatch", {
static int page_counter;
- if (page_counter++ == 3) {
+ if (page_counter++ == 6) {
crc32++;
}
});
@@ -721,7 +720,8 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf,
DBUG_EXECUTE_IF(
"page_intermittent_checksum_mismatch", {
static int page_counter;
- if (page_counter++ == 3) return true;
+ if (page_counter++ == 6)
+ return true;
});
if ((checksum_field1 != crc32
@@ -1856,9 +1856,6 @@ calc_buf_pool_size:
" and dictionary.";
}
- /* normalize ibuf.max_size */
- ibuf_max_size_update(srv_change_buffer_max_size);
-
if (srv_buf_pool_old_size != srv_buf_pool_size) {
buf_resize_status("Completed resizing buffer pool from %zu to %zu bytes."
@@ -1932,7 +1929,6 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked());
ut_ad(bpage == buf_pool.page_hash.get(id, chain));
- ut_ad(!buf_pool.watch_is_sentinel(*bpage));
ut_d(const auto state= bpage->state());
ut_ad(state >= buf_page_t::FREED);
ut_ad(state <= buf_page_t::READ_FIX);
@@ -1976,127 +1972,6 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
buf_pool.page_hash.replace(chain, bpage, dpage);
}
-buf_page_t *buf_pool_t::watch_set(const page_id_t id,
- buf_pool_t::hash_chain &chain)
-{
- ut_ad(&chain == &page_hash.cell_get(id.fold()));
- page_hash.lock_get(chain).lock();
-
- buf_page_t *bpage= page_hash.get(id, chain);
-
- if (bpage)
- {
-got_block:
- bpage->fix();
- if (watch_is_sentinel(*bpage))
- bpage= nullptr;
- page_hash.lock_get(chain).unlock();
- return bpage;
- }
-
- page_hash.lock_get(chain).unlock();
- /* Allocate a watch[] and then try to insert it into the page_hash. */
- mysql_mutex_lock(&mutex);
-
- /* The maximum number of purge tasks should never exceed
- the UT_ARR_SIZE(watch) - 1, and there is no way for a purge task to hold a
- watch when setting another watch. */
- for (buf_page_t *w= &watch[UT_ARR_SIZE(watch)]; w-- >= watch; )
- {
- ut_ad(w->access_time == 0);
- ut_ad(!w->oldest_modification());
- ut_ad(!w->zip.data);
- ut_ad(!w->in_zip_hash);
- static_assert(buf_page_t::NOT_USED == 0, "efficiency");
- if (ut_d(auto s=) w->state())
- {
- /* This watch may be in use for some other page. */
- ut_ad(s >= buf_page_t::UNFIXED);
- continue;
- }
- /* w is pointing to watch[], which is protected by mutex.
- Normally, buf_page_t::id for objects that are reachable by
- page_hash.get(id, chain) are protected by hash_lock. */
- w->set_state(buf_page_t::UNFIXED + 1);
- w->id_= id;
-
- page_hash.lock_get(chain).lock();
- bpage= page_hash.get(id, chain);
- if (UNIV_LIKELY_NULL(bpage))
- {
- w->set_state(buf_page_t::NOT_USED);
- mysql_mutex_unlock(&mutex);
- goto got_block;
- }
-
- ut_ad(w->state() == buf_page_t::UNFIXED + 1);
- buf_pool.page_hash.append(chain, w);
- mysql_mutex_unlock(&mutex);
- page_hash.lock_get(chain).unlock();
- return nullptr;
- }
-
- ut_error;
-}
-
-/** Stop watching whether a page has been read in.
-watch_set(id) must have returned nullptr before.
-@param id page identifier
-@param chain unlocked hash table chain */
-TRANSACTIONAL_TARGET
-void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain)
-{
- mysql_mutex_assert_not_owner(&mutex);
- buf_page_t *w;
- {
- transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)};
- /* The page must exist because watch_set() did fix(). */
- w= page_hash.get(id, chain);
- ut_ad(w->in_page_hash);
- if (!watch_is_sentinel(*w))
- {
- no_watch:
- w->unfix();
- w= nullptr;
- }
- else
- {
- const auto state= w->state();
- ut_ad(~buf_page_t::LRU_MASK & state);
- ut_ad(state >= buf_page_t::UNFIXED + 1);
- if (state != buf_page_t::UNFIXED + 1)
- goto no_watch;
- }
- }
-
- if (!w)
- return;
-
- const auto old= w;
- /* The following is based on buf_pool_t::watch_remove(). */
- mysql_mutex_lock(&mutex);
- w= page_hash.get(id, chain);
-
- {
- transactional_lock_guard<page_hash_latch> g
- {buf_pool.page_hash.lock_get(chain)};
- auto f= w->unfix();
- ut_ad(f < buf_page_t::READ_FIX || w != old);
-
- if (f == buf_page_t::UNFIXED && w == old)
- {
- page_hash.remove(chain, w);
- // Now that w is detached from page_hash, release it to watch[].
- ut_ad(w->id_ == id);
- ut_ad(!w->frame);
- ut_ad(!w->zip.data);
- w->set_state(buf_page_t::NOT_USED);
- }
- }
-
- mysql_mutex_unlock(&mutex);
-}
-
/** Mark the page status as FREED for the given tablespace and page number.
@param[in,out] space tablespace
@param[in] page page number
@@ -2178,7 +2053,7 @@ lookup:
if (hash_lock.is_locked())
xabort();
bpage= buf_pool.page_hash.get(page_id, chain);
- if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ if (!bpage)
{
xend();
goto must_read_page;
@@ -2203,7 +2078,7 @@ lookup:
{
hash_lock.lock_shared();
bpage= buf_pool.page_hash.get(page_id, chain);
- if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ if (!bpage)
{
hash_lock.unlock_shared();
goto must_read_page;
@@ -2256,7 +2131,7 @@ lookup:
return bpage;
must_read_page:
- switch (dberr_t err= buf_read_page(page_id, zip_size)) {
+ switch (dberr_t err= buf_read_page(page_id, zip_size, chain)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
goto lookup;
@@ -2378,13 +2253,9 @@ err_exit:
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+or BUF_PEEK_IF_IN_POOL
@param[in] mtr mini-transaction
@param[out] err DB_SUCCESS or error code
-@param[in] allow_ibuf_merge Allow change buffer merge to happen
-while reading the page from file
-then it makes sure that it does merging of change buffer changes while
-reading the page from file.
@return pointer to the block or NULL */
TRANSACTIONAL_TARGET
buf_block_t*
@@ -2395,10 +2266,8 @@ buf_page_get_low(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
- dberr_t* err,
- bool allow_ibuf_merge)
+ dberr_t* err)
{
- unsigned access_time;
ulint retries = 0;
ut_ad(!mtr || mtr->is_active());
@@ -2415,7 +2284,6 @@ buf_page_get_low(
#ifdef UNIV_DEBUG
switch (mode) {
default:
- ut_ad(!allow_ibuf_merge);
ut_ad(mode == BUF_PEEK_IF_IN_POOL);
break;
case BUF_GET_POSSIBLY_FREED:
@@ -2424,7 +2292,6 @@ buf_page_get_low(
because it does not really matter. */
break;
case BUF_GET:
- case BUF_GET_IF_IN_POOL_OR_WATCH:
ut_ad(!mtr->is_freeing_tree());
fil_space_t* s = fil_space_get(page_id.space());
ut_ad(s);
@@ -2432,9 +2299,6 @@ buf_page_get_low(
}
#endif /* UNIV_DEBUG */
- ut_ad(!mtr || !ibuf_inside(mtr)
- || ibuf_page_low(page_id, zip_size, FALSE, NULL));
-
++buf_pool.stat.n_page_gets;
auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
@@ -2467,8 +2331,7 @@ loop:
hash_lock.lock_shared();
block = reinterpret_cast<buf_block_t*>(
buf_pool.page_hash.get(page_id, chain));
- if (UNIV_LIKELY(block
- && !buf_pool.watch_is_sentinel(block->page))) {
+ if (UNIV_LIKELY(block != nullptr)) {
state = block->page.fix();
hash_lock.unlock_shared();
goto got_block;
@@ -2480,17 +2343,6 @@ loop:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
return nullptr;
- case BUF_GET_IF_IN_POOL_OR_WATCH:
- /* Buffer-fixing inside watch_set() will prevent eviction */
- block = reinterpret_cast<buf_block_t*>
- (buf_pool.watch_set(page_id, chain));
-
- if (block) {
- state = block->page.state();
- goto got_block_fixed;
- }
-
- return nullptr;
}
/* The call path is buf_read_page() ->
@@ -2504,10 +2356,10 @@ loop:
corrupted, or if an encrypted page with a valid
checksum cannot be decypted. */
- switch (dberr_t local_err = buf_read_page(page_id, zip_size)) {
+ switch (dberr_t local_err = buf_read_page(page_id, zip_size, chain)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
- buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
+ buf_read_ahead_random(page_id, zip_size);
break;
default:
if (mode != BUF_GET_POSSIBLY_FREED
@@ -2529,7 +2381,6 @@ loop:
got_block:
ut_ad(!block->page.in_zip_hash);
state++;
-got_block_fixed:
ut_ad(state > buf_page_t::FREED);
if (state > buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) {
@@ -2606,7 +2457,7 @@ wait_for_unzip:
goto loop;
}
- buf_block_t *new_block = buf_LRU_get_free_block(false);
+ buf_block_t *new_block = buf_LRU_get_free_block(have_no_mutex);
buf_block_init_low(new_block);
wait_for_unfix:
@@ -2630,7 +2481,6 @@ wait_for_unfix:
switch (state) {
case buf_page_t::UNFIXED + 1:
- case buf_page_t::IBUF_EXIST + 1:
case buf_page_t::REINIT + 1:
break;
default:
@@ -2684,13 +2534,6 @@ wait_for_unfix:
buf_pool.n_pend_unzip++;
- access_time = block->page.is_accessed();
-
- if (!access_time && !recv_no_ibuf_operations
- && ibuf_page_exists(block->page.id(), block->zip_size())) {
- state = buf_page_t::IBUF_EXIST + 1;
- }
-
/* Decompress the page while not holding
buf_pool.mutex. */
const auto ok = buf_zip_decompress(block, false);
@@ -2709,63 +2552,6 @@ wait_for_unfix:
}
}
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-re_evict:
- if (mode != BUF_GET_IF_IN_POOL
- && mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
- } else if (!ibuf_debug || recv_recovery_is_on()) {
- } else if (fil_space_t* space = fil_space_t::get(page_id.space())) {
- for (ulint i = 0; i < mtr->get_savepoint(); i++) {
- if (buf_block_t* b = mtr->block_at_savepoint(i)) {
- if (b->page.oldest_modification() > 2
- && b->page.lock.have_any()) {
- /* We are holding a dirty page latch
- that would hang buf_flush_sync(). */
- space->release();
- goto re_evict_fail;
- }
- }
- }
-
- /* Try to evict the block from the buffer pool, to use the
- insert buffer (change buffer) as much as possible. */
-
- mysql_mutex_lock(&buf_pool.mutex);
-
- block->unfix();
-
- /* Blocks cannot be relocated or enter or exit the
- buf_pool while we are holding the buf_pool.mutex. */
- const bool evicted = buf_LRU_free_page(&block->page, true);
- space->release();
-
- if (!evicted) {
- block->fix();
- }
-
- mysql_mutex_unlock(&buf_pool.mutex);
-
- if (evicted) {
- if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
- buf_pool.watch_set(page_id, chain);
- }
- return(NULL);
- }
-
- buf_flush_sync();
-
- state = block->page.state();
-
- if (state == buf_page_t::UNFIXED + 1
- && !block->page.oldest_modification()) {
- goto re_evict;
- }
-
- /* Failed to evict the page; change it directly */
- }
-re_evict_fail:
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
goto ignore_block;
}
@@ -2777,112 +2563,64 @@ re_evict_fail:
#endif /* UNIV_DEBUG */
ut_ad(block->page.frame);
- if (state >= buf_page_t::UNFIXED
- && allow_ibuf_merge
- && fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX
- && page_is_leaf(block->page.frame)) {
- block->page.lock.x_lock();
- ut_ad(block->page.id() == page_id
- || (state >= buf_page_t::READ_FIX
- && state < buf_page_t::WRITE_FIX));
-
-#ifdef BTR_CUR_HASH_ADAPT
- btr_search_drop_page_hash_index(block, true);
-#endif /* BTR_CUR_HASH_ADAPT */
-
- dberr_t e;
-
+ switch (rw_latch) {
+ case RW_NO_LATCH:
+ mtr->memo_push(block, MTR_MEMO_BUF_FIX);
+ return block;
+ case RW_S_LATCH:
+ block->page.lock.s_lock();
+ ut_ad(!block->page.is_read_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
+ block->page.lock.s_unlock();
+ block->page.lock.x_lock();
page_id_mismatch:
- state = block->page.state();
- e = DB_CORRUPTION;
-ibuf_merge_corrupted:
- if (err) {
- *err = e;
- }
-
if (block->page.id().is_corrupted()) {
- buf_pool.corrupted_evict(&block->page, state);
+ buf_pool.corrupted_evict(&block->page,
+ block->page.state());
}
- return nullptr;
- }
-
- state = block->page.state();
- ut_ad(state < buf_page_t::READ_FIX);
-
- if (state >= buf_page_t::IBUF_EXIST
- && state < buf_page_t::REINIT) {
- block->page.clear_ibuf_exist();
- e = ibuf_merge_or_delete_for_page(block, page_id,
- block->zip_size());
- if (UNIV_UNLIKELY(e != DB_SUCCESS)) {
- goto ibuf_merge_corrupted;
+ if (err) {
+ *err = DB_CORRUPTION;
}
+ return nullptr;
}
-
- if (rw_latch == RW_X_LATCH) {
- goto get_latch_valid;
- } else {
- block->page.lock.x_unlock();
- goto get_latch;
+ break;
+ case RW_SX_LATCH:
+ block->page.lock.u_lock();
+ ut_ad(!block->page.is_io_fixed());
+ if (UNIV_UNLIKELY(block->page.id() != page_id)) {
+ block->page.lock.u_x_upgrade();
+ goto page_id_mismatch;
}
- } else {
-get_latch:
- switch (rw_latch) {
- case RW_NO_LATCH:
- mtr->memo_push(block, MTR_MEMO_BUF_FIX);
+ break;
+ default:
+ ut_ad(rw_latch == RW_X_LATCH);
+ if (block->page.lock.x_lock_upgraded()) {
+ ut_ad(block->page.id() == page_id);
+ block->unfix();
+ mtr->page_lock_upgrade(*block);
return block;
- case RW_S_LATCH:
- block->page.lock.s_lock();
- ut_ad(!block->page.is_read_fixed());
- if (UNIV_UNLIKELY(block->page.id() != page_id)) {
- block->page.lock.s_unlock();
- block->page.lock.x_lock();
- goto page_id_mismatch;
- }
-get_latch_valid:
- mtr->memo_push(block, mtr_memo_type_t(rw_latch));
+ }
+ if (UNIV_UNLIKELY(block->page.id() != page_id)) {
+ goto page_id_mismatch;
+ }
+ }
+
+ mtr->memo_push(block, mtr_memo_type_t(rw_latch));
#ifdef BTR_CUR_HASH_ADAPT
- btr_search_drop_page_hash_index(block, true);
+ btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
- break;
- case RW_SX_LATCH:
- block->page.lock.u_lock();
- ut_ad(!block->page.is_io_fixed());
- if (UNIV_UNLIKELY(block->page.id() != page_id)) {
- block->page.lock.u_x_upgrade();
- goto page_id_mismatch;
- }
- goto get_latch_valid;
- default:
- ut_ad(rw_latch == RW_X_LATCH);
- if (block->page.lock.x_lock_upgraded()) {
- ut_ad(block->page.id() == page_id);
- block->unfix();
- mtr->page_lock_upgrade(*block);
- return block;
- }
- if (UNIV_UNLIKELY(block->page.id() != page_id)) {
- goto page_id_mismatch;
- }
- goto get_latch_valid;
- }
- ut_ad(page_id_t(page_get_space_id(block->page.frame),
- page_get_page_no(block->page.frame))
- == page_id);
+ ut_ad(page_id_t(page_get_space_id(block->page.frame),
+ page_get_page_no(block->page.frame)) == page_id);
- if (mode == BUF_GET_POSSIBLY_FREED
- || mode == BUF_PEEK_IF_IN_POOL) {
- return block;
- }
+ if (mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL) {
+ return block;
+ }
- const bool not_first_access{block->page.set_accessed()};
- buf_page_make_young_if_needed(&block->page);
- if (!not_first_access) {
- buf_read_ahead_linear(page_id, block->zip_size(),
- ibuf_inside(mtr));
- }
+ const bool not_first_access{block->page.set_accessed()};
+ buf_page_make_young_if_needed(&block->page);
+ if (!not_first_access) {
+ buf_read_ahead_linear(page_id, block->zip_size());
}
return block;
@@ -2894,11 +2632,9 @@ get_latch_valid:
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL
@param[out] err DB_SUCCESS or error code
-@param[in] allow_ibuf_merge Allow change buffer merge while
-reading the pages from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
@@ -2908,13 +2644,12 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
- dberr_t* err,
- bool allow_ibuf_merge)
+ dberr_t* err)
{
buf_block_t *block= recv_sys.recover(page_id);
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
- guess, mode, mtr, err, allow_ibuf_merge);
+ guess, mode, mtr, err);
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
corrupted:
@@ -2922,19 +2657,16 @@ buf_page_get_gen(
*err= DB_CORRUPTION;
return nullptr;
}
+ if (err)
+ *err= DB_SUCCESS;
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
- if (err)
- *err= DB_SUCCESS;
- const bool must_merge= allow_ibuf_merge &&
- ibuf_page_exists(page_id, block->zip_size());
if (s < buf_page_t::UNFIXED)
{
- got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
@@ -2942,40 +2674,7 @@ buf_page_get_gen(
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
- else if (must_merge &&
- fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
- page_is_leaf(block->page.frame))
- {
- block->page.lock.x_lock();
- s= block->page.state();
- ut_ad(s > buf_page_t::FREED);
- ut_ad(s < buf_page_t::READ_FIX);
- if (s < buf_page_t::UNFIXED)
- {
- block->page.lock.x_unlock();
- goto got_freed_page;
- }
- else
- {
- if (block->page.is_ibuf_exist())
- block->page.clear_ibuf_exist();
- if (dberr_t e=
- ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
- {
- if (err)
- *err= e;
- buf_pool.corrupted_evict(&block->page, s);
- return nullptr;
- }
- }
- if (rw_latch == RW_X_LATCH)
- {
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
- return block;
- }
- block->page.lock.x_unlock();
- }
mtr->page_lock(block, rw_latch);
return block;
}
@@ -3042,7 +2741,6 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
{
ut_ad(rw_latch == RW_S_LATCH || !block->page.is_io_fixed());
ut_ad(id == block->page.id());
- ut_ad(!ibuf_inside(mtr) || ibuf_page(id, block->zip_size(), nullptr));
if (modify_clock != block->modify_clock || block->page.is_freed())
{
@@ -3137,12 +2835,11 @@ retry:
buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain);
- if (bpage && !buf_pool.watch_is_sentinel(*bpage))
+ if (bpage)
{
#ifdef BTR_CUR_HASH_ADAPT
const dict_index_t *drop_hash_entry= nullptr;
#endif
- bool ibuf_exist= false;
if (!mtr->have_x_latch(reinterpret_cast<const buf_block_t&>(*bpage)))
{
@@ -3168,10 +2865,7 @@ retry:
if (state < buf_page_t::UNFIXED)
bpage->set_reinit(buf_page_t::FREED);
else
- {
bpage->set_reinit(state & buf_page_t::LRU_MASK);
- ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST;
- }
if (UNIV_LIKELY(bpage->frame != nullptr))
{
@@ -3197,10 +2891,7 @@ retry:
if (state < buf_page_t::UNFIXED)
bpage->set_reinit(buf_page_t::FREED);
else
- {
bpage->set_reinit(state & buf_page_t::LRU_MASK);
- ibuf_exist= (state & buf_page_t::LRU_MASK) == buf_page_t::IBUF_EXIST;
- }
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(bpage, &free_block->page);
@@ -3240,9 +2931,6 @@ retry:
false);
#endif /* BTR_CUR_HASH_ADAPT */
- if (ibuf_exist && !recv_recovery_is_on())
- ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
-
return reinterpret_cast<buf_block_t*>(bpage);
}
@@ -3283,13 +2971,6 @@ retry:
bpage->set_accessed();
- /* Delete possible entries for the page from the insert buffer:
- such can exist if the page belonged to an index which was dropped */
- if (page_id < page_id_t{SRV_SPACE_ID_UPPER_BOUND, 0} &&
- !srv_is_undo_tablespace(page_id.space()) &&
- !recv_recovery_is_on())
- ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
-
static_assert(FIL_PAGE_PREV + 4 == FIL_PAGE_NEXT, "adjacent");
memset_aligned<8>(bpage->frame + FIL_PAGE_PREV, 0xff, 8);
mach_write_to_2(bpage->frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
@@ -3353,32 +3034,15 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read)
const byte* frame = bpage.zip.data ? bpage.zip.data : bpage.frame;
switch (fil_page_get_type(frame)) {
- ulint level;
case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
- level = btr_page_get_level(frame);
-
- /* Check if it is an index page for insert buffer */
- if (fil_page_get_type(frame) == FIL_PAGE_INDEX
- && btr_page_get_index_id(frame)
- == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
- if (level == 0) {
- counter = MONITOR_RW_COUNTER(
- read, MONITOR_INDEX_IBUF_LEAF_PAGE);
- } else {
- counter = MONITOR_RW_COUNTER(
- read,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE);
- }
+ if (page_is_leaf(frame)) {
+ counter = MONITOR_RW_COUNTER(
+ read, MONITOR_INDEX_LEAF_PAGE);
} else {
- if (level == 0) {
- counter = MONITOR_RW_COUNTER(
- read, MONITOR_INDEX_LEAF_PAGE);
- } else {
- counter = MONITOR_RW_COUNTER(
- read, MONITOR_INDEX_NON_LEAF_PAGE);
- }
+ counter = MONITOR_RW_COUNTER(
+ read, MONITOR_INDEX_NON_LEAF_PAGE);
}
break;
@@ -3390,14 +3054,6 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read)
counter = MONITOR_RW_COUNTER(read, MONITOR_INODE_PAGE);
break;
- case FIL_PAGE_IBUF_FREE_LIST:
- counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_FREELIST_PAGE);
- break;
-
- case FIL_PAGE_IBUF_BITMAP:
- counter = MONITOR_RW_COUNTER(read, MONITOR_IBUF_BITMAP_PAGE);
- break;
-
case FIL_PAGE_TYPE_SYS:
counter = MONITOR_RW_COUNTER(read, MONITOR_SYSTEM_PAGE);
break;
@@ -3613,41 +3269,30 @@ database_corrupted:
<< FORCE_RECOVERY_MSG;
}
- if (!srv_force_recovery)
- goto release_page;
- }
-
- if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
- {
+ if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED ||
+ !srv_force_recovery)
+ {
release_page:
- buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
- return err;
+ buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
+ return err;
+ }
}
- const bool recovery= recv_recovery_is_on();
+ const bool recovery= frame && recv_recovery_is_on();
if (recovery && !recv_recover_page(node.space, this))
return DB_PAGE_CORRUPTED;
- const bool ibuf_may_exist= frame && !recv_no_ibuf_operations &&
- (!expected_id.space() || !is_predefined_tablespace(expected_id.space())) &&
- fil_page_get_type(read_frame) == FIL_PAGE_INDEX &&
- page_is_leaf(read_frame);
-
if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)))
buf_page_monitor(*this, true);
DBUG_PRINT("ib_buf", ("read page %u:%u", id().space(), id().page_no()));
if (!recovery)
{
- ut_d(auto f=) zip.fix.fetch_sub(ibuf_may_exist
- ? READ_FIX - IBUF_EXIST
- : READ_FIX - UNFIXED);
+ ut_d(auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED);
ut_ad(f >= READ_FIX);
ut_ad(f < WRITE_FIX);
}
- else if (ibuf_may_exist)
- set_ibuf_exist();
lock.x_unlock(true);
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3fa26dab25f..0072d0c22b8 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
Copyright (c) 2013, 2014, Fusion-io
This program is free software; you can redistribute it and/or modify it under
@@ -873,7 +873,7 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold,
const buf_page_t *bpage=
buf_pool.page_hash.get(id, buf_pool.page_hash.cell_get(fold));
- if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+ if (!bpage)
return false;
/* We avoid flushing 'non-old' blocks in an eviction flush, because the
@@ -1059,7 +1059,6 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
{
ut_ad(bpage == b);
bpage= nullptr;
- ut_ad(!buf_pool.watch_is_sentinel(*b));
ut_ad(b->oldest_modification() > 1);
flush:
if (b->flush(evict, space))
@@ -1070,7 +1069,7 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
}
/* We avoid flushing 'non-old' blocks in an eviction flush,
because the flushed blocks are soon freed */
- else if ((!evict || b->is_old()) && !buf_pool.watch_is_sentinel(*b) &&
+ else if ((!evict || b->is_old()) &&
b->oldest_modification() > 1 && b->lock.u_lock_try(true))
{
if (b->oldest_modification() < 2)
@@ -1169,7 +1168,7 @@ static void buf_flush_discard_page(buf_page_t *bpage)
ut_d(const auto state= bpage->state());
ut_ad(state == buf_page_t::FREED || state == buf_page_t::UNFIXED ||
- state == buf_page_t::IBUF_EXIST || state == buf_page_t::REINIT);
+ state == buf_page_t::REINIT);
bpage->lock.u_unlock(true);
buf_LRU_free_page(bpage, true);
}
@@ -1744,7 +1743,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
resize_log.write(CHECKPOINT_1, {c, get_block_size()});
}
- if (srv_file_flush_method != SRV_O_DSYNC)
+ if (!log_write_through)
ut_a(log.flush());
latch.wr_lock(SRW_LOCK_CALL);
ut_ad(checkpoint_pending);
@@ -1776,7 +1775,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
if (!is_pmem())
{
- if (srv_file_flush_method != SRV_O_DSYNC)
+ if (!log_write_through)
ut_a(resize_log.flush());
IF_WIN(log.close(),);
}
@@ -1922,13 +1921,7 @@ static bool log_checkpoint()
if (recv_recovery_is_on())
recv_sys.apply(true);
- switch (srv_file_flush_method) {
- case SRV_NOSYNC:
- case SRV_O_DIRECT_NO_FSYNC:
- break;
- default:
- fil_flush_file_spaces();
- }
+ fil_flush_file_spaces();
log_sys.latch.wr_lock(SRW_LOCK_CALL);
const lsn_t end_lsn= log_sys.get_lsn();
@@ -2073,13 +2066,7 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
MONITOR_FLUSH_SYNC_PAGES, n_flushed);
}
- switch (srv_file_flush_method) {
- case SRV_NOSYNC:
- case SRV_O_DIRECT_NO_FSYNC:
- break;
- default:
- fil_flush_file_spaces();
- }
+ fil_flush_file_spaces();
log_sys.latch.wr_lock(SRW_LOCK_CALL);
const lsn_t newest_lsn= log_sys.get_lsn();
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 1a0e481ece4..e4e20e8335f 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -385,14 +385,15 @@ we put it to free list to be used.
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
-@param have_mutex whether buf_pool.mutex is already being held
-@return the free control block, in state BUF_BLOCK_MEMORY */
-buf_block_t *buf_LRU_get_free_block(bool have_mutex)
+@param get how to allocate the block
+@return the free control block, in state BUF_BLOCK_MEMORY
+@retval nullptr if get==have_no_mutex_soft and memory was not available */
+buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
{
ulint n_iterations = 0;
ulint flush_failures = 0;
MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
- if (have_mutex) {
+ if (UNIV_UNLIKELY(get == have_mutex)) {
mysql_mutex_assert_owner(&buf_pool.mutex);
goto got_mutex;
}
@@ -411,13 +412,14 @@ got_mutex:
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
if (!buf_lru_free_blocks_error_printed) {
n_iterations = 21;
+ block = nullptr;
goto not_found;});
retry:
/* If there is a block in the free list, take it */
if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block:
- if (!have_mutex) {
+ if (UNIV_LIKELY(get != have_mutex)) {
mysql_mutex_unlock(&buf_pool.mutex);
}
block->page.zip.clear();
@@ -441,6 +443,11 @@ got_block:
buf_pool.try_LRU_scan = false;
}
+ if (get == have_no_mutex_soft) {
+ mysql_mutex_unlock(&buf_pool.mutex);
+ return nullptr;
+ }
+
for (;;) {
if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block;
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index fa91939acee..bbd905365ed 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,7 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0lru.h"
#include "buf0buddy.h"
#include "buf0dblwr.h"
-#include "ibuf0ibuf.h"
+#include "page0zip.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
@@ -43,122 +43,89 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0srv.h"
#include "log.h"
+TRANSACTIONAL_TARGET
+bool buf_pool_t::page_hash_contains(const page_id_t page_id, hash_chain &chain)
+{ /* Report whether page_hash has an entry for page_id in the given hash chain. */
+ transactional_shared_lock_guard<page_hash_latch> g
+ {page_hash.lock_get(chain)}; /* guard on the latch that page_hash.lock_get() returns for this chain */
+ return page_hash.get(page_id, chain); /* lookup result converted to bool: true if an entry was found */
+}
+
/** If there are buf_pool.curr_size per the number below pending reads, then
read-ahead is not done: this is to prevent flooding the buffer pool with
i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
-/** Remove the sentinel block for the watch before replacing it with a
-real block. watch_unset() or watch_occurred() will notice
-that the block has been replaced with the real block.
-@param w sentinel
-@param chain locked hash table chain
-@return w->state() */
-inline uint32_t buf_pool_t::watch_remove(buf_page_t *w,
- buf_pool_t::hash_chain &chain)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked());
- ut_ad(w >= &watch[0]);
- ut_ad(w < &watch[array_elements(watch)]);
- ut_ad(!w->in_zip_hash);
- ut_ad(!w->zip.data);
-
- uint32_t s{w->state()};
- w->set_state(buf_page_t::NOT_USED);
- ut_ad(s >= buf_page_t::UNFIXED);
- ut_ad(s < buf_page_t::READ_FIX);
-
- if (~buf_page_t::LRU_MASK & s)
- page_hash.remove(chain, w);
-
- ut_ad(!w->in_page_hash);
- w->id_= page_id_t(~0ULL);
- return s;
-}
-
/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
+(2) if the tablespace has been or is being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
-@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] unzip whether the uncompressed page is
- requested (for ROW_FORMAT=COMPRESSED)
+@param page_id page identifier
+@param zip_size ROW_FORMAT=COMPRESSED page size, or 0,
+ bitwise-ORed with 1 in recovery
+@param chain buf_pool.page_hash cell for page_id
+@param block preallocated buffer block (set to nullptr if consumed)
@return pointer to the block
-@retval NULL in case of an error */
+@retval nullptr in case of an error */
TRANSACTIONAL_TARGET
-static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
- ulint zip_size, bool unzip)
+static buf_page_t *buf_page_init_for_read(const page_id_t page_id,
+ ulint zip_size,
+ buf_pool_t::hash_chain &chain,
+ buf_block_t *&block)
{
- mtr_t mtr;
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY)
- {
- /* It is a read-ahead within an ibuf routine */
- ut_ad(!ibuf_bitmap_page(page_id, zip_size));
- ibuf_mtr_start(&mtr);
-
- if (!recv_no_ibuf_operations && !ibuf_page(page_id, zip_size, &mtr))
- {
- ibuf_mtr_commit(&mtr);
- return nullptr;
- }
- }
- else
- ut_ad(mode == BUF_READ_ANY_PAGE);
-
buf_page_t *bpage= nullptr;
- buf_block_t *block= nullptr;
- if (!zip_size || unzip || recv_recovery_is_on())
+ if (!zip_size || (zip_size & 1))
{
- block= buf_LRU_get_free_block(false);
- block->initialise(page_id, zip_size, buf_page_t::READ_FIX);
+ bpage= &block->page;
+ block->initialise(page_id, zip_size & ~1, buf_page_t::READ_FIX);
/* x_unlock() will be invoked
in buf_page_t::read_complete() by the io-handler thread. */
block->page.lock.x_lock(true);
}
- buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
-
- mysql_mutex_lock(&buf_pool.mutex);
-
- buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain);
- if (hash_page && !buf_pool.watch_is_sentinel(*hash_page))
+ page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
+ hash_lock.lock();
+ if (buf_pool.page_hash.get(page_id, chain))
{
+page_exists:
+ hash_lock.unlock();
/* The page is already in the buffer pool. */
- if (block)
+ if (bpage)
{
- block->page.lock.x_unlock(true);
- ut_d(block->page.set_state(buf_page_t::MEMORY));
- buf_LRU_block_free_non_file_page(block);
+ bpage->lock.x_unlock(true);
+ ut_d(mysql_mutex_lock(&buf_pool.mutex));
+ ut_d(bpage->set_state(buf_page_t::MEMORY));
+ ut_d(mysql_mutex_unlock(&buf_pool.mutex));
}
- goto func_exit;
+ return nullptr;
}
- if (UNIV_LIKELY(block != nullptr))
+ if (UNIV_UNLIKELY(mysql_mutex_trylock(&buf_pool.mutex)))
{
- bpage= &block->page;
-
- /* Insert into the hash table of file pages */
+ hash_lock.unlock();
+ mysql_mutex_lock(&buf_pool.mutex);
+ hash_lock.lock();
+ if (buf_pool.page_hash.get(page_id, chain))
{
- transactional_lock_guard<page_hash_latch> g
- {buf_pool.page_hash.lock_get(chain)};
+ mysql_mutex_unlock(&buf_pool.mutex);
+ goto page_exists;
+ }
+ }
- if (hash_page)
- bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
- (buf_page_t::READ_FIX - buf_page_t::UNFIXED));
+ zip_size&= ~1;
- buf_pool.page_hash.append(chain, &block->page);
- }
+ if (UNIV_LIKELY(bpage != nullptr))
+ {
+ block= nullptr;
+ /* Insert into the hash table of file pages */
+ buf_pool.page_hash.append(chain, bpage);
+ hash_lock.unlock();
/* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(&block->page, true/* to old blocks */);
+ buf_LRU_add_block(bpage, true/* to old blocks */);
if (UNIV_UNLIKELY(zip_size))
{
@@ -166,19 +133,19 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_buddy_alloc(). We must defer this operation until after the
block descriptor has been added to buf_pool.LRU and
buf_pool.page_hash. */
- block->page.zip.data= static_cast<page_zip_t*>
- (buf_buddy_alloc(zip_size));
+ bpage->zip.data= static_cast<page_zip_t*>(buf_buddy_alloc(zip_size));
/* To maintain the invariant
block->in_unzip_LRU_list == block->page.belongs_to_unzip_LRU()
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
- ut_ad(block->page.belongs_to_unzip_LRU());
- buf_unzip_LRU_add_block(block, TRUE);
+ ut_ad(bpage->belongs_to_unzip_LRU());
+ buf_unzip_LRU_add_block(reinterpret_cast<buf_block_t*>(bpage), TRUE);
}
}
else
{
+ hash_lock.unlock();
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
@@ -191,9 +158,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru))
{
- hash_page= buf_pool.page_hash.get(page_id, chain);
-
- if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page)))
+ if (UNIV_LIKELY_NULL(buf_pool.page_hash.get(page_id, chain)))
{
/* The block was added by some other thread. */
buf_buddy_free(data, zip_size);
@@ -213,11 +178,6 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{
transactional_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
-
- if (hash_page)
- bpage->set_state(buf_pool.watch_remove(hash_page, chain) +
- (buf_page_t::READ_FIX - buf_page_t::UNFIXED));
-
buf_pool.page_hash.append(chain, bpage);
}
@@ -229,13 +189,9 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_pool.stat.n_pages_read++;
mysql_mutex_unlock(&buf_pool.mutex);
buf_pool.n_pend_reads++;
- goto func_exit_no_mutex;
+ return bpage;
func_exit:
mysql_mutex_unlock(&buf_pool.mutex);
-func_exit_no_mutex:
- if (mode == BUF_READ_IBUF_PAGES_ONLY)
- ibuf_mtr_commit(&mtr);
-
ut_ad(!bpage || bpage->in_file());
return bpage;
@@ -246,54 +202,31 @@ buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
+@param[in] page_id page id
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0,
+ bitwise-ORed with 1 in recovery
+@param[in,out] chain buf_pool.page_hash cell for page_id
@param[in,out] space tablespace
+@param[in,out] block preallocated buffer block
@param[in] sync true if synchronous aio is desired
-@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] unzip true=request uncompressed page
@return error code
@retval DB_SUCCESS if the page was read
@retval DB_SUCCESS_LOCKED_REC if the page exists in the buffer pool already */
static
dberr_t
buf_read_page_low(
- fil_space_t* space,
- bool sync,
- ulint mode,
const page_id_t page_id,
ulint zip_size,
- bool unzip)
+ buf_pool_t::hash_chain& chain,
+ fil_space_t* space,
+ buf_block_t*& block,
+ bool sync = false)
{
buf_page_t* bpage;
- if (buf_dblwr.is_inside(page_id)) {
- ib::error() << "Trying to read doublewrite buffer page "
- << page_id;
- ut_ad(0);
- space->release();
- return DB_PAGE_CORRUPTED;
- }
-
- if (sync) {
- } else if (trx_sys_hdr_page(page_id)
- || ibuf_bitmap_page(page_id, zip_size)
- || (!recv_no_ibuf_operations
- && ibuf_page(page_id, zip_size, nullptr))) {
-
- /* Trx sys header is so low in the latching order that we play
- safe and do not leave the i/o-completion to an asynchronous
- i/o-thread. Change buffer pages must always be read with
- synchronous i/o, to make sure they do not get involved in
- thread deadlocks. */
- sync = true;
- }
+ ut_ad(!buf_dblwr.is_inside(page_id));
- /* The following call will also check if the tablespace does not exist
- or is being dropped; if we succeed in initing the page in the buffer
- pool for read, then DISCARD cannot proceed until the read has
- completed */
- bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);
+ bpage = buf_page_init_for_read(page_id, zip_size, chain, block);
if (!bpage) {
space->release();
@@ -308,10 +241,10 @@ buf_read_page_low(
DBUG_LOG("ib_buf",
"read page " << page_id << " zip_size=" << zip_size
- << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
+ << (sync ? " sync" : " async"));
- void* dst = zip_size ? bpage->zip.data : bpage->frame;
- const ulint len = zip_size ? zip_size : srv_page_size;
+ void* dst = zip_size > 1 ? bpage->zip.data : bpage->frame;
+ const ulint len = zip_size & ~1 ? zip_size & ~1 : srv_page_size;
auto fio = space->io(IORequest(sync
? IORequest::READ_SYNC
@@ -335,25 +268,35 @@ buf_read_page_low(
return fio.err;
}
+/** Acquire a buffer block via buf_LRU_get_free_block(have_no_mutex_soft). @return that call's result; callers in this file treat nullptr as "no block available" */
+static buf_block_t *buf_read_acquire()
+{
+ return buf_LRU_get_free_block(have_no_mutex_soft);
+}
+
+/** Free a buffer block if needed. @param block block to free, or nullptr (then nothing is done, and buf_pool.mutex is not acquired) */
+static void buf_read_release(buf_block_t *block)
+{
+ if (block)
+ {
+ mysql_mutex_lock(&buf_pool.mutex); /* buf_pool.mutex is held around the call that frees the block */
+ buf_LRU_block_free_non_file_page(block);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ }
+}
+
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
+mechanism is not activated. NOTE: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
+end up waiting for these latches!
@param[in] page_id page id of a page which the current thread
wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] ibuf whether we are inside ibuf routine
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value! */
+@return number of page read requests issued */
TRANSACTIONAL_TARGET
-ulint
-buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
+ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
{
if (!srv_random_read_ahead)
return 0;
@@ -362,11 +305,6 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
/* No read-ahead to avoid thread deadlocks */
return 0;
- if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
- /* If it is an ibuf bitmap page or trx sys hdr, we do no
- read-ahead, as that could break the ibuf page access order */
- return 0;
-
if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
@@ -402,18 +340,23 @@ read_ahead:
goto no_read_ahead;
/* Read all the suitable blocks within the area */
- const ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
+ buf_block_t *block= nullptr;
+ if (!zip_size && !(block= buf_read_acquire()))
+ goto no_read_ahead;
for (page_id_t i= low; i < high; ++i)
{
- if (ibuf_bitmap_page(i, zip_size))
- continue;
if (space->is_stopping())
break;
+ buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
space->reacquire();
- if (buf_read_page_low(space, false, ibuf_mode, i, zip_size, false) ==
- DB_SUCCESS)
+ if (buf_read_page_low(i, zip_size, chain, space, block) == DB_SUCCESS)
+ {
count++;
+ ut_ad(!block);
+ if (!zip_size && !(block= buf_read_acquire()))
+ break;
+ }
}
if (count)
@@ -430,6 +373,7 @@ read_ahead:
}
space->release();
+ buf_read_release(block);
return count;
}
@@ -437,15 +381,17 @@ read_ahead:
if it is not already there. Sets the io_fix and an exclusive lock
on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@retval DB_SUCCESS if the page was read and is not corrupted
+@param page_id page id
+@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@param chain buf_pool.page_hash cell for page_id
+@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_SUCCESS_LOCKED_REC if the page was not read
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
+dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
+ buf_pool_t::hash_chain &chain)
{
fil_space_t *space= fil_space_t::get(page_id.space());
if (!space)
@@ -455,9 +401,20 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
return DB_TABLESPACE_DELETED;
}
- buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
- return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
- page_id, zip_size, false);
+ /* Our caller should already have ensured that the page does not
+ exist in buf_pool.page_hash. */
+ buf_block_t *block= nullptr;
+ if (UNIV_LIKELY(!zip_size))
+ {
+ mysql_mutex_lock(&buf_pool.mutex);
+ buf_LRU_stat_inc_io();
+ block= buf_LRU_get_free_block(have_mutex);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ }
+
+ dberr_t err= buf_read_page_low(page_id, zip_size, chain, space, block, true);
+ buf_read_release(block);
+ return err;
}
/** High-level function which reads a page asynchronously from a file to the
@@ -470,15 +427,30 @@ released by the i/o-handler thread.
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
ulint zip_size)
{
- buf_read_page_low(space, false, BUF_READ_ANY_PAGE,
- page_id, zip_size, false);
-
- /* We do not increment number of I/O operations used for LRU policy
- here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
- about evicting uncompressed version of compressed pages from the
- buffer pool. Since this function is called from buffer pool load
- these IOs are deliberate and are not part of normal workload we can
- ignore these in our heuristics. */
+ buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
+ if (buf_pool.page_hash_contains(page_id, chain))
+ {
+ skip:
+ space->release();
+ return;
+ }
+
+ buf_block_t *block= nullptr;
+ if (!zip_size && !(block= buf_read_acquire()))
+ goto skip;
+
+ if (buf_read_page_low(page_id, zip_size, chain, space, block) ==
+ DB_SUCCESS)
+ ut_ad(!block);
+ else
+ buf_read_release(block);
+
+ /* We do not increment the number of I/O operations used for LRU policy
+ here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
+ about evicting the uncompressed version of ROW_FORMAT=COMPRESSED pages
+ from the buffer pool. Since this function is called from buffer pool
+ load, these I/Os are deliberate and not part of the normal workload,
+ so we can ignore them in our heuristics. */
}
/** Applies linear read-ahead if in the buf_pool the page is a border page of
@@ -500,16 +472,11 @@ only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */
TRANSACTIONAL_TARGET
-ulint
-buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
+ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
{
/* check if readahead is disabled */
if (!srv_read_ahead_threshold)
@@ -534,11 +501,6 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
/* This is not a border page of the area */
return 0;
- if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
- /* If it is an ibuf bitmap page or trx sys hdr, we do no
- read-ahead, as that could break the ibuf page access order */
- return 0;
-
fil_space_t *space= fil_space_t::get(page_id.space());
if (!space)
return 0;
@@ -621,18 +583,25 @@ failed:
}
/* If we got this far, read-ahead can be sensible: do it */
+ buf_block_t *block= nullptr;
+ if (!zip_size && !(block= buf_read_acquire()))
+ goto fail;
+
count= 0;
- for (ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
- new_low != new_high_1; ++new_low)
+ for (; new_low != new_high_1; ++new_low)
{
- if (ibuf_bitmap_page(new_low, zip_size))
- continue;
if (space->is_stopping())
break;
+ buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(new_low.fold());
space->reacquire();
- if (buf_read_page_low(space, false, ibuf_mode, new_low, zip_size, false) ==
+ if (buf_read_page_low(new_low, zip_size, chain, space, block) ==
DB_SUCCESS)
+ {
count++;
+ ut_ad(!block);
+ if (!zip_size && !(block= buf_read_acquire()))
+ break;
+ }
}
if (count)
@@ -649,6 +618,7 @@ failed:
}
space->release();
+ buf_read_release(block);
return count;
}
@@ -671,7 +641,8 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
return;
}
- const ulint zip_size = space->zip_size();
+ const ulint zip_size = space->zip_size() | 1;
+ buf_block_t* block = buf_LRU_get_free_block(have_no_mutex);
for (ulint i = 0; i < page_nos.size(); i++) {
@@ -701,10 +672,16 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
}
}
+ buf_pool_t::hash_chain& chain =
+ buf_pool.page_hash.cell_get(cur_page_id.fold());
space->reacquire();
- switch (buf_read_page_low(space, false, BUF_READ_ANY_PAGE,
- cur_page_id, zip_size, true)) {
- case DB_SUCCESS: case DB_SUCCESS_LOCKED_REC:
+ switch (buf_read_page_low(cur_page_id, zip_size, chain, space,
+ block)) {
+ case DB_SUCCESS:
+ ut_ad(!block);
+ block = buf_LRU_get_free_block(have_no_mutex);
+ break;
+ case DB_SUCCESS_LOCKED_REC:
break;
default:
sql_print_error("InnoDB: Recovery failed to read page "
@@ -712,10 +689,12 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
cur_page_id.page_no(),
space->chain.start->name);
}
+ ut_ad(block);
}
-
- DBUG_PRINT("ib_buf", ("recovery read (%zu pages) for %s",
+ DBUG_PRINT("ib_buf", ("recovery read (%zu pages) for %s",
page_nos.size(), space->chain.start->name));
space->release();
+
+ buf_read_release(block);
}
diff --git a/storage/innobase/data/data0type.cc b/storage/innobase/data/data0type.cc
index b1952bcc2a4..dc1c4b9a04a 100644
--- a/storage/innobase/data/data0type.cc
+++ b/storage/innobase/data/data0type.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,13 +33,6 @@ const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] = {
0x80, 0, 0, 0, 0, 0, 0
};
-/* At the database startup we store the default-charset collation number of
-this MySQL installation to this global variable. If we have < 4.1.2 format
-column definitions, or records in the insert buffer, we use this
-charset-collation code for them. */
-
-ulint data_mysql_default_charset_coll;
-
/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 5516bce920b..316d0f01322 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,7 +30,6 @@ Created 4/18/1996 Heikki Tuuri
#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
-#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "os0file.h"
@@ -94,18 +93,6 @@ dict_hdr_get_new_id(
mtr.commit();
}
-/** Update dict_sys.row_id in the dictionary header file page. */
-void dict_hdr_flush_row_id(row_id_t id)
-{
- mtr_t mtr;
- mtr.start();
- buf_block_t* d= dict_hdr_get(&mtr);
- byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame;
- if (mach_read_from_8(row_id) < id)
- mtr.write<8>(*d, row_id, id);
- mtr.commit();
-}
-
/** Create the DICT_HDR page on database initialization.
@return error code */
dberr_t dict_create()
@@ -127,10 +114,8 @@ dberr_t dict_create()
}
ut_a(d->page.id() == hdr_page_id);
- /* Start counting row, table, index, and tree ids from
+ /* Start counting table, index, and tree ids from
DICT_HDR_FIRST_ID */
- mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame,
- DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame,
DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame,
@@ -233,12 +218,12 @@ dberr_t dict_boot()
dict_sys.create();
dberr_t err;
- const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH,
+ const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_S_LATCH,
nullptr, BUF_GET, &mtr, &err);
- if (!d) {
+ if (!d) {
mtr.commit();
return err;
- }
+ }
heap = mem_heap_create(450);
@@ -246,17 +231,6 @@ dberr_t dict_boot()
const byte* dict_hdr = &d->page.frame[DICT_HDR];
- /* Because we only write new row ids to disk-based data structure
- (dictionary header) when it is divisible by
- DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
- the latest value of the row id counter. Therefore we advance
- the counter at the database startup to avoid overlapping values.
- Note that when a user after database startup first time asks for
- a new row id, then because the counter is now divisible by
- ..._MARGIN, it will immediately be updated to the disk-based
- header. */
-
- dict_sys.recover_row_id(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID));
if (uint32_t max_space_id
= mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) {
max_space_id--;
@@ -420,10 +394,7 @@ dberr_t dict_boot()
mtr.commit();
- err = ibuf_init_at_db_start();
-
- if (err == DB_SUCCESS || srv_force_recovery >= SRV_FORCE_NO_DDL_UNDO) {
- err = DB_SUCCESS;
+ if (err == DB_SUCCESS) {
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys.sys_tables);
diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc
index bec6da8e6af..b688f3970fc 100644
--- a/storage/innobase/dict/dict0defrag_bg.cc
+++ b/storage/innobase/dict/dict0defrag_bg.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -214,9 +214,6 @@ Save defragmentation result.
@return DB_SUCCESS or error code */
dberr_t dict_stats_save_defrag_summary(dict_index_t *index, THD *thd)
{
- if (index->is_ibuf())
- return DB_SUCCESS;
-
MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr;
dict_table_t *table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false,
DICT_ERR_IGNORE_NONE);
@@ -336,8 +333,6 @@ dict_stats_save_defrag_stats(
/*============================*/
dict_index_t* index) /*!< in: index */
{
- if (index->is_ibuf())
- return DB_SUCCESS;
if (!index->is_readable())
return dict_stats_report_error(index->table, true);
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index d2fa8555e43..6df8ee0699a 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table)
ulint fold = my_crc32c(0, table->name.m_name,
strlen(table->name.m_name));
+ table->row_id = 0;
table->autoinc_mutex.init();
table->lock_mutex_init();
@@ -1999,7 +2000,6 @@ dict_index_add_to_cache(
ut_ad(index->n_def == index->n_fields);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(!dict_index_is_online_ddl(index));
- ut_ad(!dict_index_is_ibuf(index));
ut_d(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
@@ -2381,15 +2381,7 @@ dict_index_copy_types(
ulint n_fields) /*!< in: number of
field types to copy */
{
- ulint i;
-
- if (dict_index_is_ibuf(index)) {
- dtuple_set_types_binary(tuple, n_fields);
-
- return;
- }
-
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0; i < n_fields; i++) {
const dict_field_t* ifield;
dtype_t* dfield_type;
@@ -2628,17 +2620,14 @@ dict_index_build_internal_non_clust(
ulint i;
ibool* indexed;
- ut_ad(table && index);
- ut_ad(!dict_index_is_clust(index));
- ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(!index->is_primary());
ut_ad(dict_sys.locked());
/* The clustered index should be the first in the list of indexes */
clust_index = UT_LIST_GET_FIRST(table->indexes);
ut_ad(clust_index);
- ut_ad(dict_index_is_clust(clust_index));
- ut_ad(!dict_index_is_ibuf(clust_index));
+ ut_ad(clust_index->is_clust());
/* Create a new index */
new_index = dict_mem_index_create(
@@ -3769,24 +3758,7 @@ dict_index_build_node_ptr(
dtuple_t* tuple;
dfield_t* field;
byte* buf;
- ulint n_unique;
-
- if (dict_index_is_ibuf(index)) {
- /* In a universal index tree, we take the whole record as
- the node pointer if the record is on the leaf level,
- on non-leaf levels we remove the last field, which
- contains the page number of the child page */
-
- ut_a(!dict_table_is_comp(index->table));
- n_unique = rec_get_n_fields_old(rec);
-
- if (level > 0) {
- ut_a(n_unique > 1);
- n_unique--;
- }
- } else {
- n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
- }
+ ulint n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
tuple = dtuple_create(heap, n_unique + 1);
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index bd3bd71544a..9d5568f965b 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -865,9 +865,7 @@ err_exit:
return READ_OK;
}
-/** Check each tablespace found in the data dictionary.
-Then look at each table defined in SYS_TABLES that has a space_id > 0
-to find all the file-per-table tablespaces.
+/** Open each tablespace found in the data dictionary.
In a crash recovery we already have some tablespace objects created from
processing the REDO log. We will compare the
@@ -876,14 +874,12 @@ tablespace file. In addition, more validation will be done if recovery
was needed and force_recovery is not set.
We also scan the biggest space id, and store it to fil_system. */
-void dict_check_tablespaces_and_store_max_id()
+void dict_load_tablespaces()
{
uint32_t max_space_id = 0;
btr_pcur_t pcur;
mtr_t mtr;
- DBUG_ENTER("dict_check_tablespaces_and_store_max_id");
-
mtr.start();
dict_sys.lock(SRW_LOCK_CALL);
@@ -976,8 +972,6 @@ void dict_check_tablespaces_and_store_max_id()
fil_set_max_space_id_if_bigger(max_space_id);
dict_sys.unlock();
-
- DBUG_VOID_RETURN;
}
/** Error message for a delete-marked record in dict_load_column_low() */
@@ -1125,7 +1119,7 @@ err_len:
prtype = dtype_form_prtype(
prtype,
- data_mysql_default_charset_coll);
+ default_charset_info->number);
}
}
@@ -2475,9 +2469,7 @@ corrupted:
goto corrupted;
}
- if (table->supports_instant()) {
- err = btr_cur_instant_init(table);
- }
+ err = btr_cur_instant_init(table);
}
} else {
ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX);
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 39f5943d5a4..04b1ec88ac3 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -584,8 +584,6 @@ dict_stats_table_clone_create(
continue;
}
- ut_ad(!dict_index_is_ibuf(index));
-
ulint n_uniq = dict_index_get_n_unique(index);
heap_size += sizeof(dict_index_t);
@@ -634,8 +632,6 @@ dict_stats_table_clone_create(
continue;
}
- ut_ad(!dict_index_is_ibuf(index));
-
dict_index_t* idx;
idx = (dict_index_t*) mem_heap_zalloc(heap, sizeof(*idx));
@@ -714,7 +710,6 @@ dict_stats_empty_index(
/*!< in: whether to empty defrag stats */
{
ut_ad(!(index->type & DICT_FTS));
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(index->table->stats_mutex_is_owner());
ulint n_uniq = index->n_uniq;
@@ -767,8 +762,6 @@ dict_stats_empty_table(
continue;
}
- ut_ad(!dict_index_is_ibuf(index));
-
dict_stats_empty_index(index, empty_defrag_stats);
}
@@ -901,8 +894,6 @@ dict_stats_copy(
}
}
- ut_ad(!dict_index_is_ibuf(dst_idx));
-
if (!INDEX_EQ(src_idx, dst_idx)) {
for (src_idx = dict_table_get_first_index(src);
src_idx != NULL;
@@ -1094,11 +1085,10 @@ btr_cur_t::open_random_leaf(rec_offs *&offsets, mem_heap_t *&heap, mtr_t &mtr)
dberr_t err;
auto offset= index()->page;
- bool merge= false;
ulint height= ULINT_UNDEFINED;
while (buf_block_t *block=
- btr_block_get(*index(), offset, RW_S_LATCH, merge, &mtr, &err))
+ btr_block_get(*index(), offset, RW_S_LATCH, &mtr, &err))
{
page_cur.block= block;
@@ -1120,8 +1110,7 @@ btr_cur_t::open_random_leaf(rec_offs *&offsets, mem_heap_t *&heap, mtr_t &mtr)
return DB_SUCCESS;
}
- if (!--height)
- merge= !index()->is_clust();
+ height--;
page_cur_open_on_rnd_user_rec(&page_cur);
@@ -1462,10 +1451,6 @@ dummy_empty:
dict_stats_empty_index(index, false);
index->table->stats_mutex_unlock();
return err;
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- } else if (ibuf_debug && !dict_index_is_clust(index)) {
- goto dummy_empty;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
} else if (dict_index_is_online_ddl(index) || !index->is_committed()
|| !index->table->space) {
goto dummy_empty;
@@ -1571,9 +1556,6 @@ empty_table:
}
for (; index != NULL; index = dict_table_get_next_index(index)) {
-
- ut_ad(!dict_index_is_ibuf(index));
-
if (!index->is_btree()) {
continue;
}
@@ -1638,9 +1620,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level,
for (ulint height = ULINT_UNDEFINED;; height--)
{
- buf_block_t* block=
- btr_block_get(*index, page, RW_S_LATCH,
- !height && !index->is_clust(), mtr, &err);
+ buf_block_t* block= btr_block_get(*index, page, RW_S_LATCH, mtr, &err);
if (!block)
break;
@@ -2258,9 +2238,7 @@ dict_stats_analyze_index_below_cur(
block = buf_page_get_gen(page_id, zip_size,
RW_S_LATCH, NULL, BUF_GET,
- &mtr, &err,
- !index->is_clust()
- && 1 == btr_page_get_level(page));
+ &mtr, &err);
if (!block) {
goto func_exit;
}
@@ -2999,7 +2977,6 @@ dict_stats_update_persistent(
return(DB_CORRUPTION);
}
- ut_ad(!dict_index_is_ibuf(index));
table->stats_mutex_lock();
dict_stats_empty_index(index, false);
table->stats_mutex_unlock();
@@ -3380,8 +3357,6 @@ unlocked_free_and_exit:
continue;
}
- ut_ad(!dict_index_is_ibuf(index));
-
for (unsigned i = 0; i < index->n_uniq; i++) {
char stat_name[16];
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index a3d9df4af1b..c1fd916be55 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2021, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -45,7 +45,6 @@ Created 10/25/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0purge.h"
#include "buf0lru.h"
-#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log.h"
#ifdef __linux__
@@ -496,6 +495,9 @@ void fil_space_t::flush_low()
break;
}
+ if (fil_system.is_write_through())
+ goto skip_flush;
+
fil_n_pending_tablespace_flushes++;
for (fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
node= UT_LIST_GET_NEXT(chain, node))
@@ -520,8 +522,9 @@ void fil_space_t::flush_low()
mysql_mutex_unlock(&fil_system.mutex);
}
- clear_flush();
fil_n_pending_tablespace_flushes--;
+skip_flush:
+ clear_flush();
}
/** Try to extend a tablespace.
@@ -750,7 +753,6 @@ inline pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
{
if (space->is_in_unflushed_spaces)
{
- ut_ad(srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC);
space->is_in_unflushed_spaces= false;
fil_system.unflushed_spaces.remove(*space);
}
@@ -783,7 +785,6 @@ pfs_os_file_t fil_system_t::detach(fil_space_t *space, bool detach_handle)
if (space->is_in_unflushed_spaces)
{
- ut_ad(srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC);
space->is_in_unflushed_spaces= false;
unflushed_spaces.remove(*space);
}
@@ -1341,6 +1342,120 @@ ATTRIBUTE_COLD void fil_system_t::extend_to_recv_size()
mysql_mutex_unlock(&mutex);
}
+ATTRIBUTE_COLD void fil_space_t::reopen_all()
+{ /* Flush, close and reopen every open data file of every tablespace in
+ fil_system.space_list. In this file it is called by
+ fil_system_t::set_write_through() and fil_system_t::set_buffered()
+ right after the respective setting was changed; presumably
+ os_file_create() applies the new setting to the new handle (not
+ visible here, TODO confirm). The caller must hold fil_system.mutex;
+ the mutex is temporarily released while flushing and while sleeping
+ between attempts. */
+ mysql_mutex_assert_owner(&fil_system.mutex);
+ fil_system.freeze_space_list++; // undone at the end; presumably guards space_list while the mutex is released (TODO confirm)
+
+ for (fil_space_t &space : fil_system.space_list)
+ {
+ for (fil_node_t *node= UT_LIST_GET_FIRST(space.chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ if (node->is_open())
+ goto need_to_close;
+ continue; // no file of this tablespace is open: nothing to reopen
+
+ need_to_close:
+ uint32_t p= space.n_pending.fetch_or(CLOSING, std::memory_order_acquire); // set CLOSING; p holds the previous value
+ if (p & (STOPPING | CLOSING))
+ continue; // STOPPING or CLOSING was already set before our fetch_or: skip this tablespace
+
+ for (fil_node_t *node= UT_LIST_GET_FIRST(space.chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ {
+ if (!node->is_open())
+ continue;
+
+ ulint type= OS_DATA_FILE;
+
+ switch (FSP_FLAGS_GET_ZIP_SSIZE(space.flags)) {
+ case 1: case 2: // ZIP_SSIZE values 1 and 2 are reopened with OS_DATA_FILE_NO_O_DIRECT
+ type= OS_DATA_FILE_NO_O_DIRECT;
+ }
+
+ for (ulint count= 10000; count--;) // at most 10000 attempts, with a 100-microsecond sleep between them
+ {
+ p= space.pending();
+
+ if (!(p & CLOSING) || (p & STOPPING))
+ break; // CLOSING is no longer set, or STOPPING is set: give up on this file without an error
+
+ if (!(p & PENDING) && !node->being_extended)
+ {
+ space.reacquire(); // presumably takes a reference in n_pending; it is dropped by the fetch_sub(1) below
+ mysql_mutex_unlock(&fil_system.mutex);
+ /* Unconditionally flush the file, because
+ fil_system.write_through was updated prematurely,
+ potentially causing some flushes to be lost. */
+ os_file_flush(node->handle);
+ mysql_mutex_lock(&fil_system.mutex);
+ p= space.n_pending.fetch_sub(1, std::memory_order_relaxed) - 1; // p is the value after the decrement
+
+ if (!(p & CLOSING) || (p & STOPPING))
+ break;
+
+ if (!(p & PENDING) && !node->being_extended) // re-check: the state may have changed while the mutex was released
+ {
+ ut_a(os_file_close(node->handle));
+ bool success;
+ node->handle= os_file_create(innodb_data_file_key, node->name,
+ node->is_raw_disk
+ ? OS_FILE_OPEN_RAW : OS_FILE_OPEN,
+ OS_FILE_AIO, type,
+ srv_read_only_mode, &success);
+ ut_a(success); // the reopen is asserted to succeed
+ goto next_file;
+ }
+ }
+
+ space.reacquire();
+ mysql_mutex_unlock(&fil_system.mutex);
+ std::this_thread::sleep_for(std::chrono::microseconds(100));
+ mysql_mutex_lock(&fil_system.mutex);
+ space.release();
+
+ if (!node->is_open())
+ goto next_file; // the file is no longer open after the sleep: nothing left to reopen
+ }
+
+ if (!(p & CLOSING) || (p & STOPPING))
+ next_file:
+ continue;
+
+ sql_print_error("InnoDB: Failed to reopen file '%s' due to " UINT32PF
+ " operations", node->name, p & PENDING); // reached when the attempts ran out with CLOSING set and STOPPING not set
+ }
+ }
+
+ fil_system.freeze_space_list--;
+}
+
+void fil_system_t::set_write_through(bool write_through)
+{ /* Update the write_through setting while holding the mutex. If the
+ value changes, the open data files are reopened by
+ fil_space_t::reopen_all(), which asserts that fil_system.mutex is
+ owned by the caller.
+ @param write_through the new value of the setting */
+ mysql_mutex_lock(&mutex);
+
+ if (write_through != is_write_through()) // nothing to do when the setting is unchanged
+ {
+ this->write_through= write_through; // stored before the files are flushed and reopened
+ fil_space_t::reopen_all();
+ }
+
+ mysql_mutex_unlock(&mutex);
+}
+
+void fil_system_t::set_buffered(bool buffered)
+{ /* Update the buffered setting while holding the mutex. If the value
+ changes, the open data files are reopened by
+ fil_space_t::reopen_all(), which asserts that fil_system.mutex is
+ owned by the caller.
+ @param buffered the new value of the setting */
+ mysql_mutex_lock(&mutex);
+
+ if (buffered != is_buffered()) // nothing to do when the setting is unchanged
+ {
+ this->buffered= buffered; // stored before the files are flushed and reopened
+ fil_space_t::reopen_all();
+ }
+
+ mysql_mutex_unlock(&mutex);
+}
+
/** Close all tablespace files at shutdown */
void fil_space_t::close_all()
{
@@ -1361,12 +1476,9 @@ void fil_space_t::close_all()
for (fil_node_t *node= UT_LIST_GET_FIRST(space.chain); node != NULL;
node= UT_LIST_GET_NEXT(chain, node))
{
-
if (!node->is_open())
- {
next:
continue;
- }
for (ulint count= 10000; count--;)
{
@@ -1382,8 +1494,8 @@ void fil_space_t::close_all()
goto next;
}
- ib::error() << "File '" << node->name << "' has " << space.referenced()
- << " operations";
+ sql_print_error("InnoDB: File '%s' has " UINT32PF " operations",
+ node->name, space.referenced());
}
fil_system.detach(&space);
@@ -1626,7 +1738,6 @@ pfs_os_file_t fil_delete_tablespace(uint32_t id)
fil_space_free_low(space);
}
- ibuf_delete_for_discarded_space(id);
return handle;
}
@@ -2619,7 +2730,7 @@ inline void fil_node_t::complete_write()
mysql_mutex_assert_not_owner(&fil_system.mutex);
if (space->purpose != FIL_TYPE_TEMPORARY &&
- srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC &&
+ (!fil_system.is_write_through() && !my_disable_sync) &&
space->set_needs_flush())
{
mysql_mutex_lock(&fil_system.mutex);
@@ -2767,10 +2878,6 @@ write_completed:
{
ut_ad(request.is_read());
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in fil_system.sys_space, we have to be very careful not to
- introduce deadlocks. We never close fil_system.sys_space data
- files and never issue asynchronous reads of change buffer pages. */
const page_id_t id(request.bpage->id());
if (dberr_t err= request.bpage->read_complete(*request.node))
@@ -2795,14 +2902,6 @@ write_completed:
possibly cached by the OS. */
void fil_flush_file_spaces()
{
- if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
- {
- ut_d(mysql_mutex_lock(&fil_system.mutex));
- ut_ad(fil_system.unflushed_spaces.empty());
- ut_d(mysql_mutex_unlock(&fil_system.mutex));
- return;
- }
-
rescan:
mysql_mutex_lock(&fil_system.mutex);
diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc
index b6971558201..bdc08b22f3a 100644
--- a/storage/innobase/fil/fil0pagecompress.cc
+++ b/storage/innobase/fil/fil0pagecompress.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (C) 2013, 2021, MariaDB Corporation.
+Copyright (C) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -47,7 +47,6 @@ Updated 14/02/2015
#include "trx0sys.h"
#include "row0mysql.h"
#include "buf0lru.h"
-#include "ibuf0ibuf.h"
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index c6044b201fe..f7625974886 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -33,7 +33,6 @@ Created 11/29/1995 Heikki Tuuri
#include "page0page.h"
#include "srv0srv.h"
#include "srv0start.h"
-#include "ibuf0ibuf.h"
#include "btr0btr.h"
#include "btr0sea.h"
#include "dict0boot.h"
@@ -507,7 +506,7 @@ dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr)
const page_id_t page_id(space->id, 0);
const ulint zip_size = space->zip_size();
- buf_block_t *free_block = buf_LRU_get_free_block(false);
+ buf_block_t *free_block = buf_LRU_get_free_block(have_no_mutex);
mtr->x_lock_space(space);
@@ -841,9 +840,9 @@ fsp_fill_free_list(
if (i)
{
- buf_block_t *f= buf_LRU_get_free_block(false);
+ buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i),
- zip_size, mtr, f);
+ zip_size, mtr, f);
if (UNIV_UNLIKELY(block != f))
buf_pool.free_block(f);
fsp_init_file_page(space, block, mtr);
@@ -853,13 +852,19 @@ fsp_fill_free_list(
if (space->purpose != FIL_TYPE_TEMPORARY)
{
- buf_block_t *f= buf_LRU_get_free_block(false);
+ buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block=
- buf_page_create(space,
- static_cast<uint32_t>(i + FSP_IBUF_BITMAP_OFFSET),
+ buf_page_create(space, static_cast<uint32_t>(i + 1),
zip_size, mtr, f);
if (UNIV_UNLIKELY(block != f))
buf_pool.free_block(f);
+ /* The zero-initialization will reset the change buffer bitmap bits
+ to safe values for possible import to an earlier version that
+ supports change buffering:
+
+ IBUF_BITMAP_FREE = 0 (no space left for buffering inserts)
+ IBUF_BITMAP_BUFFERED = 0 (no changes have been buffered)
+ IBUF_BITMAP_IBUF = 0 (not part of the change buffer) */
fsp_init_file_page(space, block, mtr);
mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame,
FIL_PAGE_IBUF_BITMAP);
@@ -884,9 +889,9 @@ fsp_fill_free_list(
if (UNIV_UNLIKELY(init_xdes))
{
/* The first page in the extent is a descriptor page and the
- second is an ibuf bitmap page: mark them used */
+ second was reserved for change buffer bitmap: mark them used */
xdes_set_free<false>(*xdes, descr, 0, mtr);
- xdes_set_free<false>(*xdes, descr, FSP_IBUF_BITMAP_OFFSET, mtr);
+ xdes_set_free<false>(*xdes, descr, 1, mtr);
xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr);
if (dberr_t err= flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG,
xdes, xoffset, mtr))
@@ -1055,7 +1060,7 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
}
}
- free_block= buf_LRU_get_free_block(false);
+ free_block= buf_LRU_get_free_block(have_no_mutex);
got_free_block:
block= buf_page_create(space, static_cast<uint32_t>(offset),
space->zip_size(), mtr, free_block);
diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc
index 83afd732b21..60218a132c9 100644
--- a/storage/innobase/gis/gis0rtree.cc
+++ b/storage/innobase/gis/gis0rtree.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,7 +34,6 @@ Created 2013/03/27 Allen Lai and Jimmy Yang
#include "btr0pcur.h"
#include "rem0cmp.h"
#include "lock0lock.h"
-#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "srv0mon.h"
#include "gis0geo.h"
@@ -538,7 +537,7 @@ err_exit:
mem_heap_free(heap);
}
-MY_ATTRIBUTE((nonnull, warn_unused_result))
+MY_ATTRIBUTE((nonnull(1,3,4,5,6,8), warn_unused_result))
/**************************************************************//**
Update parent page's MBR and Predicate lock information during a split */
static
@@ -552,6 +551,7 @@ rtr_adjust_upper_level(
buf_block_t* new_block, /*!< in/out: the new half page */
rtr_mbr_t* mbr, /*!< in: MBR on the old page */
rtr_mbr_t* new_mbr, /*!< in: MBR on the new page */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
ulint page_no;
@@ -570,7 +570,6 @@ rtr_adjust_upper_level(
/* Create a memory heap where the data tuple is stored */
heap = mem_heap_create(1024);
- cursor.thr = sea_cur->thr;
cursor.page_cur.index = sea_cur->index();
cursor.page_cur.block = block;
@@ -584,7 +583,8 @@ rtr_adjust_upper_level(
/* Set new mbr for the old page on the upper level. */
/* Look up the index for the node pointer to page */
- offsets = rtr_page_get_father_block(NULL, heap, mtr, sea_cur, &cursor);
+ offsets = rtr_page_get_father_block(nullptr, heap, sea_cur, &cursor,
+ thr, mtr);
page_cursor = btr_cur_get_page_cur(&cursor);
@@ -669,7 +669,7 @@ rtr_adjust_upper_level(
if (next_page_no == FIL_NULL) {
} else if (buf_block_t* next_block =
btr_block_get(*sea_cur->index(), next_page_no, RW_X_LATCH,
- false, mtr, &err)) {
+ mtr, &err)) {
if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame
@@ -691,11 +691,6 @@ rtr_adjust_upper_level(
/*************************************************************//**
Moves record list to another page for rtree splitting.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return error code
@retval DB_FAIL on ROW_FORMAT=COMPRESSED compression failure */
static
@@ -731,8 +726,7 @@ rtr_split_page_move_rec_list(
ulint max_to_move = 0;
rtr_rec_move_t* rec_move = NULL;
- ut_ad(!dict_index_is_ibuf(index));
- ut_ad(dict_index_is_spatial(index));
+ ut_ad(index->is_spatial());
rec_offs_init(offsets_);
@@ -867,7 +861,8 @@ rtr_page_split_and_insert(
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
- dberr_t* err) /*!< out: error code */
+ dberr_t* err, /*!< out: error code */
+ que_thr_t* thr) /*!< in: query thread */
{
buf_block_t* block;
page_t* page;
@@ -895,6 +890,8 @@ rtr_page_split_and_insert(
int first_rec_group = 1;
IF_DBUG(bool iterated = false,);
+ buf_pool.pages_split++;
+
if (!*heap) {
*heap = mem_heap_create(1024);
}
@@ -1159,7 +1156,7 @@ after_insert:
/* Adjust the upper level. */
*err = rtr_adjust_upper_level(cursor, flags, block, new_block,
- &mbr, &new_mbr, mtr);
+ &mbr, &new_mbr, thr, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return nullptr;
}
@@ -1179,13 +1176,6 @@ after_insert:
/* If the new res insert fail, we need to do another split
again. */
if (!rec) {
- /* We play safe and reset the free bits for new_page */
- if (!dict_index_is_clust(cursor->index())
- && !cursor->index()->table->is_temporary()) {
- ibuf_reset_free_bits(new_block);
- ibuf_reset_free_bits(block);
- }
-
/* We need to clean the parent path here and search father
node later, otherwise, it's possible that find a wrong
parent. */
@@ -1212,6 +1202,244 @@ after_insert:
return(rec);
}
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts the tuple.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return inserted record, or nullptr on failure */
+rec_t*
+rtr_root_raise_and_insert(
+/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ rec_offs** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr, /*!< in: mtr */
+ dberr_t* err, /*!< out: error code */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ dict_index_t* index;
+ rec_t* rec;
+ dtuple_t* node_ptr;
+ ulint level;
+ rec_t* node_ptr_rec;
+ page_cur_t* page_cursor;
+ page_zip_des_t* root_page_zip;
+ page_zip_des_t* new_page_zip;
+ buf_block_t* root;
+ buf_block_t* new_block;
+
+ root = btr_cur_get_block(cursor);
+ root_page_zip = buf_block_get_page_zip(root);
+ ut_ad(!page_is_empty(root->page.frame));
+ index = btr_cur_get_index(cursor);
+ ut_ad(index->is_spatial());
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!root_page_zip
+ || page_zip_validate(root_page_zip, root->page.frame, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ const page_id_t root_id{root->page.id()};
+
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr->memo_contains_flagged(root, MTR_MEMO_PAGE_X_FIX));
+
+ if (index->page != root_id.page_no()) {
+ ut_ad("corrupted root page number" == 0);
+ return nullptr;
+ }
+
+ if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
+ *root, *index->table->space)
+ || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
+ *root, *index->table->space)) {
+ return nullptr;
+ }
+
+ /* Allocate a new page to the tree. Root splitting is done by first
+ moving the root records to the new page, emptying the root, putting
+ a node pointer to the new page, and then splitting the new page. */
+
+ level = btr_page_get_level(root->page.frame);
+
+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr, err);
+
+ if (!new_block) {
+ return nullptr;
+ }
+
+ new_page_zip = buf_block_get_page_zip(new_block);
+ ut_a(!new_page_zip == !root_page_zip);
+ ut_a(!new_page_zip
+ || page_zip_get_size(new_page_zip)
+ == page_zip_get_size(root_page_zip));
+
+ btr_page_create(new_block, new_page_zip, index, level, mtr);
+ if (page_has_siblings(new_block->page.frame)) {
+ compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
+ compile_time_assert(FIL_NULL == 0xffffffff);
+ static_assert(FIL_PAGE_PREV % 8 == 0, "alignment");
+ memset_aligned<8>(new_block->page.frame + FIL_PAGE_PREV,
+ 0xff, 8);
+ mtr->memset(new_block, FIL_PAGE_PREV, 8, 0xff);
+ if (UNIV_LIKELY_NULL(new_page_zip)) {
+ memset_aligned<8>(new_page_zip->data + FIL_PAGE_PREV,
+ 0xff, 8);
+ }
+ }
+
+ /* Copy the records from root to the new page one by one. */
+ dberr_t e; /* error sink used when the caller passed err == nullptr */
+ if (!err) {
+ err = &e;
+ }
+
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || new_page_zip
+#endif /* UNIV_ZIP_COPY */
+ || !page_copy_rec_list_end(new_block, root,
+ page_get_infimum_rec(root->page.frame),
+ index, mtr, err)) {
+ switch (*err) {
+ case DB_SUCCESS:
+ break;
+ case DB_FAIL:
+ *err = DB_SUCCESS;
+ break;
+ default:
+ return nullptr;
+ }
+
+ ut_a(new_page_zip);
+
+ /* Copy the page byte for byte. */
+ page_zip_copy_recs(new_block, root_page_zip,
+ root->page.frame, index, mtr);
+
+ /* Update the lock table. */
+ if (index->has_locking()) {
+ lock_move_rec_list_end(
+ new_block, root,
+ page_get_infimum_rec(root->page.frame));
+ }
+
+ /* Move any existing predicate locks */
+ lock_prdt_rec_move(new_block, root_id);
+ }
+
+ constexpr uint16_t max_trx_id = PAGE_HEADER + PAGE_MAX_TRX_ID;
+ if (!index->is_primary()) {
+ /* In secondary indexes,
+ PAGE_MAX_TRX_ID can be reset on the root page, because
+ the field only matters on leaf pages, and the root no
+ longer is a leaf page. (Older versions of InnoDB did
+ set PAGE_MAX_TRX_ID on all secondary index pages.) */
+ byte* p = my_assume_aligned<8>(
+ PAGE_HEADER + PAGE_MAX_TRX_ID + root->page.frame);
+ if (mach_read_from_8(p)) {
+ mtr->memset(root, max_trx_id, 8, 0);
+ if (UNIV_LIKELY_NULL(root->page.zip.data)) {
+ memset_aligned<8>(max_trx_id
+ + root->page.zip.data, 0, 8);
+ }
+ }
+ } else {
+ /* PAGE_ROOT_AUTO_INC is only present in the clustered index
+ root page; on other clustered index pages, we want to reserve
+ the field PAGE_MAX_TRX_ID for future use. */
+ byte* p = my_assume_aligned<8>(
+ PAGE_HEADER + PAGE_MAX_TRX_ID + new_block->page.frame);
+ if (mach_read_from_8(p)) {
+ mtr->memset(new_block, max_trx_id, 8, 0);
+ if (UNIV_LIKELY_NULL(new_block->page.zip.data)) {
+ memset_aligned<8>(max_trx_id
+ + new_block->page.zip.data,
+ 0, 8);
+ }
+ }
+ }
+
+ /* If this is a pessimistic insert which is actually done to
+ perform a pessimistic update then we have stored the lock
+ information of the record to be inserted on the infimum of the
+ root page: we cannot discard the lock structs on the root page */
+
+ if (index->has_locking()) {
+ lock_update_root_raise(*new_block, root_id);
+ }
+
+ /* Create a memory heap where the node pointer is stored */
+ if (!*heap) {
+ *heap = mem_heap_create(1000);
+ }
+
+ const uint32_t new_page_no = new_block->page.id().page_no();
+ rec = page_rec_get_next(page_get_infimum_rec(new_block->page.frame));
+ ut_ad(rec); /* We just created the page. */
+
+ /* Build the node pointer (= node key and page address) for the
+ child */
+ rtr_mbr_t new_mbr;
+ rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
+ node_ptr = rtr_index_build_node_ptr(index, &new_mbr, rec, new_page_no,
+ *heap);
+ /* The node pointer must be marked as the predefined minimum record,
+ as there is no lower alphabetical limit to records in the leftmost
+ node of a level: */
+ dtuple_set_info_bits(node_ptr,
+ dtuple_get_info_bits(node_ptr)
+ | REC_INFO_MIN_REC_FLAG);
+
+ /* Rebuild the root page to get free space */
+ btr_page_empty(root, root_page_zip, index, level + 1, mtr);
+ ut_ad(!page_has_siblings(root->page.frame));
+
+ page_cursor = btr_cur_get_page_cur(cursor);
+
+ /* Insert node pointer to the root */
+
+ page_cur_set_before_first(root, page_cursor);
+
+ node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
+ offsets, heap, 0, mtr);
+
+ /* The root page should only contain the node pointer
+ to new_block at this point. Thus, the data should fit. */
+ ut_a(node_ptr_rec);
+
+ page_cursor->block = new_block;
+ page_cursor->index = index;
+
+ if (tuple) {
+ ut_ad(dtuple_check_typed(tuple));
+ /* Reposition the cursor to the child node */
+ ulint low_match = 0, up_match = 0;
+
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
+ &up_match, &low_match,
+ page_cursor, nullptr)) {
+ if (err) { /* always true: err was pointed at e above if null */
+ *err = DB_CORRUPTION;
+ }
+ return nullptr;
+ }
+ } else {
+ page_cursor->rec = page_get_infimum_rec(new_block->page.frame);
+ }
+
+ /* Split the child and insert tuple */
+ return rtr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr, err, thr);
+}
+
/****************************************************************//**
Following the right link to find the proper block for insert.
@return the proper block.*/
@@ -1240,6 +1468,7 @@ rtr_ins_enlarge_mbr(
/* Check path info is not empty. */
ut_ad(!btr_cur->rtr_info->parent_path->empty());
+ ut_ad(btr_cur->rtr_info->thr || !btr_cur->index()->is_committed());
/* Create a memory heap. */
heap = mem_heap_create(1024);
@@ -1265,7 +1494,8 @@ rtr_ins_enlarge_mbr(
cursor.page_cur.index = page_cursor->index;
cursor.page_cur.block = block;
offsets = rtr_page_get_father_block(
- NULL, heap, mtr, btr_cur, &cursor);
+ nullptr, heap, btr_cur, &cursor,
+ btr_cur->rtr_info->thr, mtr);
page = buf_block_get_frame(block);
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
index 8ca8681bce9..43fcf5c82c8 100644
--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -34,7 +34,6 @@ Created 2014/01/16 Jimmy Yang
#include "btr0pcur.h"
#include "rem0cmp.h"
#include "lock0lock.h"
-#include "ibuf0ibuf.h"
#include "trx0trx.h"
#include "srv0mon.h"
#include "que0que.h"
@@ -114,8 +113,8 @@ rtr_latch_leaves(
left_page_no = btr_page_get_prev(block->page.frame);
if (left_page_no != FIL_NULL) {
- btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH,
- true, mtr);
+ btr_block_get(*cursor->index(), left_page_no,
+ RW_X_LATCH, mtr);
}
mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH);
@@ -124,7 +123,7 @@ rtr_latch_leaves(
if (right_page_no != FIL_NULL) {
btr_block_get(*cursor->index(), right_page_no,
- RW_X_LATCH, true, mtr);
+ RW_X_LATCH, mtr);
}
break;
case BTR_SEARCH_LEAF:
@@ -541,10 +540,10 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index,
#endif
TRANSACTIONAL_TARGET
-dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cur, mtr_t *mtr)
+dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,
+ const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr,
+ page_cur_mode_t mode, ulint level)
{
page_cur_mode_t page_mode;
page_cur_mode_t search_mode= PAGE_CUR_UNSUPP;
@@ -667,7 +666,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
dberr_t err;
auto block_savepoint= mtr->get_savepoint();
buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess,
- buf_mode, mtr, &err, false);
+ buf_mode, mtr, &err);
if (!block)
{
if (err == DB_DECRYPTION_FAILED)
@@ -725,7 +724,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
index->set_ssn(page_get_ssn_id(page) + 1);
/* Save the MBR */
- cur->rtr_info->thr= cur->thr;
+ cur->rtr_info->thr= thr;
rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr);
#ifdef BTR_CUR_ADAPT
@@ -833,7 +832,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
lock_prdt_t prdt;
{
- trx_t* trx= thr_get_trx(cur->thr);
+ trx_t* trx= thr_get_trx(thr);
TMLockTrxGuard g{TMLockTrxArgs(*trx)};
lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode,
trx->lock.lock_heap);
@@ -842,7 +841,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
if (rw_latch == RW_NO_LATCH && height != 0)
block->page.lock.s_lock();
- lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr);
+ lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, thr);
if (rw_latch == RW_NO_LATCH && height != 0)
block->page.lock.s_unlock();
@@ -950,7 +949,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
if (upper_rw_latch == RW_NO_LATCH)
{
ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
- btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err);
+ btr_block_get(*index, page_id.page_no(), RW_X_LATCH, mtr, &err);
}
else
{
@@ -979,19 +978,21 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
goto func_exit;
}
-dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+dberr_t rtr_search_leaf(btr_cur_t *cur, que_thr_t *thr, const dtuple_t *tuple,
btr_latch_mode latch_mode,
mtr_t *mtr, page_cur_mode_t mode)
{
- return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr);
+ return rtr_search_to_nth_level(cur, thr, tuple, latch_mode, mtr, mode, 0);
}
/** Search for a spatial index leaf page record.
-@param pcur cursor
+@param pcur cursor
+@param thr query thread
@param tuple search tuple
@param mode search mode
@param mtr mini-transaction */
-dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, que_thr_t *thr,
+ const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
{
#ifdef UNIV_DEBUG
@@ -1010,7 +1011,8 @@ dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
pcur->search_mode= mode;
pcur->pos_state= BTR_PCUR_IS_POSITIONED;
pcur->trx_if_known= nullptr;
- return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode);
+ return rtr_search_leaf(&pcur->btr_cur, thr, tuple, BTR_SEARCH_LEAF, mtr,
+ mode);
}
/**************************************************************//**
@@ -1020,6 +1022,7 @@ bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ que_thr_t* thr, /*!< in/out; query thread */
mtr_t* mtr) /*!< in: mtr */
{
static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
@@ -1048,15 +1051,16 @@ bool rtr_search(
btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cursor->rtr_info
- = rtr_create_rtr_info(false, false,
- btr_cursor, cursor->index());
+ = rtr_create_rtr_info(false, false, thr, btr_cursor);
- if (btr_cursor->thr) {
+ if (!thr) {
+ /* Purge will U-lock the tree instead of taking page locks */
+ } else {
btr_cursor->rtr_info->need_page_lock = true;
- btr_cursor->rtr_info->thr = btr_cursor->thr;
+ btr_cursor->rtr_info->thr = thr;
}
- if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr)
+ if (rtr_search_leaf(btr_cursor, thr, tuple, latch_mode, mtr)
!= DB_SUCCESS) {
return true;
}
@@ -1103,12 +1107,14 @@ bool rtr_search(
about parent nodes in search
@param[out] cursor cursor on node pointer record,
its page x-latched
+@param[in,out] thr query thread
@return whether the cursor was successfully positioned */
-bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor)
+bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor,
+ que_thr_t *thr)
{
mem_heap_t *heap = mem_heap_create(100);
rec_offs *offsets= rtr_page_get_father_block(nullptr, heap,
- mtr, sea_cur, cursor);
+ sea_cur, cursor, thr, mtr);
mem_heap_free(heap);
return offsets != nullptr;
}
@@ -1125,12 +1131,13 @@ static const rec_t* rtr_get_father_node(
btr_cur_t* sea_cur,/*!< in: search cursor */
btr_cur_t* btr_cur,/*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
+ que_thr_t* thr, /*!< in/out: query thread */
ulint page_no,/*!< Current page no */
mtr_t* mtr) /*!< in: mtr */
{
const rec_t* rec = nullptr;
auto had_rtr = btr_cur->rtr_info;
- dict_index_t* const index = btr_cur->index();
+ ut_d(dict_index_t* const index = btr_cur->index());
/* Try to optimally locate the parent node. Level should always
less than sea_cur->tree_height unless the root is splitting */
@@ -1161,10 +1168,10 @@ static const rec_t* rtr_get_father_node(
rtr_clean_rtr_info(btr_cur->rtr_info, true);
}
- btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index);
+ btr_cur->rtr_info = rtr_create_rtr_info(false, false, thr, btr_cur);
- if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE,
- BTR_CONT_MODIFY_TREE, btr_cur, mtr)
+ if (rtr_search_to_nth_level(btr_cur, thr, tuple, BTR_CONT_MODIFY_TREE,
+ mtr, PAGE_CUR_RTREE_LOCATE, level)
!= DB_SUCCESS) {
} else if (sea_cur && sea_cur->tree_height == level) {
rec = btr_cur_get_rec(btr_cur);
@@ -1212,6 +1219,7 @@ rtr_page_get_father_node_ptr(
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
{
dtuple_t* tuple;
@@ -1247,7 +1255,7 @@ rtr_page_get_father_node_ptr(
const rec_t* node_ptr = rtr_get_father_node(level + 1, tuple,
sea_cur, cursor,
- page_no, mtr);
+ thr, page_no, mtr);
if (!node_ptr) {
return nullptr;
}
@@ -1273,18 +1281,20 @@ rtr_page_get_father_block(
/*======================*/
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
- mtr_t* mtr, /*!< in: mtr */
btr_cur_t* sea_cur,/*!< in: search cursor, contains information
about parent nodes in search */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
+ btr_cur_t* cursor, /*!< out: cursor on node pointer record,
its page x-latched */
+ que_thr_t* thr, /*!< in/out: query thread */
+ mtr_t* mtr) /*!< in/out: mtr */
{
rec_t *rec=
page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame));
if (!rec)
return nullptr;
cursor->page_cur.rec= rec;
- return rtr_page_get_father_node_ptr(offsets, heap, sea_cur, cursor, mtr);
+ return rtr_page_get_father_node_ptr(offsets, heap, sea_cur, cursor,
+ thr, mtr);
}
/*******************************************************************//**
@@ -1297,12 +1307,12 @@ rtr_create_rtr_info(
bool init_matches, /*!< in: Whether to initiate the
"matches" structure for collecting
matched leaf records */
- btr_cur_t* cursor, /*!< in: tree search cursor */
- dict_index_t* index) /*!< in: index struct */
+ que_thr_t* thr, /*!< in/out: query thread */
+ btr_cur_t* cursor) /*!< in: tree search cursor */
{
rtr_info_t* rtr_info;
- index = index ? index : cursor->index();
+ dict_index_t* index = cursor->index();
ut_ad(index);
rtr_info = static_cast<rtr_info_t*>(ut_zalloc_nokey(sizeof(*rtr_info)));
@@ -1310,6 +1320,7 @@ rtr_create_rtr_info(
rtr_info->allocated = true;
rtr_info->cursor = cursor;
rtr_info->index = index;
+ rtr_info->thr = thr;
if (init_matches) {
rtr_info->heap = mem_heap_create(sizeof(*(rtr_info->matches)));
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 0825d527dfc..d18f85f4ac5 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -87,7 +87,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "fts0plugin.h"
#include "fts0priv.h"
#include "fts0types.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "log0crypt.h"
#include "mtr0mtr.h"
@@ -366,6 +365,8 @@ const char* innodb_flush_method_names[] = {
NullS
};
+static constexpr ulong innodb_flush_method_default = IF_WIN(6,4);
+
/** Enumeration of innodb_flush_method */
TYPELIB innodb_flush_method_typelib = {
array_elements(innodb_flush_method_names) - 1,
@@ -374,6 +375,9 @@ TYPELIB innodb_flush_method_typelib = {
NULL
};
+/** Deprecated parameter */
+static ulong innodb_flush_method;
+
/** Names of allowed values of innodb_deadlock_report */
static const char *innodb_deadlock_report_names[]= {
"off", /* Do not report any details of deadlocks */
@@ -394,25 +398,6 @@ static TYPELIB innodb_deadlock_report_typelib = {
NULL
};
-/** Allowed values of innodb_change_buffering */
-static const char* innodb_change_buffering_names[] = {
- "none", /* IBUF_USE_NONE */
- "inserts", /* IBUF_USE_INSERT */
- "deletes", /* IBUF_USE_DELETE_MARK */
- "changes", /* IBUF_USE_INSERT_DELETE_MARK */
- "purges", /* IBUF_USE_DELETE */
- "all", /* IBUF_USE_ALL */
- NullS
-};
-
-/** Enumeration of innodb_change_buffering */
-static TYPELIB innodb_change_buffering_typelib = {
- array_elements(innodb_change_buffering_names) - 1,
- "innodb_change_buffering_typelib",
- innodb_change_buffering_names,
- NULL
-};
-
/** Allowed values of innodb_instant_alter_column_allowed */
const char* innodb_instant_alter_column_allowed_names[] = {
"never", /* compatible with MariaDB 5.5 to 10.2 */
@@ -526,9 +511,6 @@ mysql_pfs_key_t fts_cache_mutex_key;
mysql_pfs_key_t fts_cache_init_mutex_key;
mysql_pfs_key_t fts_delete_mutex_key;
mysql_pfs_key_t fts_doc_id_mutex_key;
-mysql_pfs_key_t ibuf_bitmap_mutex_key;
-mysql_pfs_key_t ibuf_mutex_key;
-mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
mysql_pfs_key_t recalc_pool_mutex_key;
mysql_pfs_key_t purge_sys_pq_mutex_key;
mysql_pfs_key_t recv_sys_mutex_key;
@@ -560,8 +542,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(fts_cache_init_mutex),
PSI_KEY(fts_delete_mutex),
PSI_KEY(fts_doc_id_mutex),
- PSI_KEY(ibuf_mutex),
- PSI_KEY(ibuf_pessimistic_insert_mutex),
PSI_KEY(index_online_log),
PSI_KEY(page_zip_stat_per_index_mutex),
PSI_KEY(purge_sys_pq_mutex),
@@ -962,20 +942,6 @@ static SHOW_VAR innodb_status_variables[]= {
{"dblwr_writes", &export_vars.innodb_dblwr_writes, SHOW_SIZE_T},
{"deadlocks", &lock_sys.deadlocks, SHOW_SIZE_T},
{"history_list_length", &export_vars.innodb_history_list_length,SHOW_SIZE_T},
- {"ibuf_discarded_delete_marks", &ibuf.n_discarded_ops[IBUF_OP_DELETE_MARK],
- SHOW_SIZE_T},
- {"ibuf_discarded_deletes", &ibuf.n_discarded_ops[IBUF_OP_DELETE],
- SHOW_SIZE_T},
- {"ibuf_discarded_inserts", &ibuf.n_discarded_ops[IBUF_OP_INSERT],
- SHOW_SIZE_T},
- {"ibuf_free_list", &ibuf.free_list_len, SHOW_SIZE_T},
- {"ibuf_merged_delete_marks", &ibuf.n_merged_ops[IBUF_OP_DELETE_MARK],
- SHOW_SIZE_T},
- {"ibuf_merged_deletes", &ibuf.n_merged_ops[IBUF_OP_DELETE], SHOW_SIZE_T},
- {"ibuf_merged_inserts", &ibuf.n_merged_ops[IBUF_OP_INSERT], SHOW_SIZE_T},
- {"ibuf_merges", &ibuf.n_merges, SHOW_SIZE_T},
- {"ibuf_segment_size", &ibuf.seg_size, SHOW_SIZE_T},
- {"ibuf_size", &ibuf.size, SHOW_SIZE_T},
{"log_waits", &log_sys.waits, SHOW_SIZE_T},
{"log_write_requests", &log_sys.write_to_buf, SHOW_SIZE_T},
{"log_writes", &log_sys.write_to_log, SHOW_SIZE_T},
@@ -3913,8 +3879,6 @@ static int innodb_init_params()
DBUG_RETURN(HA_ERR_INITIALIZATION);
}
- DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);
-
/* Check that interdependent parameters have sane values. */
if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
@@ -3991,27 +3955,27 @@ static int innodb_init_params()
fts_sort_pll_degree = num_pll_degree;
- /* Store the default charset-collation number of this MySQL
- installation */
-
- data_mysql_default_charset_coll = (ulint) default_charset_info->number;
-
+ if (innodb_flush_method == 1 /* O_DSYNC */) {
+ log_sys.log_write_through = true;
+ fil_system.write_through = true;
+ fil_system.buffered = false;
+#if defined __linux__ || defined _WIN32
+ log_sys.log_buffered = false;
+ goto skip_buffering_tweak;
+#endif
+ } else if (innodb_flush_method >= 4 /* O_DIRECT */
+ IF_WIN(&& innodb_flush_method < 8 /* normal */,)) {
+ /* O_DIRECT and similar settings do nothing */
#ifndef _WIN32
- if (srv_use_atomic_writes && my_may_have_atomic_write) {
- /*
- Force O_DIRECT on Unixes (on Windows writes are always
- unbuffered)
- */
- switch (srv_file_flush_method) {
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
- break;
- default:
- srv_file_flush_method = SRV_O_DIRECT;
- fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
- }
- }
+ } else if (srv_use_atomic_writes && my_may_have_atomic_write) {
+ /* If atomic writes are enabled, do the same as with
+ innodb_flush_method=O_DIRECT: retain the default settings */
#endif
+ } else {
+ log_sys.log_write_through = false;
+ fil_system.write_through = false;
+ fil_system.buffered = true;
+ }
#if defined __linux__ || defined _WIN32
if (srv_flush_log_at_trx_commit == 2) {
@@ -4019,6 +3983,7 @@ static int innodb_init_params()
innodb_flush_log_at_trx_commit=2. */
log_sys.log_buffered = true;
}
+skip_buffering_tweak:
#endif
if (srv_read_only_mode) {
@@ -4026,12 +3991,6 @@ static int innodb_init_params()
srv_use_doublewrite_buf = FALSE;
}
-#if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32
- /* Currently native AIO is supported only on windows and linux
- and that also when the support is compiled in. In all other
- cases, we ignore the setting of innodb_use_native_aio. */
- srv_use_native_aio = FALSE;
-#endif
#ifdef HAVE_URING
if (srv_use_native_aio && io_uring_may_be_unsafe) {
sql_print_warning("innodb_use_native_aio may cause "
@@ -4039,28 +3998,39 @@ static int innodb_init_params()
"https://jira.mariadb.org/browse/MDEV-26674",
io_uring_may_be_unsafe);
}
+#elif !defined LINUX_NATIVE_AIO && !defined _WIN32
+ /* Currently native AIO is supported only on windows and linux
+ and that also when the support is compiled in. In all other
+ cases, we ignore the setting of innodb_use_native_aio. */
+ srv_use_native_aio = FALSE;
#endif
-#ifndef _WIN32
- ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC);
-#else
- switch (srv_file_flush_method) {
- case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
- srv_file_flush_method = SRV_ALL_O_DIRECT_FSYNC;
- break;
- case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
- srv_file_flush_method = SRV_FSYNC;
- break;
- default:
- ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
- }
-#endif
innodb_buffer_pool_size_init();
srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
DBUG_RETURN(0);
}
+
+/*********************************************************************//**
+Set up the InnoDB cost factors used to approximate how many ms different
+operations take. See the cost functions in handler.h for how they are used.
+@param costs cost structure whose five fields below are overwritten */
+
+static void innobase_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ The following numbers were found by check_costs.pl when using 1M rows
+ with all rows cached. See optimizer_costs.txt for details.
+ */
+ costs->row_next_find_cost= 0.00007013;
+ costs->row_lookup_cost= 0.00076597;
+ costs->key_next_find_cost= 0.00009900;
+ costs->key_lookup_cost= 0.00079112;
+ costs->row_copy_cost= 0.00006087;
+}
+
+
/** Initialize the InnoDB storage engine plugin.
@param[in,out] p InnoDB handlerton
@return error code
@@ -4128,6 +4098,8 @@ static int innodb_init(void* p)
innobase_hton->prepare_commit_versioned
= innodb_prepare_commit_versioned;
+ innobase_hton->update_optimizer_costs= innobase_update_optimizer_costs;
+
innodb_remember_check_sysvar_funcs();
compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);
@@ -4208,8 +4180,6 @@ static int innodb_init(void* p)
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, true);
- ibuf_max_size_update(srv_change_buffer_max_size);
-
mysql_mutex_init(pending_checkpoint_mutex_key,
&log_requests.mutex,
MY_MUTEX_INIT_FAST);
@@ -4340,7 +4310,7 @@ innobase_start_trx_and_assign_read_view(
Do this only if transaction is using REPEATABLE READ isolation
level. */
trx->isolation_level = innobase_map_isolation_level(
- thd_get_trx_isolation(thd));
+ thd_get_trx_isolation(thd)) & 3;
if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
trx->read_view.open(trx);
@@ -5077,13 +5047,11 @@ ha_innobase::index_flags(
}
ulong flags= key == table_share->primary_key
- ? HA_CLUSTERED_INDEX : 0;
+ ? HA_CLUSTERED_INDEX : HA_KEYREAD_ONLY | HA_DO_RANGE_FILTER_PUSHDOWN;
flags |= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
- | HA_READ_RANGE | HA_KEYREAD_ONLY
- | HA_DO_INDEX_COND_PUSHDOWN
- | HA_DO_RANGE_FILTER_PUSHDOWN;
-
+ | HA_READ_RANGE
+ | HA_DO_INDEX_COND_PUSHDOWN;
return(flags);
}
@@ -6604,8 +6572,7 @@ uint8_t
get_innobase_type_from_mysql_type(unsigned *unsigned_flag, const Field *field)
{
/* The following asserts try to check that the MySQL type code fits in
- 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
- the type */
+ 8 bits: this is used when DATA_NOT_NULL is ORed to the type */
static_assert(MYSQL_TYPE_STRING < 256, "compatibility");
static_assert(MYSQL_TYPE_VAR_STRING < 256, "compatibility");
@@ -9506,6 +9473,11 @@ ha_innobase::ft_init()
trx->will_lock = true;
}
+ /* If there is an FTS scan in progress, stop it */
+ fts_result_t* result = (reinterpret_cast<NEW_FT_INFO*>(ft_handler))->ft_result;
+ if (result)
+ result->current= NULL;
+
DBUG_RETURN(rnd_init(false));
}
@@ -14348,13 +14320,15 @@ ha_innobase::estimate_rows_upper_bound()
DBUG_RETURN((ha_rows) estimate);
}
+
/*********************************************************************//**
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys.
@return estimated time measured in disk seeks */
-double
+#ifdef NOT_USED
+IO_AND_CPU_COST
ha_innobase::scan_time()
/*====================*/
{
@@ -14374,24 +14348,28 @@ ha_innobase::scan_time()
TODO: This will be further improved to return some approximate
estimate but that would also needs pre-population of stats
structure. As of now approach is in sync with MyISAM. */
- return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
+ return { (ulonglong2double(stats.data_file_length) / IO_SIZE * DISK_READ_COST), 0.0 };
}
ulint stat_clustered_index_size;
-
+ IO_AND_CPU_COST cost;
ut_a(m_prebuilt->table->stat_initialized);
stat_clustered_index_size =
m_prebuilt->table->stat_clustered_index_size;
- return((double) stat_clustered_index_size);
+ cost.io= (double) stat_clustered_index_size * DISK_READ_COST;
+ cost.cpu= 0;
+ return(cost);
}
+#endif
/******************************************************************//**
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes.
@return estimated time measured in disk seeks */
+#ifdef NOT_USED
double
ha_innobase::read_time(
/*===================*/
@@ -14416,8 +14394,33 @@ ha_innobase::read_time(
return(time_for_scan);
}
- return(ranges + (double) rows / (double) total_rows * time_for_scan);
+ return(ranges * KEY_LOOKUP_COST + (double) rows / (double) total_rows * time_for_scan);
+}
+
+/******************************************************************//**
+Calculate the time it takes to read a set of rows with primary key.
+@param rows number of rows to read
+@return scan_time() cost with io and cpu scaled by frac, or the unscaled scan cost when rows exceeds estimate_rows_upper_bound() */
+IO_AND_CPU_COST
+ha_innobase::rnd_pos_time(ha_rows rows)
+{
+ ha_rows total_rows;
+ /* Compiled out: this definition sits inside the #ifdef NOT_USED region opened before read_time(). */
+ /* Start from the full-scan cost returned by scan_time() and scale its
+ io and cpu parts by a factor computed from rows and the row estimate. */
+ /* NOTE(review): the ha_innodb.h hunk of this diff declares rnd_pos_time() as returning double, not IO_AND_CPU_COST - confirm. */
+ IO_AND_CPU_COST time_for_scan = scan_time();
+
+ if ((total_rows = estimate_rows_upper_bound()) < rows) {
+ /* More rows requested than the upper-bound estimate: return the scan cost unscaled. */
+ return(time_for_scan);
+ }
+ double frac= (double) rows + (double) rows / (double) total_rows; /* NOTE(review): adds the raw row count to the fraction, so frac >= rows; also 0/0 if rows and total_rows are both 0 - confirm intent */
+ time_for_scan.io*= frac;
+ time_for_scan.cpu*= frac;
+ return(time_for_scan);
 }
+#endif
/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index.
@@ -14896,13 +14899,6 @@ ha_innobase::info_low(
innodb_rec_per_key(index, j,
stats.records));
- /* Since MySQL seems to favor table scans
- too much over index searches, we pretend
- index selectivity is 2 times better than
- our estimate: */
-
- rec_per_key_int = rec_per_key_int / 2;
-
if (rec_per_key_int == 0) {
rec_per_key_int = 1;
}
@@ -15290,7 +15286,7 @@ ha_innobase::check(
}
/* Restore the original isolation level */
- m_prebuilt->trx->isolation_level = old_isolation_level;
+ m_prebuilt->trx->isolation_level = old_isolation_level & 3;
#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/* We validate the whole adaptive hash index for all tables
@@ -16324,7 +16320,7 @@ ha_innobase::store_lock(
if (lock_type != TL_IGNORE
&& trx->n_mysql_tables_in_use == 0) {
trx->isolation_level = innobase_map_isolation_level(
- (enum_tx_isolation) thd_tx_isolation(thd));
+ (enum_tx_isolation) thd_tx_isolation(thd)) & 3;
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
@@ -17462,20 +17458,6 @@ innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
innobase_old_blocks_pct = ratio;
}
-/****************************************************************//**
-Update the system variable innodb_old_blocks_pct using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
- const void* save)
-{
- srv_change_buffer_max_size = *static_cast<const uint*>(save);
- mysql_mutex_unlock(&LOCK_global_system_variables);
- ibuf_max_size_update(srv_change_buffer_max_size);
- mysql_mutex_lock(&LOCK_global_system_variables);
-}
-
#ifdef UNIV_DEBUG
static uint srv_fil_make_page_dirty_debug = 0;
static uint srv_saved_page_number_debug;
@@ -18429,7 +18411,7 @@ buffer_pool_load_abort(
}
#if defined __linux__ || defined _WIN32
-static void innodb_log_file_buffering_update(THD *thd, st_mysql_sys_var*,
+static void innodb_log_file_buffering_update(THD *, st_mysql_sys_var*,
void *, const void *save)
{
mysql_mutex_unlock(&LOCK_global_system_variables);
@@ -18438,6 +18420,30 @@ static void innodb_log_file_buffering_update(THD *thd, st_mysql_sys_var*,
}
#endif
+static void innodb_log_file_write_through_update(THD *, st_mysql_sys_var*, /* update function named in MYSQL_SYSVAR_BOOL(log_file_write_through, ...) */
+ void *, const void *save) /* save: value to apply, read as my_bool; the unnamed arguments are unused */
+{
+ mysql_mutex_unlock(&LOCK_global_system_variables); /* presumably held by the caller on entry; released around the log_sys call */
+ log_sys.set_write_through(*static_cast<const my_bool*>(save));
+ mysql_mutex_lock(&LOCK_global_system_variables); /* re-acquired so the mutex state on return matches the state on entry */
+}
+
+static void innodb_data_file_buffering_update(THD *, st_mysql_sys_var*, /* update function named in MYSQL_SYSVAR_BOOL(data_file_buffering, ...) */
+ void *, const void *save) /* save: value to apply, read as my_bool; the unnamed arguments are unused */
+{
+ mysql_mutex_unlock(&LOCK_global_system_variables); /* presumably held by the caller on entry; released around the fil_system call */
+ fil_system.set_buffered(*static_cast<const my_bool*>(save));
+ mysql_mutex_lock(&LOCK_global_system_variables); /* re-acquired so the mutex state on return matches the state on entry */
+}
+
+static void innodb_data_file_write_through_update(THD *, st_mysql_sys_var*, /* update function named in MYSQL_SYSVAR_BOOL(data_file_write_through, ...) */
+ void *, const void *save) /* save: value to apply, read as my_bool; the unnamed arguments are unused */
+{
+ mysql_mutex_unlock(&LOCK_global_system_variables); /* presumably held by the caller on entry; released around the fil_system call */
+ fil_system.set_write_through(*static_cast<const my_bool*>(save));
+ mysql_mutex_lock(&LOCK_global_system_variables); /* re-acquired so the mutex state on return matches the state on entry */
+}
+
static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
void *var, const void *save)
{
@@ -18874,7 +18880,7 @@ static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
fast_shutdown_validate, NULL, 1, 0, 3, 0);
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
- PLUGIN_VAR_NOCMDARG,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_DEPRECATED,
"Stores each InnoDB table to an .ibd file in the database dir.",
NULL, NULL, TRUE);
@@ -18904,11 +18910,10 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
" guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
NULL, NULL, 1, 0, 3, 0);
-static MYSQL_SYSVAR_ENUM(flush_method, srv_file_flush_method,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_DEPRECATED,
"With which method to flush data.",
- NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_O_DIRECT),
- &innodb_flush_method_typelib);
+ NULL, NULL, innodb_flush_method_default, &innodb_flush_method_typelib);
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -19105,7 +19110,7 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_st
NULL, NULL, TRUE);
static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
"defragmentation will be paused. And new defragmentation command will fail."
"Paused defragmentation commands will resume when this variable is set to "
@@ -19113,14 +19118,14 @@ static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
NULL, NULL, FALSE);
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"Number of pages considered at once when merging multiple pages to "
"defragment",
NULL, NULL, 7, 2, 32, 0);
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
srv_defragment_stats_accuracy,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"How many defragment stats changes there are before the stats "
"are written to persistent storage. Set to 0 meaning disable "
"defragment stats tracking.",
@@ -19128,7 +19133,7 @@ static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
srv_defragment_fill_factor_n_recs,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"How many records of space defragmentation should leave on the page. "
"This variable, together with innodb_defragment_fill_factor, is introduced "
"so defragmentation won't pack the page too full and cause page split on "
@@ -19137,7 +19142,7 @@ static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
NULL, NULL, 20, 1, 100, 0);
static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"A number between [0.7, 1] that tells defragmentation how full it should "
"fill a page. Default is 0.9. Number below 0.7 won't make much sense."
"This variable, together with innodb_defragment_fill_factor_n_recs, is "
@@ -19147,7 +19152,7 @@ static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
NULL, NULL, 0.9, 0.7, 1, 0);
static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
"Do not defragment a single index more than this number of time per second."
"This controls the number of time defragmentation thread can request X_LOCK "
"on an index. Defragmentation thread will check whether "
@@ -19340,6 +19345,21 @@ static MYSQL_SYSVAR_BOOL(log_file_buffering, log_sys.log_buffered,
nullptr, innodb_log_file_buffering_update, FALSE);
#endif
+static MYSQL_SYSVAR_BOOL(log_file_write_through, log_sys.log_write_through,
+ PLUGIN_VAR_OPCMDARG,
+ "Whether each write to ib_logfile0 is write through",
+ nullptr, innodb_log_file_write_through_update, FALSE); /* presumably (check, update, default) per the MYSQL_SYSVAR_BOOL macro - confirm against plugin.h */
+ /* The three variables declared here are listed in innobase_system_variables[] next to log_file_buffering. */
+static MYSQL_SYSVAR_BOOL(data_file_buffering, fil_system.buffered,
+ PLUGIN_VAR_OPCMDARG,
+ "Whether the file system cache for data files is enabled",
+ nullptr, innodb_data_file_buffering_update, FALSE); /* changes are forwarded to fil_system.set_buffered() by the named function */
+
+static MYSQL_SYSVAR_BOOL(data_file_write_through, fil_system.write_through,
+ PLUGIN_VAR_OPCMDARG,
+ "Whether each write to data files writes through",
+ nullptr, innodb_data_file_write_through_update, FALSE); /* changes are forwarded to fil_system.set_write_through() by the named function */
+
static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
PLUGIN_VAR_RQCMDARG,
"Redo log size in bytes.",
@@ -19400,7 +19420,7 @@ static MYSQL_SYSVAR_UINT(undo_tablespaces, srv_undo_tablespaces,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of undo tablespaces to use.",
NULL, NULL,
- 0L, /* Default seting */
+ 3L, /* Default setting */
0L, /* Minimum value */
TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */
@@ -19473,31 +19493,6 @@ static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
-static void innodb_change_buffering_update(THD *thd, struct st_mysql_sys_var*,
- void*, const void *save)
-{
- ulong i= *static_cast<const ulong*>(save);
- if (i != IBUF_USE_NONE && !ibuf.index)
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE,
- "InnoDB: The change buffer is corrupted.");
- else
- innodb_change_buffering= i;
-}
-
-static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED,
- "Buffer changes to secondary indexes.",
- nullptr, innodb_change_buffering_update,
- IBUF_USE_NONE, &innodb_change_buffering_typelib);
-
-static MYSQL_SYSVAR_UINT(change_buffer_max_size,
- srv_change_buffer_max_size,
- PLUGIN_VAR_RQCMDARG,
- "Maximum on-disk size of change buffer in terms of percentage"
- " of the buffer pool.",
- NULL, innodb_change_buffer_max_size_update,
- CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);
-
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
"Specifies how InnoDB index statistics collection code should"
@@ -19505,18 +19500,6 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
" NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Dump the change buffer at startup.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
- PLUGIN_VAR_RQCMDARG,
- "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
- NULL, NULL, 0, 0, 1, 0);
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
PLUGIN_VAR_RQCMDARG,
"A number between [0, 100] that tells how oftern buffer pool dump status "
@@ -19796,6 +19779,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#if defined __linux__ || defined _WIN32
MYSQL_SYSVAR(log_file_buffering),
#endif
+ MYSQL_SYSVAR(log_file_write_through),
+ MYSQL_SYSVAR(data_file_buffering),
+ MYSQL_SYSVAR(data_file_write_through),
MYSQL_SYSVAR(log_file_size),
MYSQL_SYSVAR(log_group_home_dir),
MYSQL_SYSVAR(max_dirty_pages_pct),
@@ -19843,12 +19829,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
- MYSQL_SYSVAR(change_buffering),
- MYSQL_SYSVAR(change_buffer_max_size),
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- MYSQL_SYSVAR(change_buffer_dump),
- MYSQL_SYSVAR(change_buffering_debug),
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(read_only),
@@ -20004,6 +19984,7 @@ ha_innobase::multi_range_read_info_const(
uint n_ranges,
uint* bufsz,
uint* flags,
+ ha_rows limit,
Cost_estimate* cost)
{
/* See comments in ha_myisam::multi_range_read_info_const */
@@ -20013,8 +19994,9 @@ ha_innobase::multi_range_read_info_const(
*flags |= HA_MRR_USE_DEFAULT_IMPL;
}
- ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
- bufsz, flags, cost);
+ ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param,
+ n_ranges,
+ bufsz, flags, limit, cost);
return res;
}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 1f42bf180a8..60b56b4a22f 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -105,10 +105,10 @@ public:
int close(void) override;
- double scan_time() override;
-
- double read_time(uint index, uint ranges, ha_rows rows) override;
-
+#ifdef NOT_USED
+ IO_AND_CPU_COST scan_time() override;
+ double rnd_pos_time(ha_rows rows) override;
+#endif
int write_row(const uchar * buf) override;
int update_row(const uchar * old_data, const uchar * new_data) override;
@@ -383,6 +383,7 @@ public:
uint n_ranges,
uint* bufsz,
uint* flags,
+ ha_rows limit,
Cost_estimate* cost) override;
/** Initialize multi range read and get information.
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 59a8b005557..cfa5ed922da 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -2154,8 +2154,7 @@ next_page:
}
next_page= false;
- block= btr_block_get(*clust_index, next_page_no, BTR_SEARCH_LEAF, false,
- &mtr);
+ block= btr_block_get(*clust_index, next_page_no, BTR_SEARCH_LEAF, &mtr);
if (!block)
goto non_empty;
page_cur_set_before_first(block, cur);
@@ -10225,6 +10224,7 @@ commit_try_rebuild(
/* We must be still holding a table handle. */
DBUG_ASSERT(user_table->get_ref_count() == 1);
+ rebuilt_table->row_id = uint64_t{user_table->row_id};
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
switch (error) {
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 3b537afef40..589182b73ba 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,7 +39,6 @@ Created July 18, 2007 Vasil Dimov
#include "dict0load.h"
#include "buf0buddy.h"
#include "buf0buf.h"
-#include "ibuf0ibuf.h"
#include "dict0mem.h"
#include "dict0types.h"
#include "srv0start.h"
@@ -80,10 +79,7 @@ in i_s_page_type[] array */
/** R-tree index page */
#define I_S_PAGE_TYPE_RTREE (FIL_PAGE_TYPE_LAST + 1)
-/** Change buffer B-tree page */
-#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 2)
-
-#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_IBUF
+#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_RTREE
#define I_S_PAGE_TYPE_BITS 4
@@ -104,9 +100,6 @@ static buf_page_desc_t i_s_page_type[] = {
{"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
{"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN},
{"RTREE_INDEX", I_S_PAGE_TYPE_RTREE},
- {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
- {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
- {"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED},
};
/** This structure defines information we will fetch from pages
@@ -3776,17 +3769,17 @@ i_s_innodb_buffer_page_fill(
OK(fields[IDX_BUFFER_PAGE_STATE]->store(
std::min<uint32_t>(3, page_info->state) + 1, true));
- static_assert(buf_page_t::UNFIXED == 1U << 29, "comp.");
+ static_assert(buf_page_t::UNFIXED == 2U << 29, "comp.");
static_assert(buf_page_t::READ_FIX == 4U << 29, "comp.");
- static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp.");
+ static_assert(buf_page_t::WRITE_FIX == 6U << 29, "comp.");
unsigned io_fix = page_info->state >> 29;
if (io_fix < 4) {
io_fix = 1;
- } else if (io_fix > 5) {
- io_fix = 3;
+ } else if (io_fix == 4) {
+ io_fix = 2;
} else {
- io_fix -= 2;
+ io_fix = 3;
}
OK(fields[IDX_BUFFER_PAGE_IO_FIX]->store(io_fix, true));
@@ -3824,14 +3817,9 @@ i_s_innodb_set_page_type(
their values are defined as 17855 and 17854, so we cannot
use them to index into i_s_page_type[] array, its array index
in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX
- (1) for index pages or I_S_PAGE_TYPE_IBUF for
- change buffer index pages */
+ (1) for index pages */
if (page_type == FIL_PAGE_RTREE) {
page_info->page_type = I_S_PAGE_TYPE_RTREE;
- } else if (page_info->index_id
- == static_cast<index_id_t>(DICT_IBUF_ID_MIN
- + IBUF_SPACE_ID)) {
- page_info->page_type = I_S_PAGE_TYPE_IBUF;
} else {
ut_ad(page_type == FIL_PAGE_INDEX
|| page_type == FIL_PAGE_TYPE_INSTANT);
@@ -3876,9 +3864,9 @@ i_s_innodb_buffer_page_get_info(
static_assert(buf_page_t::NOT_USED == 0, "compatibility");
static_assert(buf_page_t::MEMORY == 1, "compatibility");
static_assert(buf_page_t::REMOVE_HASH == 2, "compatibility");
- static_assert(buf_page_t::UNFIXED == 1U << 29, "compatibility");
+ static_assert(buf_page_t::UNFIXED == 2U << 29, "compatibility");
static_assert(buf_page_t::READ_FIX == 4U << 29, "compatibility");
- static_assert(buf_page_t::WRITE_FIX == 5U << 29, "compatibility");
+ static_assert(buf_page_t::WRITE_FIX == 6U << 29, "compatibility");
page_info->state = bpage->state();
@@ -4268,17 +4256,17 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_PAGE_STATE]->store(
page_info->compressed_only, true));
- static_assert(buf_page_t::UNFIXED == 1U << 29, "comp.");
+ static_assert(buf_page_t::UNFIXED == 2U << 29, "comp.");
static_assert(buf_page_t::READ_FIX == 4U << 29, "comp.");
- static_assert(buf_page_t::WRITE_FIX == 5U << 29, "comp.");
+ static_assert(buf_page_t::WRITE_FIX == 6U << 29, "comp.");
unsigned io_fix = page_info->state >> 29;
if (io_fix < 4) {
io_fix = 1;
- } else if (io_fix > 5) {
- io_fix = 3;
+ } else if (io_fix == 4) {
+ io_fix = 2;
} else {
- io_fix -= 2;
+ io_fix = 3;
}
OK(fields[IDX_BUF_LRU_PAGE_IO_FIX]->store(io_fix, true));
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index e988a685678..5303b592c71 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -17,1259 +17,99 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
-/**************************************************//**
+/**
@file ibuf/ibuf0ibuf.cc
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
+Upgrade and removal of the InnoDB change buffer
+*/
#include "ibuf0ibuf.h"
#include "btr0sea.h"
-
-/** Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE 4
-/** The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP PAGE_DATA
-
-#include "buf0buf.h"
-#include "buf0rea.h"
-#include "fsp0fsp.h"
-#include "trx0sys.h"
-#include "fil0fil.h"
-#include "rem0rec.h"
-#include "btr0cur.h"
#include "btr0pcur.h"
-#include "btr0btr.h"
#include "row0upd.h"
-#include "dict0boot.h"
-#include "fut0lst.h"
-#include "lock0lock.h"
-#include "log0recv.h"
-#include "que0que.h"
-#include "srv0start.h" /* srv_shutdown_state */
-#include "rem0cmp.h"
+#include "my_service_manager.h"
#include "log.h"
-/* STRUCTURE OF AN INSERT BUFFER RECORD
+/** Possible operations buffered in the change buffer. The record format notes below refer to this enum for the 1-byte "Operation type" field. */
+enum ibuf_op
+{
+ IBUF_OP_INSERT= 0,
+ IBUF_OP_DELETE_MARK= 1,
+ IBUF_OP_DELETE= 2,
+};
+/* Fixed identifiers of the change buffer; the literal 0 stands where the removed code passed IBUF_SPACE_ID. */
+constexpr const page_id_t ibuf_root{0, FSP_IBUF_TREE_ROOT_PAGE_NO}; /* change buffer tree root page */
+constexpr const page_id_t ibuf_header{0, FSP_IBUF_HEADER_PAGE_NO}; /* change buffer header page */
+constexpr const index_id_t ibuf_index_id{0xFFFFFFFF00000000ULL}; /* presumably the former DICT_IBUF_ID_MIN + IBUF_SPACE_ID - TODO confirm */
-In versions < 4.1.x:
+/* Format of the change buffer records:
+
+MySQL 3.23 and MySQL 4.0 (not supported since MySQL 5.6.5 and MariaDB 10.0.11):
1. The first field is the page number.
2. The second field is an array which stores type info for each subsequent
- field. We store the information which affects the ordering of records, and
+ field (4 bytes per column).
+ We store the information which affects the ordering of records, and
also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
is 10 bytes.
3. Next we have the fields of the actual index record.
-In versions >= 4.1.x:
-
-Note that contary to what we planned in the 1990's, there will only be one
-insert buffer tree, and that is in the system tablespace of InnoDB.
+MySQL 4.1:
1. The first field is the space id.
2. The second field is a one-byte marker (0) which differentiates records from
the < 4.1.x storage format.
3. The third field is the page number.
-4. The fourth field contains the type info, where we have also added 2 bytes to
- store the charset. In the compressed table format of 5.0.x we must add more
- information here so that we can build a dummy 'index' struct which 5.0.x
- can use in the binary search on the index page in the ibuf merge phase.
+4. The fourth field contains the type info
+ (6 bytes per index field, 16-bit collation information added).
+ Unless ROW_FORMAT=REDUNDANT, we add more metadata here so that
+ we can access records in the index page.
5. The rest of the fields contain the fields of the actual index record.
-In versions >= 5.0.3:
+MySQL 5.0 (starting with MySQL 5.0.3) and MySQL 5.1:
The first byte of the fourth field is an additional marker (0) if the record
-is in the compact format. The presence of this marker can be detected by
-looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
+is not in ROW_FORMAT=REDUNDANT. The presence of this marker can be detected by
+looking at the length of the field modulo 6.
The high-order bit of the character set field in the type info is the
"nullable" flag for the field.
-In versions >= 5.5:
+MySQL 5.5 and MariaDB 5.5 and later:
-The optional marker byte at the start of the fourth field is replaced by
-mandatory 3 fields, totaling 4 bytes:
+Unless innodb_change_buffering=inserts, the optional marker byte at
+the start of the fourth field may be replaced by mandatory 3 fields,
+comprising 4 bytes:
1. 2 bytes: Counter field, used to sort records within a (space id, page
no) in the order they were added. This is needed so that for example the
sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
correctly.
- 2. 1 byte: Operation type (see ibuf_op_t).
+ 2. 1 byte: Operation type (see ibuf_op).
- 3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
-
-To ensure older records, which do not have counters to enforce correct
-sorting, are merged before any new records, ibuf_insert checks if we're
-trying to insert to a position that contains old-style records, and if so,
-refuses the insert. Thus, ibuf pages are gradually converted to the new
-format as their corresponding buffer pool pages are read into memory.
+ 3. 1 byte: 0=ROW_FORMAT=REDUNDANT, 1=other
*/
-
-/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
-
-If an OS thread performs any operation that brings in disk pages from
-non-system tablespaces into the buffer pool, or creates such a page there,
-then the operation may have as a side effect an insert buffer index tree
-compression. Thus, the tree latch of the insert buffer tree may be acquired
-in the x-mode, and also the file space latch of the system tablespace may
-be acquired in the x-mode.
-
-Also, an insert to an index in a non-system tablespace can have the same
-effect. How do we know this cannot lead to a deadlock of OS threads? There
-is a problem with the i\o-handler threads: they break the latching order
-because they own x-latches to pages which are on a lower level than the
-insert buffer tree latch, its page latches, and the tablespace latch an
-insert buffer operation can reserve.
-
-The solution is the following: Let all the tree and page latches connected
-with the insert buffer be later in the latching order than the fsp latch and
-fsp page latches.
-
-Insert buffer pages must be such that the insert buffer is never invoked
-when these pages are accessed as this would result in a recursion violating
-the latching order. We let a special i/o-handler thread take care of i/o to
-the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
-pages and the first inode page, which contains the inode of the ibuf tree: let
-us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
-access both non-ibuf and ibuf pages.
-
-Then an i/o-handler for the insert buffer never needs to access recursively the
-insert buffer tree and thus obeys the latching order. On the other hand, other
-i/o-handlers for other tablespaces may require access to the insert buffer,
-but because all kinds of latches they need to access there are later in the
-latching order, no violation of the latching order occurs in this case,
-either.
-
-A problem is how to grow and contract an insert buffer tree. As it is later
-in the latching order than the fsp management, we have to reserve the fsp
-latch first, before adding or removing pages from the insert buffer tree.
-We let the insert buffer tree have its own file space management: a free
-list of pages linked to the tree root. To prevent recursive using of the
-insert buffer when adding pages to the tree, we must first load these pages
-to memory, obtaining a latch on them, and only after that add them to the
-free list of the insert buffer tree. More difficult is removing of pages
-from the free list. If there is an excess of pages in the free list of the
-ibuf tree, they might be needed if some thread reserves the fsp latch,
-intending to allocate more file space. So we do the following: if a thread
-reserves the fsp latch, we check the writer count field of the latch. If
-this field has value 1, it means that the thread did not own the latch
-before entering the fsp system, and the mtr of the thread contains no
-modifications to the fsp pages. Now we are free to reserve the ibuf latch,
-and check if there is an excess of pages in the free list. We can then, in a
-separate mini-transaction, take them out of the free list and free them to
-the fsp system.
-
-To avoid deadlocks in the ibuf system, we divide file pages into three levels:
-
-(1) non-ibuf pages,
-(2) ibuf tree pages and the pages in the ibuf tree free list, and
-(3) ibuf bitmap pages.
-
-No OS thread is allowed to access higher level pages if it has latches to
-lower level pages; even if the thread owns a B-tree latch it must not access
-the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
-is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
-exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
-level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
-it uses synchronous aio, it can access any pages, as long as it obeys the
-access order rules. */
-
-/** Operations that can currently be buffered. */
-ulong innodb_change_buffering;
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Dump the change buffer at startup */
-my_bool ibuf_dump;
-/** Flag to control insert buffer debugging. */
-uint ibuf_debug;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/** The insert buffer control structure */
-ibuf_t ibuf;
-
-/** @name Offsets to the per-page bits in the insert buffer bitmap */
-/* @{ */
-#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
- amount of free space */
-#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
- changes for the page */
-#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
- the ibuf tree, excluding the
- root page, or is in the free
- list of the ibuf */
-/* @} */
-
-#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format,
- the page number. later, the space_id */
-#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker
- consisting of 1 byte that is 0 */
-#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the
- page number */
-#define IBUF_REC_FIELD_METADATA 3 /* the metadata field */
-#define IBUF_REC_FIELD_USER 4 /* first user field */
-
-/* Various constants for checking the type of an ibuf record and extracting
-data from it. For details, see the description of the record format at the
-top of this file. */
-
-/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record
-The fourth column in the MySQL 5.5 format contains an operation
-type, counter, and some flags. */
-/* @{ */
-#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at
- the beginning of the fourth field */
-
-/* Offsets for the fields at the beginning of the fourth field */
-#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */
-#define IBUF_REC_OFFSET_TYPE 2 /*!< Type of operation */
-#define IBUF_REC_OFFSET_FLAGS 3 /*!< Additional flags */
-
-/* Record flag masks */
-#define IBUF_REC_COMPACT 0x1 /*!< Set in
- IBUF_REC_OFFSET_FLAGS if the
- user index is in COMPACT
- format or later */
-
-
-#ifndef SAFE_MUTEX
-static
-#endif /* SAFE_MUTEX */
-/** The mutex protecting the insert buffer */
-mysql_mutex_t ibuf_mutex,
- /** The mutex covering pessimistic inserts into the change buffer */
- ibuf_pessimistic_insert_mutex;
-
-/** The area in pages from which contract looks for page numbers for merge */
-const ulint IBUF_MERGE_AREA = 8;
-
-/** Inside the merge area, pages which have at most 1 per this number less
-buffered entries compared to maximum volume that can buffered for a single
-page are merged along with the page whose buffer became full */
-const ulint IBUF_MERGE_THRESHOLD = 4;
-
-/** In ibuf_contract at most this number of pages is read to memory in one
-batch, in order to merge the entries for them in the insert buffer */
-const ulint IBUF_MAX_N_PAGES_MERGED = IBUF_MERGE_AREA;
-
-/** If the combined size of the ibuf trees exceeds ibuf.max_size by
-this many pages, we start to contract it synchronous contract, but do
-not insert */
-const ulint IBUF_CONTRACT_DO_NOT_INSERT = 10;
-
-/* TODO: how to cope with drop table if there are records in the insert
-buffer for the indexes of the table? Is there actually any problem,
-because ibuf merge is done to a page when it is read in, and it is
-still physically like the index page even if the index would have been
-dropped! So, there seems to be no problem. */
-
-/******************************************************************//**
-Sets the flag in the current mini-transaction record indicating we're
-inside an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_enter(
-/*=======*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(!mtr->is_inside_ibuf());
- mtr->enter_ibuf();
-}
-
-/******************************************************************//**
-Sets the flag in the current mini-transaction record indicating we're
-exiting an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_exit(
-/*======*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mtr->is_inside_ibuf());
- mtr->exit_ibuf();
-}
-
-/**************************************************************//**
-Commits an insert buffer mini-transaction and sets the persistent
-cursor latch mode to BTR_NO_LATCHES, that is, detaches the cursor. */
-UNIV_INLINE
-void
-ibuf_btr_pcur_commit_specify_mtr(
-/*=============================*/
- btr_pcur_t* pcur, /*!< in/out: persistent cursor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_d(ibuf_exit(mtr));
- btr_pcur_commit_specify_mtr(pcur, mtr);
-}
-
-/******************************************************************//**
-Gets the ibuf header page and x-latches it.
-@return insert buffer header page */
-static
-page_t*
-ibuf_header_page_get(
-/*=================*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(!ibuf_inside(mtr));
-
- buf_block_t* block = buf_page_get(
- page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO),
- 0, RW_X_LATCH, mtr);
-
- return block ? block->page.frame : nullptr;
-}
-
-/** Acquire the change buffer root page.
-@param[in,out] mtr mini-transaction
-@return change buffer root page, SX-latched */
-static buf_block_t *ibuf_tree_root_get(mtr_t *mtr, dberr_t *err= nullptr)
-{
- ut_ad(ibuf_inside(mtr));
- mysql_mutex_assert_owner(&ibuf_mutex);
-
- mtr_sx_lock_index(ibuf.index, mtr);
-
- buf_block_t *block=
- buf_page_get_gen(page_id_t{IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO},
- 0, RW_SX_LATCH, nullptr, BUF_GET, mtr, err);
- ut_ad(!block || ibuf.empty == page_is_empty(block->page.frame));
- return block;
-}
-
-/******************************************************************//**
-Closes insert buffer and frees the data structures. */
-void
-ibuf_close(void)
-/*============*/
-{
- if (!ibuf.index) {
- return;
- }
-
- mysql_mutex_destroy(&ibuf_pessimistic_insert_mutex);
- mysql_mutex_destroy(&ibuf_mutex);
-
- dict_table_t* ibuf_table = ibuf.index->table;
- ibuf.index->lock.free();
- dict_mem_index_free(ibuf.index);
- dict_mem_table_free(ibuf_table);
- ibuf.index = NULL;
-}
-
-/******************************************************************//**
-Updates the size information of the ibuf, assuming the segment size has not
-changed. */
-static
-void
-ibuf_size_update(
-/*=============*/
- const page_t* root) /*!< in: ibuf tree root */
-{
- mysql_mutex_assert_owner(&ibuf_mutex);
-
- ibuf.free_list_len = flst_get_len(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST);
-
- ibuf.height = 1 + btr_page_get_level(root);
-
- /* the '1 +' is the ibuf header page */
- ibuf.size = ibuf.seg_size - (1 + ibuf.free_list_len);
-}
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup and initializes
-the data structures for the insert buffer.
-@return DB_SUCCESS or failure */
-dberr_t
-ibuf_init_at_db_start(void)
-/*=======================*/
-{
- page_t* root;
-
- ut_ad(!ibuf.index);
- mtr_t mtr;
- mtr.start();
- compile_time_assert(IBUF_SPACE_ID == TRX_SYS_SPACE);
- compile_time_assert(IBUF_SPACE_ID == 0);
- mtr.x_lock_space(fil_system.sys_space);
- dberr_t err;
- buf_block_t* header_page = buf_page_get_gen(
- page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO),
- 0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err);
-
- if (!header_page) {
-err_exit:
- sql_print_error("InnoDB: The change buffer is corrupted"
- " or has been removed on upgrade"
- " to MariaDB 11.0 or later");
- mtr.commit();
- if (innodb_change_buffering == IBUF_USE_NONE) {
- err = DB_SUCCESS;
- }
- return err;
- }
-
- fseg_n_reserved_pages(*header_page,
- IBUF_HEADER + IBUF_TREE_SEG_HEADER
- + header_page->page.frame, &ibuf.seg_size, &mtr);
-
- do {
- DBUG_EXECUTE_IF("intermittent_read_failure", continue;);
- ut_ad(ibuf.seg_size >= 2);
- } while (0);
-
- if (buf_block_t* block =
- buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
- FSP_IBUF_TREE_ROOT_PAGE_NO),
- 0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) {
- root = buf_block_get_frame(block);
- } else {
- goto err_exit;
- }
-
- DBUG_EXECUTE_IF("ibuf_init_corrupt",
- err = DB_CORRUPTION;
- goto err_exit;);
-
- if (page_is_comp(root) || fil_page_get_type(root) != FIL_PAGE_INDEX
- || btr_page_get_index_id(root) != DICT_IBUF_ID_MIN) {
- err = DB_CORRUPTION;
- goto err_exit;
- }
-
- /* At startup we intialize ibuf to have a maximum of
- CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
- buffer pool size. Once ibuf struct is initialized this
- value is updated with the user supplied size by calling
- ibuf_max_size_update(). */
- ibuf.max_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
- * CHANGE_BUFFER_DEFAULT_SIZE) / 100;
-
- mysql_mutex_init(ibuf_mutex_key, &ibuf_mutex, nullptr);
- mysql_mutex_init(ibuf_pessimistic_insert_mutex_key,
- &ibuf_pessimistic_insert_mutex, nullptr);
-
- mysql_mutex_lock(&ibuf_mutex);
- ibuf_size_update(root);
- mysql_mutex_unlock(&ibuf_mutex);
-
- ibuf.empty = page_is_empty(root);
- mtr.commit();
-
- ibuf.index = dict_mem_index_create(
- dict_table_t::create(
- {C_STRING_WITH_LEN("innodb_change_buffer")},
- fil_system.sys_space, 1, 0, 0, 0),
- "CLUST_IND",
- DICT_CLUSTERED | DICT_IBUF, 1);
- ibuf.index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
- ibuf.index->n_uniq = REC_MAX_N_FIELDS;
- ibuf.index->lock.SRW_LOCK_INIT(index_tree_rw_lock_key);
-#ifdef BTR_CUR_ADAPT
- ibuf.index->search_info = btr_search_info_create(ibuf.index->heap);
-#endif /* BTR_CUR_ADAPT */
- ibuf.index->page = FSP_IBUF_TREE_ROOT_PAGE_NO;
- ut_d(ibuf.index->cached = TRUE);
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- if (!ibuf_dump) {
- return DB_SUCCESS;
- }
- ib::info() << "Dumping the change buffer";
- ibuf_mtr_start(&mtr);
- btr_pcur_t pcur;
- if (DB_SUCCESS
- == pcur.open_leaf(true, ibuf.index, BTR_SEARCH_LEAF, &mtr)) {
- while (btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
- rec_print_old(stderr, btr_pcur_get_rec(&pcur));
- }
- }
- ibuf_mtr_commit(&mtr);
- ib::info() << "Dumped the change buffer";
-#endif
-
- return DB_SUCCESS;
-}
-
-/*********************************************************************//**
-Updates the max_size value for ibuf. */
-void
-ibuf_max_size_update(
-/*=================*/
- ulint new_val) /*!< in: new value in terms of
- percentage of the buffer pool size */
-{
- if (UNIV_UNLIKELY(!ibuf.index)) return;
- ulint new_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
- * new_val) / 100;
- mysql_mutex_lock(&ibuf_mutex);
- ibuf.max_size = new_size;
- mysql_mutex_unlock(&ibuf_mutex);
-}
-
-# ifdef UNIV_DEBUG
-/** Gets the desired bits for a given page from a bitmap page.
-@param[in] page bitmap page
-@param[in] page_id page id whose bits to get
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param[in,out] mtr mini-transaction holding an x-latch on the
-bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, page_id, zip_size, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, page_id, zip_size, \
- MTR_MEMO_PAGE_X_FIX, mtr, bit)
-# else /* UNIV_DEBUG */
-/** Gets the desired bits for a given page from a bitmap page.
-@param[in] page bitmap page
-@param[in] page_id page id whose bits to get
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param[in,out] mtr mini-transaction holding an x-latch on the
-bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, page_id, zip_size, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, page_id, zip_size, bit)
-# endif /* UNIV_DEBUG */
-
-/** Gets the desired bits for a given page from a bitmap page.
-@param[in] page bitmap page
-@param[in] page_id page id whose bits to get
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] latch_type MTR_MEMO_PAGE_X_FIX, MTR_MEMO_BUF_FIX, ...
-@param[in,out] mtr mini-transaction holding latch_type on the
-bitmap page
-@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@return value of bits */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_get_bits_low(
- const page_t* page,
- const page_id_t page_id,
- ulint zip_size,
-#ifdef UNIV_DEBUG
- ulint latch_type,
- mtr_t* mtr,
-#endif /* UNIV_DEBUG */
- ulint bit)
-{
- ulint byte_offset;
- ulint bit_offset;
- ulint map_byte;
- ulint value;
- const ulint size = zip_size ? zip_size : srv_page_size;
-
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(bit < IBUF_BITS_PER_PAGE);
- compile_time_assert(!(IBUF_BITS_PER_PAGE % 2));
- ut_ad(mtr->memo_contains_page_flagged(page, latch_type));
-
- bit_offset = (page_id.page_no() & (size - 1))
- * IBUF_BITS_PER_PAGE + bit;
-
- byte_offset = bit_offset / 8;
- bit_offset = bit_offset % 8;
-
- ut_ad(byte_offset + IBUF_BITMAP < srv_page_size);
-
- map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
-
- value = ut_bit_get_nth(map_byte, bit_offset);
-
- if (bit == IBUF_BITMAP_FREE) {
- ut_ad(bit_offset + 1 < 8);
-
- value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
- }
-
- return(value);
-}
-
-/** Sets the desired bit for a given page in a bitmap page.
-@tparam bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param[in,out] block bitmap page
-@param[in] page_id page id whose bits to set
-@param[in] physical_size page size
-@param[in] val value to set
-@param[in,out] mtr mtr containing an x-latch to the bitmap page */
-template<ulint bit>
-static void
-ibuf_bitmap_page_set_bits(
- buf_block_t* block,
- const page_id_t page_id,
- ulint physical_size,
- ulint val,
- mtr_t* mtr)
-{
- ulint byte_offset;
- ulint bit_offset;
-
- static_assert(bit < IBUF_BITS_PER_PAGE, "wrong bit");
- compile_time_assert(!(IBUF_BITS_PER_PAGE % 2));
- ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr->is_named_space(page_id.space()));
-
- bit_offset = (page_id.page_no() % physical_size)
- * IBUF_BITS_PER_PAGE + bit;
-
- byte_offset = bit_offset / 8;
- bit_offset = bit_offset % 8;
-
- ut_ad(byte_offset + IBUF_BITMAP < srv_page_size);
-
- byte* map_byte = &block->page.frame[IBUF_BITMAP + byte_offset];
- byte b = *map_byte;
-
- if (bit == IBUF_BITMAP_FREE) {
- ut_ad(bit_offset + 1 < 8);
- ut_ad(val <= 3);
- b &= static_cast<byte>(~(3U << bit_offset));
- b |= static_cast<byte>(((val & 2) >> 1) << bit_offset
- | (val & 1) << (bit_offset + 1));
- } else {
- ut_ad(val <= 1);
- b &= static_cast<byte>(~(1U << bit_offset));
-#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wconversion" /* GCC 5 may need this here */
-#endif
- b |= static_cast<byte>(val << bit_offset);
-#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
-# pragma GCC diagnostic pop
-#endif
- }
-
- mtr->write<1,mtr_t::MAYBE_NOP>(*block, map_byte, b);
-}
-
-/** Calculates the bitmap page number for a given page number.
-@param[in] page_id page id
-@param[in] size page size
-@return the bitmap page id where the file page is mapped */
-inline page_id_t ibuf_bitmap_page_no_calc(const page_id_t page_id, ulint size)
-{
- if (!size)
- size= srv_page_size;
-
- return page_id_t(page_id.space(), FSP_IBUF_BITMAP_OFFSET
- + uint32_t(page_id.page_no() & ~(size - 1)));
-}
-
-/** Gets the ibuf bitmap page where the bits describing a given file page are
-stored.
-@param[in] page_id page id of the file page
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] mtr mini-transaction
-@return bitmap page where the file page is mapped, that is, the bitmap
-page containing the descriptor bits for the file page; the bitmap page
-is x-latched */
-static
-buf_block_t*
-ibuf_bitmap_get_map_page(
- const page_id_t page_id,
- ulint zip_size,
- mtr_t* mtr)
-{
- return buf_page_get_gen(ibuf_bitmap_page_no_calc(page_id, zip_size),
- zip_size, RW_X_LATCH, nullptr,
- BUF_GET_POSSIBLY_FREED, mtr);
-}
-
-/************************************************************************//**
-Sets the free bits of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-UNIV_INLINE
-void
-ibuf_set_free_bits_low(
-/*===================*/
- const buf_block_t* block, /*!< in: index page; free bits are set if
- the index is non-clustered and page
- level is 0 */
- ulint val, /*!< in: value to set: < 4 */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ut_ad(mtr->is_named_space(block->page.id().space()));
- if (!page_is_leaf(block->page.frame)) {
- return;
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(val <= ibuf_index_page_calc_free(block));
-#endif /* UNIV_IBUF_DEBUG */
- const page_id_t id(block->page.id());
-
- if (buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
- id, block->zip_size(), mtr)) {
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>(
- bitmap_page, id, block->physical_size(),
- val, mtr);
- }
-}
-
-/************************************************************************//**
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val) /*!< in: value to set: < 4 */
-{
- if (!page_is_leaf(block->page.frame))
- return;
-
- mtr_t mtr;
- mtr.start();
- const page_id_t id(block->page.id());
- const fil_space_t *space= mtr.set_named_space_id(id.space());
-
- if (buf_block_t *bitmap_page=
- ibuf_bitmap_get_map_page(id, block->zip_size(), &mtr))
- {
- if (space->purpose != FIL_TYPE_TABLESPACE)
- mtr.set_log_mode(MTR_LOG_NO_REDO);
-
-#ifdef UNIV_IBUF_DEBUG
- if (max_val != ULINT_UNDEFINED)
- {
- ulint old_val= ibuf_bitmap_page_get_bits(bitmap_page, id,
- IBUF_BITMAP_FREE, &mtr);
- ut_a(old_val <= max_val);
- }
-
- ut_a(val <= ibuf_index_page_calc_free(block));
-#endif /* UNIV_IBUF_DEBUG */
-
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>
- (bitmap_page, id, block->physical_size(), val, &mtr);
- }
-
- mtr.commit();
-}
-
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space. */
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block) /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-{
- ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Updates the free bits for an uncompressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-void
-ibuf_update_free_bits_low(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- ulint max_ins_size, /*!< in: value of
- maximum insert size
- with reorganize before
- the latest operation
- performed to the page */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ulint before;
- ulint after;
-
- ut_a(!is_buf_block_get_page_zip(block));
- ut_ad(mtr->is_named_space(block->page.id().space()));
-
- before = ibuf_index_page_calc_free_bits(srv_page_size,
- max_ins_size);
-
- after = ibuf_index_page_calc_free(block);
-
- /* This approach cannot be used on compressed pages, since the
- computed value of "before" often does not match the current
- state of the bitmap. This is because the free space may
- increase or decrease when a compressed page is reorganized. */
- if (before != after) {
- ibuf_set_free_bits_low(block, after, mtr);
- }
-}
-
-/**********************************************************************//**
-Updates the free bits for a compressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-void
-ibuf_update_free_bits_zip(
-/*======================*/
- buf_block_t* block, /*!< in/out: index page */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ut_ad(page_is_leaf(block->page.frame));
- ut_ad(block->zip_size());
-
- ulint after = ibuf_index_page_calc_free_zip(block);
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(&block->page);
- }
-
- if (buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
- block->page.id(), block->zip_size(), mtr)) {
-
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>(
- bitmap_page, block->page.id(),
- block->physical_size(), after, mtr);
- }
-}
-
-/**********************************************************************//**
-Updates the free bits for the two pages to reflect the present state.
-Does this in the mtr given, which means that the latching order rules
-virtually prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- buf_block_t* block1, /*!< in: index page */
- buf_block_t* block2, /*!< in: index page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr->is_named_space(block1->page.id().space()));
- ut_ad(block1->page.id().space() == block2->page.id().space());
-
- /* Avoid deadlocks by acquiring multiple bitmap page latches in
- a consistent order (smaller pointer first). */
- if (block1 > block2)
- std::swap(block1, block2);
-
- ibuf_set_free_bits_low(block1, ibuf_index_page_calc_free(block1), mtr);
- ibuf_set_free_bits_low(block2, ibuf_index_page_calc_free(block2), mtr);
-}
-
-/** Returns TRUE if the page is one of the fixed address ibuf pages.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return TRUE if a fixed address ibuf i/o page */
-inline bool ibuf_fixed_addr_page(const page_id_t page_id, ulint zip_size)
-{
- return(page_id == page_id_t(IBUF_SPACE_ID, IBUF_TREE_ROOT_PAGE_NO)
- || ibuf_bitmap_page(page_id, zip_size));
-}
-
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==true.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] x_latch FALSE if relaxed check (avoid latching the
-bitmap page)
-@param[in,out] mtr mtr which will contain an x-latch to the
-bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
-in which case a new transaction is created.
-@return TRUE if level 2 or level 3 page */
-bool
-ibuf_page_low(
- const page_id_t page_id,
- ulint zip_size,
-#ifdef UNIV_DEBUG
- bool x_latch,
-#endif /* UNIV_DEBUG */
- mtr_t* mtr)
-{
- ibool ret;
- mtr_t local_mtr;
-
- ut_ad(!recv_no_ibuf_operations);
- ut_ad(x_latch || mtr == NULL);
-
- if (ibuf_fixed_addr_page(page_id, zip_size)) {
- return(true);
- } else if (page_id.space() != IBUF_SPACE_ID) {
- return(false);
- }
-
- compile_time_assert(IBUF_SPACE_ID == 0);
- ut_ad(fil_system.sys_space->purpose == FIL_TYPE_TABLESPACE);
-
-#ifdef UNIV_DEBUG
- if (!x_latch) {
- mtr_start(&local_mtr);
-
- /* Get the bitmap page without a page latch, so that
- we will not be violating the latching order when
- another bitmap page has already been latched by this
- thread. The page will be buffer-fixed, and thus it
- cannot be removed or relocated while we are looking at
- it. The contents of the page could change, but the
- IBUF_BITMAP_IBUF bit that we are interested in should
- not be modified by any other thread. Nobody should be
- calling ibuf_add_free_page() or ibuf_remove_free_page()
- while the page is linked to the insert buffer b-tree. */
- buf_block_t* block = buf_page_get_gen(
- ibuf_bitmap_page_no_calc(page_id, zip_size),
- zip_size, RW_NO_LATCH, nullptr, BUF_GET, &local_mtr);
-
- ret = block
- && ibuf_bitmap_page_get_bits_low(
- block->page.frame, page_id, zip_size,
- MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
-
- mtr_commit(&local_mtr);
- return(ret);
- }
-#endif /* UNIV_DEBUG */
-
- if (mtr == NULL) {
- mtr = &local_mtr;
- mtr_start(mtr);
- }
-
- buf_block_t *block = ibuf_bitmap_get_map_page(page_id, zip_size,
- mtr);
- ret = block
- && ibuf_bitmap_page_get_bits(block->page.frame,
- page_id, zip_size,
- IBUF_BITMAP_IBUF, mtr);
-
- if (mtr == &local_mtr) {
- mtr_commit(mtr);
- }
-
- return(ret);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(rec)
-#endif /* UNIV_DEBUG */
+/** first user record field */
+constexpr unsigned IBUF_REC_FIELD_USER= 4;
/********************************************************************//**
Returns the page number field of an ibuf record.
@return page number */
-static
-uint32_t
-ibuf_rec_get_page_no_func(
-/*======================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
+static uint32_t ibuf_rec_get_page_no(const rec_t *rec)
{
- const byte* field;
- ulint len;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
-
- ut_a(len == 4);
-
- return(mach_read_from_4(field));
+ return mach_read_from_4(rec + 5);
}
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(rec)
-#endif /* UNIV_DEBUG */
-
/********************************************************************//**
-Returns the space id field of an ibuf record. For < 4.1.x format records
-returns 0.
+Returns the space id field of an ibuf record.
@return space id */
-static
-uint32_t
-ibuf_rec_get_space_func(
-/*====================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
-{
- const byte* field;
- ulint len;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-
- ut_a(len == 4);
-
- return(mach_read_from_4(field));
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
- ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
- ibuf_rec_get_info_func(rec,op,comp,info_len,counter)
-#endif
-/****************************************************************//**
-Get various information about an ibuf record in >= 4.1.x format. */
-static
-void
-ibuf_rec_get_info_func(
-/*===================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec, /*!< in: ibuf record */
- ibuf_op_t* op, /*!< out: operation type, or NULL */
- ibool* comp, /*!< out: compact flag, or NULL */
- ulint* info_len, /*!< out: length of info fields at the
- start of the fourth field, or
- NULL */
- ulint* counter) /*!< in: counter value, or NULL */
-{
- const byte* types;
- ulint fields;
- ulint len;
-
- /* Local variables to shadow arguments. */
- ibuf_op_t op_local;
- ibool comp_local;
- ulint info_len_local;
- ulint counter_local;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- fields = rec_get_n_fields_old(rec);
- ut_a(fields > IBUF_REC_FIELD_USER);
-
- types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
- compile_time_assert(IBUF_REC_INFO_SIZE
- < DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- switch (info_len_local) {
- case 0:
- case 1:
- op_local = IBUF_OP_INSERT;
- comp_local = info_len_local;
- ut_ad(!counter);
- counter_local = ULINT_UNDEFINED;
- break;
-
- case IBUF_REC_INFO_SIZE:
- op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
- comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
- counter_local = mach_read_from_2(
- types + IBUF_REC_OFFSET_COUNTER);
- break;
-
- default:
- ut_error;
- }
-
- ut_a(op_local < IBUF_OP_COUNT);
- ut_a((len - info_len_local) ==
- (fields - IBUF_REC_FIELD_USER)
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- if (op) {
- *op = op_local;
- }
-
- if (comp) {
- *comp = comp_local;
- }
-
- if (info_len) {
- *info_len = info_len_local;
- }
-
- if (counter) {
- *counter = counter_local;
- }
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(rec)
-#endif
-
-/****************************************************************//**
-Returns the operation type field of an ibuf record.
-@return operation type */
-static
-ibuf_op_t
-ibuf_rec_get_op_type_func(
-/*======================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
-{
- ulint len;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- if (len > 1) {
- /* This is a < 4.1.x format record */
-
- return(IBUF_OP_INSERT);
- } else {
- ibuf_op_t op;
-
- ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL);
-
- return(op);
- }
-}
-
-/****************************************************************//**
-Read the first two bytes from a record's fourth field (counter field in new
-records; something else in older records).
-@return "counter" field, or ULINT_UNDEFINED if for some reason it
-can't be read */
-ulint
-ibuf_rec_get_counter(
-/*=================*/
- const rec_t* rec) /*!< in: ibuf record */
-{
- const byte* ptr;
- ulint len;
-
- if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) {
-
- return(ULINT_UNDEFINED);
- }
-
- ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- if (len >= 2) {
-
- return(mach_read_from_2(ptr));
- } else {
-
- return(ULINT_UNDEFINED);
- }
-}
-
-
-/**
- Add accumulated operation counts to a permanent array.
- Both arrays must be of size IBUF_OP_COUNT.
-*/
-static void ibuf_add_ops(Atomic_counter<ulint> *out, const ulint *in)
+static uint32_t ibuf_rec_get_space(const rec_t *rec)
{
- for (auto i = 0; i < IBUF_OP_COUNT; i++)
- out[i]+= in[i];
-}
-
-
-/****************************************************************//**
-Print operation counts. The array must be of size IBUF_OP_COUNT. */
-static
-void
-ibuf_print_ops(
-/*===========*/
- const char* op_name,/*!< in: operation name */
- const Atomic_counter<ulint>* ops, /*!< in: operation counts */
- FILE* file) /*!< in: file where to print */
-{
- static const char* op_names[] = {
- "insert",
- "delete mark",
- "delete"
- };
-
- static_assert(array_elements(op_names) == IBUF_OP_COUNT, "");
- fputs(op_name, file);
-
- for (ulint i = 0; i < IBUF_OP_COUNT; i++) {
- fprintf(file, "%s " ULINTPF "%s", op_names[i],
- ulint{ops[i]}, (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
- }
-
- putc('\n', file);
+ return mach_read_from_4(rec);
}
/********************************************************************//**
-Creates a dummy index for inserting a record to a non-clustered index.
-@return dummy index */
-static
-dict_index_t*
-ibuf_dummy_index_create(
-/*====================*/
- ulint n, /*!< in: number of fields */
- ibool comp) /*!< in: TRUE=use compact record format */
-{
- dict_table_t* table;
- dict_index_t* index;
-
- table = dict_table_t::create({C_STRING_WITH_LEN("IBUF_DUMMY")},
- nullptr, n, 0,
- comp ? DICT_TF_COMPACT : 0, 0);
-
- index = dict_mem_index_create(table, "IBUF_DUMMY", 0, n);
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- index->cached = TRUE;
- ut_d(index->is_dummy = true);
-
- return(index);
-}
-/********************************************************************//**
Add a column to the dummy index */
static
void
@@ -1287,93 +127,79 @@ ibuf_dummy_index_add_col(
dict_index_add_col(index, index->table,
dict_table_get_nth_col(index->table, i), len);
}
-/********************************************************************//**
-Deallocates a dummy index for inserting a record to a non-clustered index. */
+
+/**********************************************************************//**
+Decodes the 6-byte stored type descriptor (the >= 4.1.x storage format) into
+type: mtype, prtype (with the charset-collation code for string types) and
+len, which determine alphabetical ordering and the size of an SQL NULL. */
static
void
-ibuf_dummy_index_free(
-/*==================*/
- dict_index_t* index) /*!< in, own: dummy index */
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+ dtype_t* type, /*!< in: type struct */
+ const byte* buf) /*!< in: buffer for stored type order info */
{
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
-}
+ type->mtype = buf[0] & 63;
+ type->prtype = buf[1];
-#ifdef UNIV_DEBUG
-# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
- ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
-#else /* UNIV_DEBUG */
-# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
- ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex)
-#endif
+ if (buf[0] & 128) {
+ type->prtype |= DATA_BINARY_TYPE;
+ }
-/*********************************************************************//**
-Builds the entry used to
+ if (buf[4] & 128) {
+ type->prtype |= DATA_NOT_NULL;
+ }
-1) IBUF_OP_INSERT: insert into a non-clustered index
+ type->len = mach_read_from_2(buf + 2);
-2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to
- activate
+ uint32_t charset_coll = (mach_read_from_2(buf + 4) & CHAR_COLL_MASK)
+ << 16; /* already shifted to its prtype position (bit 16 upwards) */
-3) IBUF_OP_DELETE: find the record we need to delete
+ if (dtype_is_string_type(type->mtype)) {
+ type->prtype |= charset_coll; /* no second shift: it would zero the code */
-when we have the corresponding record in an ibuf index.
+ if (charset_coll == 0) {
+ /* This insert buffer record was inserted before
+ MySQL 4.1.2, and the charset-collation code was not
+ explicitly stored to dtype->prtype at that time. It
+ must be the default charset-collation of this MySQL
+ installation. */
+ type->prtype |= default_charset_info->number << 16;
+ }
+ }
-NOTE that as we copy pointers to fields in ibuf_rec, the caller must
-hold a latch to the ibuf_rec page as long as the entry is used!
+ dtype_set_mblen(type);
+}
-@return own: entry to insert to a non-clustered index */
-static
-dtuple_t*
-ibuf_build_entry_from_ibuf_rec_func(
-/*================================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
- mem_heap_t* heap, /*!< in: heap where built */
- dict_index_t** pindex) /*!< out, own: dummy index that
- describes the entry */
+/** Construct an index entry and an index for applying an operation.
+@param ibuf_rec change buffer record in an X-latched page
+@param not_redundant whether another format than ROW_FORMAT=REDUNDANT is used
+@param n_fields number of index record fields
+@param types type information
+@param heap memory heap
+@param index dummy index metadata
+@return the index entry for applying the operation */
+static dtuple_t *ibuf_entry_build(const rec_t *ibuf_rec, ulint not_redundant,
+ ulint n_fields, const byte *types,
+ mem_heap_t *heap, dict_index_t *&index)
{
dtuple_t* tuple;
dfield_t* field;
- ulint n_fields;
- const byte* types;
const byte* data;
ulint len;
- ulint info_len;
- ulint i;
- ulint comp;
- dict_index_t* index;
-
- ut_ad(mtr->memo_contains_page_flagged(ibuf_rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
- ut_a(*data == 0);
- ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER;
tuple = dtuple_create(heap, n_fields);
- types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
-
- ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL);
-
- index = ibuf_dummy_index_create(n_fields, comp);
-
- len -= info_len;
- types += info_len;
-
- ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+ index = dict_mem_index_create(
+ dict_table_t::create({C_STRING_WITH_LEN("")}, nullptr,
+ n_fields, 0,
+ not_redundant ? DICT_TF_COMPACT : 0, 0),
+ "IBUF_DUMMY", 0, n_fields);
+ /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+ ut_d(index->cached = true);
+ ut_d(index->is_dummy = true);
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
data = rec_get_nth_field_old(
@@ -1382,8 +208,7 @@ ibuf_build_entry_from_ibuf_rec_func(
dfield_set_data(field, data, len);
dtype_new_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+ dfield_get_type(field), types + i * 6);
ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
}
@@ -1393,2220 +218,82 @@ ibuf_build_entry_from_ibuf_rec_func(
/* Prevent an ut_ad() failure in page_zip_write_rec() by
adding system columns to the dummy table pointed to by the
- dummy secondary index. The insert buffer is only used for
+ dummy secondary index. The change buffer was only used for
secondary indexes, whose records never contain any system
columns, such as DB_TRX_ID. */
ut_d(dict_table_add_system_columns(index->table, index->table->heap));
-
- *pindex = index;
-
return(tuple);
}
-/******************************************************************//**
-Get the data size.
-@return size of fields */
-UNIV_INLINE
-ulint
-ibuf_rec_get_size(
-/*==============*/
- const rec_t* rec, /*!< in: ibuf record */
- const byte* types, /*!< in: fields */
- ulint n_fields, /*!< in: number of fields */
- ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT,
- nonzero=ROW_FORMAT=COMPACT */
+/** Removes a page from the free list and frees it to the fsp system.
+@param mtr mini-transaction
+@return error code
+@retval DB_SUCCESS if more work may remain to be done
+@retval DB_SUCCESS_LOCKED_REC if everything was freed */
+ATTRIBUTE_COLD static dberr_t ibuf_remove_free_page(mtr_t &mtr)
{
- ulint i;
- ulint field_offset;
- ulint types_offset;
- ulint size = 0;
-
- field_offset = IBUF_REC_FIELD_USER;
- types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-
- for (i = 0; i < n_fields; i++) {
- ulint len;
- dtype_t dtype;
-
- rec_get_nth_field_offs_old(rec, i + field_offset, &len);
-
- if (len != UNIV_SQL_NULL) {
- size += len;
- } else {
- dtype_new_read_for_order_and_null_size(&dtype, types);
-
- size += dtype_get_sql_null_size(&dtype, comp);
- }
-
- types += types_offset;
- }
-
- return(size);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(rec)
-#endif
+ log_free_check();
-/********************************************************************//**
-Returns the space taken by a stored non-clustered index entry if converted to
-an index record.
-@return size of index record in bytes + an upper limit of the space
-taken in the page directory */
-static
-ulint
-ibuf_rec_get_volume_func(
-/*=====================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* ibuf_rec)/*!< in: ibuf record */
-{
- ulint len;
- const byte* data;
- const byte* types;
- ulint n_fields;
- ulint data_size;
- ulint comp;
- ibuf_op_t op;
- ulint info_len;
-
- ut_ad(mtr->memo_contains_page_flagged(ibuf_rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
-
- data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
- ut_a(len == 1);
- ut_a(*data == 0);
-
- types = rec_get_nth_field_old(
- ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
-
- ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
-
- if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
- /* Delete-marking a record doesn't take any
- additional space, and while deleting a record
- actually frees up space, we have to play it safe and
- pretend it takes no additional space (the record
- might not exist, etc.). */
-
- return(0);
- } else if (comp) {
- dtuple_t* entry;
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
-
- entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
- heap, &dummy_index);
-
- volume = rec_get_converted_size(dummy_index, entry, 0);
-
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
-
- return(volume + page_dir_calc_reserved_space(1));
- }
-
- types += info_len;
- n_fields = rec_get_n_fields_old(ibuf_rec)
- - IBUF_REC_FIELD_USER;
-
- data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
-
- return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
- + page_dir_calc_reserved_space(1));
-}
-
-/*********************************************************************//**
-Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index.
-
-NOTE that the original entry must be kept because we copy pointers to
-its fields.
-
-@return own: entry to insert into an ibuf index tree */
-static
-dtuple_t*
-ibuf_entry_build(
-/*=============*/
- ibuf_op_t op, /*!< in: operation type */
- dict_index_t* index, /*!< in: non-clustered index */
- const dtuple_t* entry, /*!< in: entry for a non-clustered index */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number where entry should
- be inserted */
- ulint counter,/*!< in: counter value;
- ULINT_UNDEFINED=not used */
- mem_heap_t* heap) /*!< in: heap into which to build */
-{
- dtuple_t* tuple;
- dfield_t* field;
- const dfield_t* entry_field;
- ulint n_fields;
- byte* buf;
- byte* ti;
- byte* type_info;
- ulint i;
-
- ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
- ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
- ut_ad(op < IBUF_OP_COUNT);
-
- /* We have to build a tuple with the following fields:
-
- 1-4) These are described at the top of this file.
-
- 5) The rest of the fields are copied from the entry.
-
- All fields in the tuple are ordered like the type binary in our
- insert buffer tree. */
-
- n_fields = dtuple_get_n_fields(entry);
-
- tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER);
-
- /* 1) Space Id */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, space);
-
- dfield_set_data(field, buf, 4);
-
- /* 2) Marker byte */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
-
- /* We set the marker byte zero */
-
- mach_write_to_1(buf, 0);
-
- dfield_set_data(field, buf, 1);
-
- /* 3) Page number */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, page_no);
-
- dfield_set_data(field, buf, 4);
-
- /* 4) Type info, part #1 */
-
- if (counter == ULINT_UNDEFINED) {
- i = dict_table_is_comp(index->table) ? 1 : 0;
- } else {
- ut_ad(counter <= 0xFFFF);
- i = IBUF_REC_INFO_SIZE;
- }
-
- ti = type_info = static_cast<byte*>(
- mem_heap_alloc(
- heap,
- i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
-
- switch (i) {
- default:
- ut_error;
- break;
- case 1:
- /* set the flag for ROW_FORMAT=COMPACT */
- *ti++ = 0;
- /* fall through */
- case 0:
- /* the old format does not allow delete buffering */
- ut_ad(op == IBUF_OP_INSERT);
- break;
- case IBUF_REC_INFO_SIZE:
- mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
-
- ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
- ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
- ? IBUF_REC_COMPACT : 0;
- ti += IBUF_REC_INFO_SIZE;
- break;
- }
-
- /* 5+) Fields from the entry */
-
- for (i = 0; i < n_fields; i++) {
- ulint fixed_len;
- const dict_field_t* ifield;
-
- field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER);
- entry_field = dtuple_get_nth_field(entry, i);
- dfield_copy(field, entry_field);
-
- ifield = dict_index_get_nth_field(index, i);
- ut_ad(!ifield->descending);
- /* Prefix index columns of fixed-length columns are of
- fixed length. However, in the function call below,
- dfield_get_type(entry_field) contains the fixed length
- of the column in the clustered index. Replace it with
- the fixed length of the secondary index column. */
- fixed_len = ifield->fixed_len;
-
-#ifdef UNIV_DEBUG
- if (fixed_len) {
- /* dict_index_add_col() should guarantee these */
- ut_ad(fixed_len <= (ulint)
- dfield_get_type(entry_field)->len);
- if (ifield->prefix_len) {
- ut_ad(ifield->prefix_len == fixed_len);
- } else {
- ut_ad(fixed_len == (ulint)
- dfield_get_type(entry_field)->len);
- }
- }
-#endif /* UNIV_DEBUG */
-
- dtype_new_store_for_order_and_null_size(
- ti, dfield_get_type(entry_field), fixed_len);
- ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
- }
-
- /* 4) Type info, part #2 */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
-
- dfield_set_data(field, type_info, ulint(ti - type_info));
-
- /* Set all the types in the new tuple binary */
-
- dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER);
-
- return(tuple);
-}
-
-/*********************************************************************//**
-Builds a search tuple used to search buffered inserts for an index page.
-This is for >= 4.1.x format records.
-@return own: search tuple */
-static
-dtuple_t*
-ibuf_search_tuple_build(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number */
- mem_heap_t* heap) /*!< in: heap into which to build */
-{
- dtuple_t* tuple;
- dfield_t* field;
- byte* buf;
-
- tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
-
- /* Store the space id in tuple */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, space);
-
- dfield_set_data(field, buf, 4);
-
- /* Store the new format record marker byte */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
-
- mach_write_to_1(buf, 0);
-
- dfield_set_data(field, buf, 1);
-
- /* Store the page number in tuple */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, page_no);
-
- dfield_set_data(field, buf, 4);
-
- dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA);
-
- return(tuple);
-}
-
-/*********************************************************************//**
-Checks if there are enough pages in the free list of the ibuf tree that we
-dare to start a pessimistic insert to the insert buffer.
-@return whether enough free pages in list */
-static inline bool ibuf_data_enough_free_for_insert()
-{
- mysql_mutex_assert_owner(&ibuf_mutex);
-
- /* We want a big margin of free pages, because a B-tree can sometimes
- grow in size also if records are deleted from it, as the node pointers
- can change, and we must make sure that we are able to delete the
- inserts buffered for pages that we read to the buffer pool, without
- any risk of running out of free space in the insert buffer. */
-
- return(ibuf.free_list_len >= (ibuf.size / 2) + 3 * ibuf.height);
-}
-
-/*********************************************************************//**
-Checks if there are enough pages in the free list of the ibuf tree that we
-should remove them and free to the file space management.
-@return TRUE if enough free pages in list */
-UNIV_INLINE
-ibool
-ibuf_data_too_much_free(void)
-/*=========================*/
-{
- mysql_mutex_assert_owner(&ibuf_mutex);
-
- return(ibuf.free_list_len >= 3 + (ibuf.size / 2) + 3 * ibuf.height);
-}
-
-/** Allocate a change buffer page.
-@retval true on success
-@retval false if no space left */
-static bool ibuf_add_free_page()
-{
- mtr_t mtr;
- page_t* header_page;
- buf_block_t* block;
-
- mtr.start();
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr.x_lock_space(fil_system.sys_space);
- header_page = ibuf_header_page_get(&mtr);
- if (!header_page) {
- mtr.commit();
- return false;
- }
-
- /* Allocate a new page: NOTE that if the page has been a part of a
- non-clustered index which has subsequently been dropped, then the
- page may have buffered inserts in the insert buffer, and these
- should be deleted from there. These get deleted when the page
- allocation creates the page in buffer. Thus the call below may end
- up calling the insert buffer routines and, as we yet have no latches
- to insert buffer tree pages, these routines can run without a risk
- of a deadlock. This is the reason why we created a special ibuf
- header page apart from the ibuf tree. */
-
- dberr_t err;
- block = fseg_alloc_free_page_general(
- header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
- false, &mtr, &mtr, &err);
-
- if (!block) {
- mtr.commit();
- return false;
- }
-
- ut_ad(block->page.lock.not_recursive());
- ibuf_enter(&mtr);
- mysql_mutex_lock(&ibuf_mutex);
-
- mtr.write<2>(*block, block->page.frame + FIL_PAGE_TYPE,
- FIL_PAGE_IBUF_FREE_LIST);
- buf_block_t* ibuf_root = ibuf_tree_root_get(&mtr);
- if (UNIV_UNLIKELY(!ibuf_root)) {
-corrupted:
- /* Do not bother to try to free the allocated block, because
- the change buffer is seriously corrupted already. */
- mysql_mutex_unlock(&ibuf_mutex);
- ibuf_mtr_commit(&mtr);
- return false;
- }
-
- /* Add the page to the free list and update the ibuf size data */
-
- err = flst_add_last(ibuf_root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- &mtr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- goto corrupted;
- }
-
- /* Set the bit indicating that this page is now an ibuf tree page
- (level 2 page) */
-
- const page_id_t page_id(block->page.id());
- buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(page_id, 0, &mtr);
-
- if (UNIV_UNLIKELY(!bitmap_page)) {
- goto corrupted;
- }
-
- ibuf.seg_size++;
- ibuf.free_list_len++;
-
- mysql_mutex_unlock(&ibuf_mutex);
-
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>(bitmap_page, page_id,
- srv_page_size, true, &mtr);
- ibuf_mtr_commit(&mtr);
- return true;
-}
-
-/*********************************************************************//**
-Removes a page from the free list and frees it to the fsp system. */
-static void ibuf_remove_free_page()
-{
- mtr_t mtr;
- mtr_t mtr2;
- page_t* header_page;
-
- log_free_check();
-
- mtr_start(&mtr);
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
-
- mtr.x_lock_space(fil_system.sys_space);
- header_page = ibuf_header_page_get(&mtr);
-
- /* Prevent pessimistic inserts to insert buffer trees for a while */
- ibuf_enter(&mtr);
- mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
- mysql_mutex_lock(&ibuf_mutex);
-
- if (!header_page || !ibuf_data_too_much_free()) {
-early_exit:
- mysql_mutex_unlock(&ibuf_mutex);
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
-
- ibuf_mtr_commit(&mtr);
-
- return;
- }
-
- ibuf_mtr_start(&mtr2);
-
- buf_block_t* root = ibuf_tree_root_get(&mtr2);
-
- if (UNIV_UNLIKELY(!root)) {
- ibuf_mtr_commit(&mtr2);
- goto early_exit;
- }
-
- mysql_mutex_unlock(&ibuf_mutex);
-
- const uint32_t page_no = flst_get_last(PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST
- + root->page.frame).page;
-
- /* NOTE that we must release the latch on the ibuf tree root
- because in fseg_free_page we access level 1 pages, and the root
- is a level 2 page. */
-
- ibuf_mtr_commit(&mtr2);
- ibuf_exit(&mtr);
-
- /* Since pessimistic inserts were prevented, we know that the
- page is still in the free list. NOTE that also deletes may take
- pages from the free list, but they take them from the start, and
- the free list was so long that they cannot have taken the last
- page from it. */
-
- compile_time_assert(IBUF_SPACE_ID == 0);
- const page_id_t page_id{IBUF_SPACE_ID, page_no};
- buf_block_t* bitmap_page = nullptr;
- dberr_t err = fseg_free_page(
- header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- fil_system.sys_space, page_no, &mtr);
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- ibuf_enter(&mtr);
-
- mysql_mutex_lock(&ibuf_mutex);
-
- root = ibuf_tree_root_get(&mtr, &err);
- if (UNIV_UNLIKELY(!root)) {
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
- goto func_exit;
- }
-
- ut_ad(page_no == flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST
- + root->page.frame).page);
-
- /* Remove the page from the free list and update the ibuf size data */
- if (buf_block_t* block =
- buf_page_get_gen(page_id, 0, RW_X_LATCH, nullptr, BUF_GET,
- &mtr, &err)) {
- err = flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- block,
- PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- &mtr);
- }
-
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
+ mtr.start();
- if (err == DB_SUCCESS) {
- ibuf.seg_size--;
- ibuf.free_list_len--;
- bitmap_page = ibuf_bitmap_get_map_page(page_id, 0, &mtr);
- }
+ mtr.x_lock_space(fil_system.sys_space);
+ dberr_t err;
+ buf_block_t* header= buf_page_get_gen(ibuf_header, 0, RW_X_LATCH, nullptr,
+ BUF_GET, &mtr, &err);
+ if (!header)
+ {
func_exit:
- mysql_mutex_unlock(&ibuf_mutex);
-
- if (bitmap_page) {
- /* Set the bit indicating that this page is no more an
- ibuf tree page (level 2 page) */
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>(
- bitmap_page, page_id, srv_page_size, false, &mtr);
- }
-
- if (err == DB_SUCCESS) {
- buf_page_free(fil_system.sys_space, page_no, &mtr);
- }
-
- ibuf_mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-void
-ibuf_free_excess_pages(void)
-/*========================*/
-{
- if (UNIV_UNLIKELY(!ibuf.index)) return;
- /* Free at most a few pages at a time, so that we do not delay the
- requested service too much */
-
- for (ulint i = 0; i < 4; i++) {
-
- ibool too_much_free;
-
- mysql_mutex_lock(&ibuf_mutex);
- too_much_free = ibuf_data_too_much_free();
- mysql_mutex_unlock(&ibuf_mutex);
-
- if (!too_much_free) {
- return;
- }
-
- ibuf_remove_free_page();
- }
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,pages,n_stored)
-#else /* UNIV_DEBUG */
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,ids,pages,n_stored)
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Reads page numbers from a leaf in an ibuf tree.
-@return a lower limit for the combined volume of records which will be
-merged */
-static
-ulint
-ibuf_get_merge_page_nos_func(
-/*=========================*/
- ibool contract,/*!< in: TRUE if this function is called to
- contract the tree, FALSE if this is called
- when a single page becomes full and we look
- if it pays to read also nearby pages */
- const rec_t* rec, /*!< in: insert buffer record */
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction holding rec */
-#endif /* UNIV_DEBUG */
- uint32_t* space_ids,/*!< in/out: space id's of the pages */
- uint32_t* page_nos,/*!< in/out: buffer for at least
- IBUF_MAX_N_PAGES_MERGED many page numbers;
- the page numbers are in an ascending order */
- ulint* n_stored)/*!< out: number of page numbers stored to
- page_nos in this function */
-{
- uint32_t prev_page_no;
- uint32_t prev_space_id;
- uint32_t first_page_no;
- uint32_t first_space_id;
- uint32_t rec_page_no;
- uint32_t rec_space_id;
- ulint sum_volumes;
- ulint volume_for_page;
- ulint rec_volume;
- ulint limit;
- ulint n_pages;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- *n_stored = 0;
-
- if (page_rec_is_supremum(rec)) {
-
- rec = page_rec_get_prev_const(rec);
- if (UNIV_UNLIKELY(!rec)) {
-corruption:
- ut_ad("corrupted page" == 0);
- return 0;
- }
- }
-
- if (page_rec_is_infimum(rec)) {
- rec = page_rec_get_next_const(rec);
- if (!rec || page_rec_is_supremum(rec)) {
- return 0;
- }
- }
-
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED,
- buf_pool_get_curr_size() / 4);
-
- first_page_no = ibuf_rec_get_page_no(mtr, rec);
- first_space_id = ibuf_rec_get_space(mtr, rec);
- n_pages = 0;
- prev_page_no = 0;
- prev_space_id = 0;
-
- /* Go backwards from the first rec until we reach the border of the
- 'merge area', or the page start or the limit of storeable pages is
- reached */
-
- while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
-
- rec_page_no = ibuf_rec_get_page_no(mtr, rec);
- rec_space_id = ibuf_rec_get_space(mtr, rec);
-
- if (rec_space_id != first_space_id
- || (rec_page_no / IBUF_MERGE_AREA)
- != (first_page_no / IBUF_MERGE_AREA)) {
-
- break;
- }
-
- if (rec_page_no != prev_page_no
- || rec_space_id != prev_space_id) {
- n_pages++;
- }
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) {
- goto corruption;
- }
- }
-
- rec = page_rec_get_next_const(rec);
-
- /* At the loop start there is no prev page; we mark this with a pair
- of space id, page no (0, 0) for which there can never be entries in
- the insert buffer */
-
- prev_page_no = 0;
- prev_space_id = 0;
- sum_volumes = 0;
- volume_for_page = 0;
-
- while (*n_stored < limit && rec) {
- if (page_rec_is_supremum(rec)) {
- /* When no more records available, mark this with
- another 'impossible' pair of space id, page no */
- rec_page_no = 1;
- rec_space_id = 0;
- } else {
- rec_page_no = ibuf_rec_get_page_no(mtr, rec);
- rec_space_id = ibuf_rec_get_space(mtr, rec);
- /* In the system tablespace the smallest
- possible secondary index leaf page number is
- bigger than FSP_DICT_HDR_PAGE_NO (7).
- In all tablespaces, pages 0 and 1 are reserved
- for the allocation bitmap and the change
- buffer bitmap. In file-per-table tablespaces,
- a file segment inode page will be created at
- page 2 and the clustered index tree is created
- at page 3. So for file-per-table tablespaces,
- page 4 is the smallest possible secondary
- index leaf page. CREATE TABLESPACE also initially
- uses pages 2 and 3 for the first created table,
- but that table may be dropped, allowing page 2
- to be reused for a secondary index leaf page.
- To keep this assertion simple, just
- make sure the page is >= 2. */
- ut_ad(rec_page_no >= FSP_FIRST_INODE_PAGE_NO);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
-#endif
- if ((rec_space_id != prev_space_id
- || rec_page_no != prev_page_no)
- && (prev_space_id != 0 || prev_page_no != 0)) {
-
- if (contract
- || (prev_page_no == first_page_no
- && prev_space_id == first_space_id)
- || (volume_for_page
- > ((IBUF_MERGE_THRESHOLD - 1)
- * 4U << srv_page_size_shift
- / IBUF_PAGE_SIZE_PER_FREE_SPACE)
- / IBUF_MERGE_THRESHOLD)) {
-
- space_ids[*n_stored] = prev_space_id;
- page_nos[*n_stored] = prev_page_no;
-
- (*n_stored)++;
-
- sum_volumes += volume_for_page;
- }
-
- if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
-
- break;
- }
-
- volume_for_page = 0;
- }
-
- if (rec_page_no == 1 && rec_space_id == 0) {
- /* Supremum record */
-
- break;
- }
-
- rec_volume = ibuf_rec_get_volume(mtr, rec);
-
- volume_for_page += rec_volume;
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_next_const(rec);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
-#if 0
- fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
- *n_stored, sum_volumes);
-#endif
- return(sum_volumes);
-}
-
-/*******************************************************************//**
-Get the matching records for space id.
-@return current rec or NULL */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const rec_t*
-ibuf_get_user_rec(
-/*===============*/
- btr_pcur_t* pcur, /*!< in: the current cursor */
- mtr_t* mtr) /*!< in: mini transaction */
-{
- do {
- const rec_t* rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_user_rec(rec)) {
- return(rec);
- }
- } while (btr_pcur_move_to_next(pcur, mtr));
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Reads page numbers for a space id from an ibuf tree.
-@return a lower limit for the combined volume of records which will be
-merged */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
-ibuf_get_merge_pages(
-/*=================*/
- btr_pcur_t* pcur, /*!< in/out: cursor */
- uint32_t space, /*!< in: space for which to merge */
- ulint limit, /*!< in: max page numbers to read */
- uint32_t* pages, /*!< out: pages read */
- uint32_t* spaces, /*!< out: spaces read */
- ulint* n_pages,/*!< out: number of pages read */
- mtr_t* mtr) /*!< in: mini transaction */
-{
- const rec_t* rec;
- ulint volume = 0;
-
- *n_pages = 0;
-
- while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0
- && ibuf_rec_get_space(mtr, rec) == space
- && *n_pages < limit) {
-
- uint32_t page_no = ibuf_rec_get_page_no(mtr, rec);
-
- if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
- spaces[*n_pages] = space;
- pages[*n_pages] = page_no;
- ++*n_pages;
- }
-
- volume += ibuf_rec_get_volume(mtr, rec);
-
- btr_pcur_move_to_next(pcur, mtr);
- }
-
- return(volume);
-}
-
-/**
-Delete a change buffer record.
-@param[in] page_id page identifier
-@param[in,out] pcur persistent cursor positioned on the record
-@param[in] search_tuple search key for (space,page_no)
-@param[in,out] mtr mini-transaction
-@return whether mtr was committed (due to pessimistic operation) */
-static MY_ATTRIBUTE((warn_unused_result, nonnull))
-bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
- const dtuple_t* search_tuple, mtr_t* mtr);
-
-/** Delete the change buffer records for the given page id
-@param page_id page identifier */
-static void ibuf_delete_recs(const page_id_t page_id)
-{
- if (!ibuf.index || srv_read_only_mode)
- return;
- dfield_t dfield[IBUF_REC_FIELD_METADATA];
- dtuple_t tuple {0,IBUF_REC_FIELD_METADATA,IBUF_REC_FIELD_METADATA,
- dfield,0,nullptr
-#ifdef UNIV_DEBUG
- ,DATA_TUPLE_MAGIC_N
-#endif
- };
- byte space_id[4], page_no[4];
-
- mach_write_to_4(space_id, page_id.space());
- mach_write_to_4(page_no, page_id.page_no());
+ mtr.commit();
+ return err;
+ }
- dfield_set_data(&dfield[0], space_id, 4);
- dfield_set_data(&dfield[1], field_ref_zero, 1);
- dfield_set_data(&dfield[2], page_no, 4);
- dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA);
+ buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_X_LATCH,
+ nullptr, BUF_GET, &mtr, &err);
- mtr_t mtr;
-loop:
- btr_pcur_t pcur;
- pcur.btr_cur.page_cur.index= ibuf.index;
- ibuf_mtr_start(&mtr);
- if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr))
+ if (UNIV_UNLIKELY(!root))
goto func_exit;
- if (!btr_pcur_is_on_user_rec(&pcur))
- {
- ut_ad(btr_pcur_is_after_last_on_page(&pcur));
- goto func_exit;
- }
- for (;;)
+ const uint32_t page_no= flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST +
+ root->page.frame).page;
+ if (page_no == FIL_NULL)
{
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
- const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur);
- if (ibuf_rec_get_space(&mtr, ibuf_rec) != page_id.space()
- || ibuf_rec_get_page_no(&mtr, ibuf_rec) != page_id.page_no())
- break;
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(page_id, &pcur, &tuple, &mtr))
- {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
- ut_ad(mtr.has_committed());
- goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur))
- {
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- goto loop;
- }
+ mtr.set_modified(*root);
+ fsp_init_file_page(fil_system.sys_space, root, &mtr);
+ err= DB_SUCCESS_LOCKED_REC;
+ goto func_exit;
}
-func_exit:
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-}
-
-/** Merge the change buffer to some pages. */
-static void ibuf_read_merge_pages(const uint32_t* space_ids,
- const uint32_t* page_nos, ulint n_stored)
-{
- for (ulint i = 0; i < n_stored; i++) {
- const uint32_t space_id = space_ids[i];
- fil_space_t* s = fil_space_t::get(space_id);
- if (!s) {
-tablespace_deleted:
- /* The tablespace was not found: remove all
- entries for it */
- ibuf_delete_for_discarded_space(space_id);
- while (i + 1 < n_stored
- && space_ids[i + 1] == space_id) {
- i++;
- }
- continue;
- }
-
- const ulint zip_size = s->zip_size(), size = s->size;
- s->release();
- mtr_t mtr;
-
- if (UNIV_LIKELY(page_nos[i] < size)) {
- mtr.start();
- dberr_t err;
- buf_block_t *block =
- buf_page_get_gen(page_id_t(space_id, page_nos[i]),
- zip_size, RW_X_LATCH, nullptr,
- BUF_GET_POSSIBLY_FREED,
- &mtr, &err, true);
- bool remove = !block
- || fil_page_get_type(block->page.frame)
- != FIL_PAGE_INDEX
- || !page_is_leaf(block->page.frame);
- mtr.commit();
- if (err == DB_TABLESPACE_DELETED) {
- goto tablespace_deleted;
- }
- if (!remove) {
- continue;
- }
- }
-
- if (srv_shutdown_state == SRV_SHUTDOWN_NONE
- || srv_fast_shutdown) {
- continue;
- }
-
- /* The following code works around a hang when the
- change buffer is corrupted, likely due to the
- failure of ibuf_merge_or_delete_for_page() to
- invoke ibuf_delete_recs() if (!bitmap_bits).
-
- It also introduced corruption by itself in the
- following scenario:
-
- (1) We merged buffered changes in buf_page_get_gen()
- (2) We committed the mini-transaction
- (3) Redo log and the page with the merged changes is written
- (4) A write completion callback thread evicts the page.
- (5) Other threads buffer changes for that page.
- (6) We will wrongly discard those newly buffered changes below.
-
- To prevent this scenario, we will only invoke this code
- on shutdown. A call to ibuf_max_size_update(0) will cause
- ibuf_insert_low() to refuse to insert anything into the
- change buffer. */
-
- /* Prevent an infinite loop, by removing entries from
- the change buffer in the case the bitmap bits were
- wrongly clear even though buffered changes exist. */
- ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i]));
- }
-}
-
-/** Contract the change buffer by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read
-@retval 0 if ibuf.empty */
-ulint ibuf_contract()
-{
- if (UNIV_UNLIKELY(!ibuf.index)) return 0;
- mtr_t mtr;
- btr_cur_t cur;
- ulint sum_sizes;
- uint32_t page_nos[IBUF_MAX_N_PAGES_MERGED];
- uint32_t space_ids[IBUF_MAX_N_PAGES_MERGED];
-
- ibuf_mtr_start(&mtr);
-
- if (cur.open_leaf(true, ibuf.index, BTR_SEARCH_LEAF, &mtr) !=
- DB_SUCCESS) {
- return 0;
- }
-
- ut_ad(page_validate(btr_cur_get_page(&cur), ibuf.index));
-
- if (page_is_empty(btr_cur_get_page(&cur))) {
- /* If a B-tree page is empty, it must be the root page
- and the whole B-tree must be empty. InnoDB does not
- allow empty B-tree pages other than the root. */
- ut_ad(ibuf.empty);
- ut_ad(btr_cur_get_block(&cur)->page.id()
- == page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO));
-
- ibuf_mtr_commit(&mtr);
-
- return(0);
- }
-
- ulint n_pages = 0;
- sum_sizes = ibuf_get_merge_page_nos(TRUE,
- btr_cur_get_rec(&cur), &mtr,
- space_ids,
- page_nos, &n_pages);
- ibuf_mtr_commit(&mtr);
-
- ibuf_read_merge_pages(space_ids, page_nos, n_pages);
-
- return(sum_sizes + 1);
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages referring to space_id
-to the buffer pool.
-@returns number of pages merged.*/
-ulint
-ibuf_merge_space(
-/*=============*/
- ulint space) /*!< in: tablespace id to merge */
-{
- if (UNIV_UNLIKELY(!ibuf.index)) return 0;
- mtr_t mtr;
- btr_pcur_t pcur;
-
- dfield_t dfield[IBUF_REC_FIELD_METADATA];
- dtuple_t tuple {0, IBUF_REC_FIELD_METADATA,
- IBUF_REC_FIELD_METADATA,dfield,0,nullptr
-#ifdef UNIV_DEBUG
- , DATA_TUPLE_MAGIC_N
-#endif
- };
- byte space_id[4];
-
- mach_write_to_4(space_id, space);
-
- dfield_set_data(&dfield[0], space_id, 4);
- dfield_set_data(&dfield[1], field_ref_zero, 1);
- dfield_set_data(&dfield[2], field_ref_zero, 4);
-
- dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA);
- ulint n_pages = 0;
-
- ut_ad(space < SRV_SPACE_ID_UPPER_BOUND);
-
- log_free_check();
- ibuf_mtr_start(&mtr);
-
- /* Position the cursor on the first matching record. */
-
- pcur.btr_cur.page_cur.index = ibuf.index;
- dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF,
- &pcur, &mtr);
- ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur),
- ibuf.index));
-
- ulint sum_sizes = 0;
- uint32_t pages[IBUF_MAX_N_PAGES_MERGED];
- uint32_t spaces[IBUF_MAX_N_PAGES_MERGED];
-
- if (err != DB_SUCCESS) {
- } else if (page_is_empty(btr_pcur_get_page(&pcur))) {
- /* If a B-tree page is empty, it must be the root page
- and the whole B-tree must be empty. InnoDB does not
- allow empty B-tree pages other than the root. */
- ut_ad(ibuf.empty);
- ut_ad(btr_pcur_get_block(&pcur)->page.id()
- == page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO));
- } else {
-
- sum_sizes = ibuf_get_merge_pages(
- &pcur, uint32_t(space), IBUF_MAX_N_PAGES_MERGED,
- &pages[0], &spaces[0], &n_pages,
- &mtr);
- ib::info() << "Size of pages merged " << sum_sizes;
- }
-
- ibuf_mtr_commit(&mtr);
-
- if (n_pages > 0) {
- ut_ad(n_pages <= UT_ARR_SIZE(pages));
-
-#ifdef UNIV_DEBUG
- for (ulint i = 0; i < n_pages; ++i) {
- ut_ad(spaces[i] == space);
- }
-#endif /* UNIV_DEBUG */
-
- ibuf_read_merge_pages(spaces, pages, n_pages);
- }
-
- return(n_pages);
-}
-
-/*********************************************************************//**
-Contract insert buffer trees after insert if they are too big. */
-UNIV_INLINE
-void
-ibuf_contract_after_insert(
-/*=======================*/
- ulint entry_size) /*!< in: size of a record which was inserted
- into an ibuf tree */
-{
- /* dirty comparison, to avoid contention on ibuf_mutex */
- if (ibuf.size < ibuf.max_size) {
- return;
- }
-
- /* Contract at least entry_size many bytes */
- ulint sum_sizes = 0;
- ulint size;
-
- do {
- size = ibuf_contract();
- sum_sizes += size;
- } while (size > 0 && sum_sizes < entry_size);
-}
-
-/** Determine if a change buffer record has been encountered already.
-@param rec change buffer record in the MySQL 5.5 format
-@param hash hash table of encountered records
-@param size number of elements in hash
-@retval true if a distinct record
-@retval false if this may be duplicating an earlier record */
-static bool ibuf_get_volume_buffered_hash(const rec_t *rec, ulint *hash,
- ulint size)
-{
- ut_ad(rec_get_n_fields_old(rec) > IBUF_REC_FIELD_USER);
- const ulint start= rec_get_field_start_offs(rec, IBUF_REC_FIELD_USER);
- const ulint len= rec_get_data_size_old(rec) - start;
- const uint32_t fold= my_crc32c(0, rec + start, len);
- hash+= (fold / (CHAR_BIT * sizeof *hash)) % size;
- ulint bitmask= static_cast<ulint>(1) << (fold % (CHAR_BIT * sizeof(*hash)));
-
- if (*hash & bitmask)
- return false;
-
- /* We have not seen this record yet. Remember it. */
- *hash|= bitmask;
- return true;
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
- ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs)
-#else /* UNIV_DEBUG */
-# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
- ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs)
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Update the estimate of the number of records on a page, and
-get the space taken by merging the buffered record to the index page.
-@return size of index record in bytes + an upper limit of the space
-taken in the page directory */
-static
-ulint
-ibuf_get_volume_buffered_count_func(
-/*================================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec, /*!< in: insert buffer record */
- ulint* hash, /*!< in/out: hash array */
- ulint size, /*!< in: number of elements in hash array */
- lint* n_recs) /*!< in/out: estimated number of records
- on the page that rec points to */
-{
- ulint len;
- ibuf_op_t ibuf_op;
- const byte* types;
- ulint n_fields;
-
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- n_fields = rec_get_n_fields_old(rec);
- ut_ad(n_fields > IBUF_REC_FIELD_USER);
- n_fields -= IBUF_REC_FIELD_USER;
-
- rec_get_nth_field_offs_old(rec, 1, &len);
- /* This function is only invoked when buffering new
- operations. All pre-4.1 records should have been merged
- when the database was started up. */
- ut_a(len == 1);
-
- if (rec_get_deleted_flag(rec, 0)) {
- /* This record has been merged already,
- but apparently the system crashed before
- the change was discarded from the buffer.
- Pretend that the record does not exist. */
- return(0);
- }
-
- types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- switch (UNIV_EXPECT(int(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE),
- IBUF_REC_INFO_SIZE)) {
- default:
- ut_error;
- case 0:
- /* This ROW_TYPE=REDUNDANT record does not include an
- operation counter. Exclude it from the *n_recs,
- because deletes cannot be buffered if there are
- old-style inserts buffered for the page. */
-
- len = ibuf_rec_get_size(rec, types, n_fields, 0);
-
- return(len
- + rec_get_converted_extra_size(len, n_fields, 0)
- + page_dir_calc_reserved_space(1));
- case 1:
- /* This ROW_TYPE=COMPACT record does not include an
- operation counter. Exclude it from the *n_recs,
- because deletes cannot be buffered if there are
- old-style inserts buffered for the page. */
- goto get_volume_comp;
-
- case IBUF_REC_INFO_SIZE:
- ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
- break;
- }
-
- switch (ibuf_op) {
- case IBUF_OP_INSERT:
- /* Inserts can be done by updating a delete-marked record.
- Because delete-mark and insert operations can be pointing to
- the same records, we must not count duplicates. */
- case IBUF_OP_DELETE_MARK:
- /* There must be a record to delete-mark.
- See if this record has been already buffered. */
- if (n_recs && ibuf_get_volume_buffered_hash(rec, hash, size)) {
- (*n_recs)++;
- }
-
- if (ibuf_op == IBUF_OP_DELETE_MARK) {
- /* Setting the delete-mark flag does not
- affect the available space on the page. */
- return(0);
- }
- break;
- case IBUF_OP_DELETE:
- /* A record will be removed from the page. */
- if (n_recs) {
- (*n_recs)--;
- }
- /* While deleting a record actually frees up space,
- we have to play it safe and pretend that it takes no
- additional space (the record might not exist, etc.). */
- return(0);
- default:
- ut_error;
- }
-
- ut_ad(ibuf_op == IBUF_OP_INSERT);
-
-get_volume_comp:
- {
- dtuple_t* entry;
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
-
- entry = ibuf_build_entry_from_ibuf_rec(
- mtr, rec, heap, &dummy_index);
- volume = rec_get_converted_size(dummy_index, entry, 0);
+ /* Since pessimistic inserts were prevented, we know that the
+ page is still in the free list. NOTE that also deletes may take
+ pages from the free list, but they take them from the start, and
+ the free list was so long that they cannot have taken the last
+ page from it. */
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
+ err= fseg_free_page(header->page.frame + PAGE_DATA, fil_system.sys_space,
+ page_no, &mtr);
- return(volume + page_dir_calc_reserved_space(1));
- }
-}
-
-/*********************************************************************//**
-Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page.
-@return upper limit for the volume of buffered inserts for the index
-page, in bytes; srv_page_size, if the entries for the index page span
-several pages in the insert buffer */
-static
-ulint
-ibuf_get_volume_buffered(
-/*=====================*/
- const btr_pcur_t*pcur, /*!< in: pcur positioned at a place in an
- insert buffer tree where we would insert an
- entry for the index page whose number is
- page_no, latch mode has to be BTR_MODIFY_PREV
- or BTR_MODIFY_TREE */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number of an index page */
- lint* n_recs, /*!< in/out: minimum number of records on the
- page after the buffered changes have been
- applied, or NULL to disable the counting */
- mtr_t* mtr) /*!< in: mini-transaction of pcur */
-{
- ulint volume;
- const rec_t* rec;
- const page_t* page;
- const page_t* prev_page;
- const page_t* next_page;
- /* bitmap of buffered recs */
- ulint hash_bitmap[128 / sizeof(ulint)];
-
- ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
- || (pcur->latch_mode == BTR_MODIFY_TREE));
-
- /* Count the volume of inserts earlier in the alphabetical order than
- pcur */
-
- volume = 0;
-
- if (n_recs) {
- memset(hash_bitmap, 0, sizeof hash_bitmap);
- }
-
- rec = btr_pcur_get_rec(pcur);
- page = page_align(rec);
- ut_ad(page_validate(page, ibuf.index));
-
- if (page_rec_is_supremum(rec)
- && UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) {
-corruption:
- ut_ad("corrupted page" == 0);
- return srv_page_size;
- }
-
- uint32_t prev_page_no;
-
- for (; !page_rec_is_infimum(rec); ) {
- ut_ad(page_align(rec) == page);
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
-
- if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) {
- goto corruption;
- }
- }
-
- /* Look at the previous page */
-
- prev_page_no = btr_page_get_prev(page);
-
- if (prev_page_no == FIL_NULL) {
-
- goto count_later;
- }
-
- if (buf_block_t* block =
- buf_page_get(page_id_t(IBUF_SPACE_ID, prev_page_no),
- 0, RW_X_LATCH, mtr)) {
- prev_page = buf_block_get_frame(block);
- ut_ad(page_validate(prev_page, ibuf.index));
- } else {
- return srv_page_size;
- }
-
- static_assert(FIL_PAGE_NEXT % 4 == 0, "alignment");
- static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
-
- if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_page + FIL_PAGE_NEXT,
- page + FIL_PAGE_OFFSET, 4))) {
- return srv_page_size;
- }
-
- rec = page_rec_get_prev_const(page_get_supremum_rec(prev_page));
-
- if (UNIV_UNLIKELY(!rec)) {
- goto corruption;
- }
-
- for (;;) {
- ut_ad(page_align(rec) == prev_page);
-
- if (page_rec_is_infimum(rec)) {
-
- /* We cannot go to yet a previous page, because we
- do not have the x-latch on it, and cannot acquire one
- because of the latching order: we have to give up */
-
- return(srv_page_size);
- }
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
-
- if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) {
- goto corruption;
- }
- }
-
-count_later:
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_supremum(rec)) {
- rec = page_rec_get_next_const(rec);
- }
-
- for (; !page_rec_is_supremum(rec);
- rec = page_rec_get_next_const(rec)) {
- if (UNIV_UNLIKELY(!rec)) {
- return srv_page_size;
- }
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- return(volume);
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-
- /* Look at the next page */
-
- uint32_t next_page_no = btr_page_get_next(page);
-
- if (next_page_no == FIL_NULL) {
-
- return(volume);
- }
-
- if (buf_block_t* block =
- buf_page_get(page_id_t(IBUF_SPACE_ID, next_page_no),
- 0, RW_X_LATCH, mtr)) {
- next_page = buf_block_get_frame(block);
- ut_ad(page_validate(next_page, ibuf.index));
- } else {
- return srv_page_size;
- }
-
- static_assert(FIL_PAGE_PREV % 4 == 0, "alignment");
- static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
-
- if (UNIV_UNLIKELY(memcmp_aligned<4>(next_page + FIL_PAGE_PREV,
- page + FIL_PAGE_OFFSET, 4))) {
- return 0;
- }
-
- rec = page_get_infimum_rec(next_page);
- rec = page_rec_get_next_const(rec);
-
- for (; ; rec = page_rec_get_next_const(rec)) {
- if (!rec || page_rec_is_supremum(rec)) {
- /* We give up */
- return(srv_page_size);
- }
-
- ut_ad(page_align(rec) == next_page);
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- return(volume);
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-}
-
-/*********************************************************************//**
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-void
-ibuf_update_max_tablespace_id(void)
-/*===============================*/
-{
- if (UNIV_UNLIKELY(!ibuf.index)) return;
- const rec_t* rec;
- const byte* field;
- ulint len;
- btr_pcur_t pcur;
- mtr_t mtr;
-
- ut_ad(!ibuf.index->table->not_redundant());
-
- ibuf_mtr_start(&mtr);
-
- if (pcur.open_leaf(false, ibuf.index, BTR_SEARCH_LEAF, &mtr)
- != DB_SUCCESS) {
-func_exit:
- ibuf_mtr_commit(&mtr);
- return;
- }
-
- ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index));
-
- if (!btr_pcur_move_to_prev(&pcur, &mtr)
- || btr_pcur_is_before_first_on_page(&pcur)) {
- goto func_exit;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-
- ut_a(len == 4);
-
- const uint32_t max_space_id = mach_read_from_4(field);
-
- ibuf_mtr_commit(&mtr);
-
- /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
-
- fil_set_max_space_id_if_bigger(max_space_id);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
- ibuf_get_entry_counter_low_func(mtr,rec,space,page_no)
-#else /* UNIV_DEBUG */
-# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
- ibuf_get_entry_counter_low_func(rec,space,page_no)
-#endif
-/****************************************************************//**
-Helper function for ibuf_get_entry_counter_func. Checks if rec is for
-(space, page_no), and if so, reads counter value from it and returns
-that + 1.
-@retval ULINT_UNDEFINED if the record does not contain any counter
-@retval 0 if the record is not for (space, page_no)
-@retval 1 + previous counter value, otherwise */
-static
-ulint
-ibuf_get_entry_counter_low_func(
-/*============================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction of rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec, /*!< in: insert buffer record */
- ulint space, /*!< in: space id */
- ulint page_no) /*!< in: page number */
-{
- ulint counter;
- const byte* field;
- ulint len;
-
- ut_ad(ibuf_inside(mtr));
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
- | MTR_MEMO_PAGE_S_FIX));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
-
- /* Check the tablespace identifier. */
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-
- ut_a(len == 4);
-
- if (mach_read_from_4(field) != space) {
-
- return(0);
- }
-
- /* Check the page offset. */
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
- ut_a(len == 4);
-
- if (mach_read_from_4(field) != page_no) {
-
- return(0);
- }
-
- /* Check if the record contains a counter field. */
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
- default:
- ut_error;
- case 0: /* ROW_FORMAT=REDUNDANT */
- case 1: /* ROW_FORMAT=COMPACT */
- return(ULINT_UNDEFINED);
-
- case IBUF_REC_INFO_SIZE:
- counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
- ut_a(counter < 0xFFFF);
- return(counter + 1);
- }
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
- ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf)
-#else /* UNIV_DEBUG */
-# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
- ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf)
-#endif /* UNIV_DEBUG */
-
-/****************************************************************//**
-Calculate the counter field for an entry based on the current
-last record in ibuf for (space, page_no).
-@return the counter field, or ULINT_UNDEFINED
-if we should abort this insertion to ibuf */
-static
-ulint
-ibuf_get_entry_counter_func(
-/*========================*/
- ulint space, /*!< in: space id of entry */
- ulint page_no, /*!< in: page number of entry */
- const rec_t* rec, /*!< in: the record preceding the
- insertion point */
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction */
-#endif /* UNIV_DEBUG */
- ibool only_leaf) /*!< in: TRUE if this is the only
- leaf page that can contain entries
- for (space,page_no), that is, there
- was no exact match for (space,page_no)
- in the node pointer */
-{
- ut_ad(ibuf_inside(mtr));
- ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_validate(page_align(rec), ibuf.index));
-
- if (page_rec_is_supremum(rec)) {
- /* This is just for safety. The record should be a
- page infimum or a user record. */
- ut_ad(0);
- return(ULINT_UNDEFINED);
- } else if (!page_rec_is_infimum(rec)) {
- return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
- } else if (only_leaf || !page_has_prev(page_align(rec))) {
- /* The parent node pointer did not contain the
- searched for (space, page_no), which means that the
- search ended on the correct page regardless of the
- counter value, and since we're at the infimum record,
- there are no existing records. */
- return(0);
- } else {
- /* We used to read the previous page here. It would
- break the latching order, because the caller has
- buffer-fixed an insert buffer bitmap page. */
- return(ULINT_UNDEFINED);
- }
-}
-
-
-/** Translates the ibuf free bits to the free space on a page in bytes.
-@param[in] physical_size page_size
-@param[in] bits value for ibuf bitmap bits
-@return maximum insert size after reorganize for the page */
-inline ulint
-ibuf_index_page_calc_free_from_bits(ulint physical_size, ulint bits)
-{
- ut_ad(bits < 4);
- ut_ad(physical_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
-
- if (bits == 3) {
- bits = 4;
- }
-
- return bits * physical_size / IBUF_PAGE_SIZE_PER_FREE_SPACE;
-}
-
-/** Buffer an operation in the insert/delete buffer, instead of doing it
-directly to the disk page, if this is possible.
-@param[in] mode BTR_MODIFY_PREV or BTR_INSERT_TREE
-@param[in] op operation type
-@param[in] no_counter TRUE=use 5.0.3 format; FALSE=allow delete
-buffering
-@param[in] entry index entry to insert
-@param[in] entry_size rec_get_converted_size(index, entry)
-@param[in,out] index index where to insert; must not be unique
-or clustered
-@param[in] page_id page id where to insert
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] thr query thread
-@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static TRANSACTIONAL_TARGET MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-ibuf_insert_low(
- btr_latch_mode mode,
- ibuf_op_t op,
- ibool no_counter,
- const dtuple_t* entry,
- ulint entry_size,
- dict_index_t* index,
- const page_id_t page_id,
- ulint zip_size,
- que_thr_t* thr)
-{
- big_rec_t* dummy_big_rec;
- btr_pcur_t pcur;
- btr_cur_t* cursor;
- dtuple_t* ibuf_entry;
- mem_heap_t* offsets_heap = NULL;
- mem_heap_t* heap;
- rec_offs* offsets = NULL;
- ulint buffered;
- lint min_n_recs;
- rec_t* ins_rec;
- buf_block_t* bitmap_page;
- buf_block_t* block = NULL;
- page_t* root;
- dberr_t err;
- ibool do_merge;
- uint32_t space_ids[IBUF_MAX_N_PAGES_MERGED];
- uint32_t page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
- mtr_t mtr;
- mtr_t bitmap_mtr;
-
- ut_a(!dict_index_is_clust(index));
- ut_ad(!dict_index_is_spatial(index));
- ut_ad(dtuple_check_typed(entry));
- ut_ad(!no_counter || op == IBUF_OP_INSERT);
- ut_ad(page_id.space() == index->table->space_id);
- ut_a(op < IBUF_OP_COUNT);
-
- do_merge = FALSE;
-
- /* Perform dirty comparison of ibuf.max_size and ibuf.size to
- reduce ibuf_mutex contention. This should be OK; at worst we
- are doing some excessive ibuf_contract() or occasionally
- skipping an ibuf_contract(). */
- const ulint max_size = ibuf.max_size;
-
- if (max_size == 0) {
- return(DB_STRONG_FAIL);
- }
-
- if (ibuf.size >= max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
- /* Insert buffer is now too big, contract it but do not try
- to insert */
-
-
-#ifdef UNIV_IBUF_DEBUG
- fputs("Ibuf too big\n", stderr);
-#endif
- ibuf_contract();
-
- return(DB_STRONG_FAIL);
- }
-
- heap = mem_heap_create(1024);
-
- /* Build the entry which contains the space id and the page number
- as the first fields and the type information for other fields, and
- which will be inserted to the insert buffer. Using a counter value
- of 0xFFFF we find the last record for (space, page_no), from which
- we can then read the counter value N and use N + 1 in the record we
- insert. (We patch the ibuf_entry's counter field to the correct
- value just before actually inserting the entry.) */
-
- ibuf_entry = ibuf_entry_build(
- op, index, entry, page_id.space(), page_id.page_no(),
- no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
-
- /* Open a cursor to the insert buffer tree to calculate if we can add
- the new entry to it without exceeding the free space limit for the
- page. */
-
- if (mode == BTR_INSERT_TREE) {
- for (;;) {
- mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
- mysql_mutex_lock(&ibuf_mutex);
-
- if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
-
- break;
- }
-
- mysql_mutex_unlock(&ibuf_mutex);
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
-
- if (!ibuf_add_free_page()) {
-
- mem_heap_free(heap);
- return(DB_STRONG_FAIL);
- }
- }
- }
-
- ibuf_mtr_start(&mtr);
- pcur.btr_cur.page_cur.index = ibuf.index;
-
- err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
- if (err != DB_SUCCESS) {
-func_exit:
- ibuf_mtr_commit(&mtr);
- ut_free(pcur.old_rec_buf);
- mem_heap_free(heap);
-
- if (err == DB_SUCCESS && mode == BTR_INSERT_TREE) {
- ibuf_contract_after_insert(entry_size);
- }
-
- if (do_merge) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
- ibuf_read_merge_pages(space_ids, page_nos, n_stored);
- }
- return err;
- }
-
- ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index));
-
- /* Find out the volume of already buffered inserts for the same index
- page */
- min_n_recs = 0;
- buffered = ibuf_get_volume_buffered(&pcur,
- page_id.space(),
- page_id.page_no(),
- op == IBUF_OP_DELETE
- ? &min_n_recs
- : NULL, &mtr);
-
- const ulint physical_size = zip_size ? zip_size : srv_page_size;
-
- if (op == IBUF_OP_DELETE
- && (min_n_recs < 2 || buf_pool.watch_occurred(page_id))) {
- /* The page could become empty after the record is
- deleted, or the page has been read in to the buffer
- pool. Refuse to buffer the operation. */
-
- /* The buffer pool watch is needed for IBUF_OP_DELETE
- because of latching order considerations. We can
- check buf_pool_watch_occurred() only after latching
- the insert buffer B-tree pages that contain buffered
- changes for the page. We never buffer IBUF_OP_DELETE,
- unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
- been previously buffered for the page. Because there
- are buffered operations for the page, the insert
- buffer B-tree page latches held by mtr will guarantee
- that no changes for the user page will be merged
- before mtr_commit(&mtr). We must not mtr_commit(&mtr)
- until after the IBUF_OP_DELETE has been buffered. */
-
-fail_exit:
- if (mode == BTR_INSERT_TREE) {
- mysql_mutex_unlock(&ibuf_mutex);
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
- }
-
- err = DB_STRONG_FAIL;
- goto func_exit;
- }
-
- /* After this point, the page could still be loaded to the
- buffer pool, but we do not have to care about it, since we are
- holding a latch on the insert buffer leaf page that contains
- buffered changes for (space, page_no). If the page enters the
- buffer pool, buf_page_t::read_complete() for (space, page_no) will
- have to acquire a latch on the same insert buffer leaf page,
- which it cannot do until we have buffered the IBUF_OP_DELETE
- and done mtr_commit(&mtr) to release the latch. */
-
- ibuf_mtr_start(&bitmap_mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(page_id, zip_size, &bitmap_mtr);
-
- /* We check if the index page is suitable for buffered entries */
-
- if (!bitmap_page || buf_pool.page_hash_contains(
- page_id, buf_pool.page_hash.cell_get(page_id.fold()))) {
-commit_exit:
- ibuf_mtr_commit(&bitmap_mtr);
- goto fail_exit;
- } else if (!lock_sys.rd_lock_try()) {
- goto commit_exit;
- } else {
- hash_cell_t* cell = lock_sys.rec_hash.cell_get(page_id.fold());
- lock_sys.rec_hash.latch(cell)->acquire();
- const lock_t* lock = lock_sys_t::get_first(*cell, page_id);
- lock_sys.rec_hash.latch(cell)->release();
- lock_sys.rd_unlock();
- if (lock) {
- goto commit_exit;
- }
- }
-
- if (op == IBUF_OP_INSERT) {
- ulint bits = ibuf_bitmap_page_get_bits(
- bitmap_page->page.frame, page_id, physical_size,
- IBUF_BITMAP_FREE, &bitmap_mtr);
-
- if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(physical_size,
- bits)) {
- /* Release the bitmap page latch early. */
- ibuf_mtr_commit(&bitmap_mtr);
-
- /* It may not fit */
- do_merge = TRUE;
-
- ibuf_get_merge_page_nos(FALSE,
- btr_pcur_get_rec(&pcur), &mtr,
- space_ids,
- page_nos, &n_stored);
-
- goto fail_exit;
- }
- }
-
- if (!no_counter) {
- /* Patch correct counter value to the entry to
- insert. This can change the insert position, which can
- result in the need to abort in some cases. */
- ulint counter = ibuf_get_entry_counter(
- page_id.space(), page_id.page_no(),
- btr_pcur_get_rec(&pcur), &mtr,
- btr_pcur_get_btr_cur(&pcur)->low_match
- < IBUF_REC_FIELD_METADATA);
- dfield_t* field;
-
- if (counter == ULINT_UNDEFINED) {
- goto commit_exit;
- }
-
- field = dtuple_get_nth_field(
- ibuf_entry, IBUF_REC_FIELD_METADATA);
- mach_write_to_2(
- (byte*) dfield_get_data(field)
- + IBUF_REC_OFFSET_COUNTER, counter);
- }
-
- /* Set the bitmap bit denoting that the insert buffer contains
- buffered entries for this index page, if the bit is not set yet */
- index->set_modified(bitmap_mtr);
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_BUFFERED>(
- bitmap_page, page_id, physical_size, true, &bitmap_mtr);
- ibuf_mtr_commit(&bitmap_mtr);
-
- cursor = btr_pcur_get_btr_cur(&pcur);
-
- if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- block = btr_cur_get_block(cursor);
- ut_ad(block->page.id().space() == IBUF_SPACE_ID);
-
- /* If this is the root page, update ibuf.empty. */
- if (block->page.id().page_no() == FSP_IBUF_TREE_ROOT_PAGE_NO) {
- const page_t* root = buf_block_get_frame(block);
-
- ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(root)
- == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
- ibuf.empty = page_is_empty(root);
- }
- } else {
- ut_ad(mode == BTR_INSERT_TREE);
-
- /* We acquire an sx-latch to the root page before the insert,
- because a pessimistic insert releases the tree x-latch,
- which would cause the sx-latching of the root after that to
- break the latching order. */
- if (buf_block_t* ibuf_root = ibuf_tree_root_get(&mtr)) {
- root = ibuf_root->page.frame;
- } else {
- err = DB_CORRUPTION;
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
- mysql_mutex_unlock(&ibuf_mutex);
- goto ibuf_insert_done;
- }
-
- err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
-
- if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- }
-
- mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
- ibuf_size_update(root);
- mysql_mutex_unlock(&ibuf_mutex);
- ibuf.empty = page_is_empty(root);
-
- block = btr_cur_get_block(cursor);
- ut_ad(block->page.id().space() == IBUF_SPACE_ID);
- }
-
-ibuf_insert_done:
- if (offsets_heap) {
- mem_heap_free(offsets_heap);
- }
-
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block, NULL,
- thr_get_trx(thr)->id, &mtr);
- }
-
- goto func_exit;
-}
-
-/** Buffer an operation in the change buffer, instead of applying it
-directly to the file page, if this is possible. Does not do it if the index
-is clustered or unique.
-@param[in] op operation type
-@param[in] entry index entry to insert
-@param[in,out] index index where to insert
-@param[in] page_id page id where to insert
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] thr query thread
-@return true if success */
-TRANSACTIONAL_TARGET
-bool
-ibuf_insert(
- ibuf_op_t op,
- const dtuple_t* entry,
- dict_index_t* index,
- const page_id_t page_id,
- ulint zip_size,
- que_thr_t* thr)
-{
- dberr_t err;
- ulint entry_size;
- ibool no_counter;
- /* Read the settable global variable only once in
- this function, so that we will have a consistent view of it. */
- ibuf_use_t use = ibuf_use_t(innodb_change_buffering);
- DBUG_ENTER("ibuf_insert");
-
- DBUG_PRINT("ibuf", ("op: %d, space: " UINT32PF ", page_no: " UINT32PF,
- op, page_id.space(), page_id.page_no()));
-
- ut_ad(dtuple_check_typed(entry));
- ut_ad(page_id.space() != SRV_TMP_SPACE_ID);
- ut_ad(index->is_btree());
- ut_a(!dict_index_is_clust(index));
- ut_ad(!index->table->is_temporary());
-
- no_counter = use <= IBUF_USE_INSERT;
-
- switch (op) {
- case IBUF_OP_INSERT:
- switch (use) {
- case IBUF_USE_NONE:
- case IBUF_USE_DELETE:
- case IBUF_USE_DELETE_MARK:
- DBUG_RETURN(false);
- case IBUF_USE_INSERT:
- case IBUF_USE_INSERT_DELETE_MARK:
- case IBUF_USE_ALL:
- goto check_watch;
- }
- break;
- case IBUF_OP_DELETE_MARK:
- switch (use) {
- case IBUF_USE_NONE:
- case IBUF_USE_INSERT:
- DBUG_RETURN(false);
- case IBUF_USE_DELETE_MARK:
- case IBUF_USE_DELETE:
- case IBUF_USE_INSERT_DELETE_MARK:
- case IBUF_USE_ALL:
- ut_ad(!no_counter);
- goto check_watch;
- }
- break;
- case IBUF_OP_DELETE:
- switch (use) {
- case IBUF_USE_NONE:
- case IBUF_USE_INSERT:
- case IBUF_USE_INSERT_DELETE_MARK:
- DBUG_RETURN(false);
- case IBUF_USE_DELETE_MARK:
- case IBUF_USE_DELETE:
- case IBUF_USE_ALL:
- ut_ad(!no_counter);
- goto skip_watch;
- }
- break;
- case IBUF_OP_COUNT:
- break;
- }
-
- /* unknown op or use */
- ut_error;
-
-check_watch:
- /* If a thread attempts to buffer an insert on a page while a
- purge is in progress on the same page, the purge must not be
- buffered, because it could remove a record that was
- re-inserted later. For simplicity, we block the buffering of
- all operations on a page that has a purge pending.
-
- We do not check this in the IBUF_OP_DELETE case, because that
- would always trigger the buffer pool watch during purge and
- thus prevent the buffering of delete operations. We assume
- that the issuer of IBUF_OP_DELETE has called
- buf_pool_t::watch_set(). */
-
- if (buf_pool.page_hash_contains<true>(
- page_id, buf_pool.page_hash.cell_get(page_id.fold()))) {
- /* A buffer pool watch has been set or the
- page has been read into the buffer pool.
- Do not buffer the request. If a purge operation
- is being buffered, have this request executed
- directly on the page in the buffer pool after the
- buffered entries for this page have been merged. */
- DBUG_RETURN(false);
- }
-
-skip_watch:
- entry_size = rec_get_converted_size(index, entry, 0);
-
- if (entry_size
- >= page_get_free_space_of_empty(dict_table_is_comp(index->table))
- / 2) {
+ if (err != DB_SUCCESS)
+ goto func_exit;
- DBUG_RETURN(false);
- }
+ if (page_no != flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST +
+ root->page.frame).page)
+ {
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
- err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
- entry, entry_size,
- index, page_id, zip_size, thr);
- if (err == DB_FAIL) {
- err = ibuf_insert_low(BTR_INSERT_TREE,
- op, no_counter, entry, entry_size,
- index, page_id, zip_size, thr);
- }
+ /* Remove the page from the free list and update the ibuf size data */
+ if (buf_block_t *block=
+ buf_page_get_gen(page_id_t{0, page_no}, 0, RW_X_LATCH, nullptr, BUF_GET,
+ &mtr, &err))
+ err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
- ut_a(err == DB_SUCCESS || err == DB_STRONG_FAIL
- || err == DB_TOO_BIG_RECORD);
+ if (err == DB_SUCCESS)
+ buf_page_free(fil_system.sys_space, page_no, &mtr);
- DBUG_RETURN(err == DB_SUCCESS);
+ goto func_exit;
}
MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -3629,9 +316,7 @@ ibuf_insert_to_index_page_low(
return DB_SUCCESS;
/* Page reorganization or recompression should already have been
- attempted by page_cur_tuple_insert(). Besides, per
- ibuf_index_page_calc_free_zip() the page should not have been
- recompressed or reorganized. */
+ attempted by page_cur_tuple_insert(). */
ut_ad(!is_buf_block_get_page_zip(page_cur->block));
/* If the record did not fit, reorganize */
@@ -3669,19 +354,16 @@ ibuf_insert_to_index_page(
block->page.id().page_no()));
ut_ad(!dict_index_is_online_ddl(index));// this is an ibuf_dummy index
- ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
#ifdef BTR_CUR_HASH_ADAPT
- /* A change buffer merge must occur before users are granted
- any access to the page. No adaptive hash index entries may
- point to a freshly read page. */
+ /* ibuf_cleanup() must finish before the adaptive hash index
+ can be inserted into. */
ut_ad(!block->index);
- assert_block_ahi_empty(block);
#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(mtr->is_named_space(block->page.id().space()));
- if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
- != (ibool)!!page_is_comp(page))) {
+ if (UNIV_UNLIKELY(index->table->not_redundant()
+ != !!page_is_comp(page))) {
return DB_CORRUPTION;
}
@@ -3821,7 +503,6 @@ ibuf_set_del_mark(
page_cur.index = index;
ulint up_match = 0, low_match = 0;
- ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
if (!page_cur_search_with_match(entry, PAGE_CUR_LE,
@@ -3880,7 +561,6 @@ ibuf_delete(
page_cur.index = index;
ulint up_match = 0, low_match = 0;
- ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
ut_ad(!index->is_spatial());
ut_ad(!index->is_clust());
@@ -3889,7 +569,6 @@ ibuf_delete(
&up_match, &low_match, &page_cur,
nullptr)
&& low_match == dtuple_get_n_fields(entry)) {
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
page_t* page = buf_block_get_frame(block);
rec_t* rec = page_cur_get_rec(&page_cur);
@@ -3899,7 +578,6 @@ ibuf_delete(
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
mem_heap_t* heap = NULL;
- ulint max_ins_size = 0;
rec_offs_init(offsets_);
@@ -3930,12 +608,8 @@ ibuf_delete(
return;
}
- if (!page_zip) {
- max_ins_size
- = page_get_max_insert_size_after_reorganize(
- page, 1);
- }
#ifdef UNIV_ZIP_DEBUG
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(&page_cur, offsets, mtr);
@@ -3943,760 +617,416 @@ ibuf_delete(
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- if (page_zip) {
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- ibuf_update_free_bits_low(block, max_ins_size, mtr);
- }
-
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
}
}
-/*********************************************************************//**
-Restores insert buffer tree cursor position
-@return whether the position was restored */
-static MY_ATTRIBUTE((nonnull))
-bool
-ibuf_restore_pos(
-/*=============*/
- const page_id_t page_id,/*!< in: page identifier */
- const dtuple_t* search_tuple,
- /*!< in: search tuple for entries of page_no */
- btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
- position is to be restored */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+/** Reset the bits in the bitmap page for the given page number.
+@param bitmap change buffer bitmap page
+@param offset page number
+@param mtr mini-transaction */
+static void ibuf_reset(buf_block_t &bitmap, uint32_t offset, mtr_t *mtr)
{
- if (UNIV_LIKELY(pcur->restore_position(mode, mtr) ==
- btr_pcur_t::SAME_ALL)) {
- return true;
- }
-
- if (fil_space_t* s = fil_space_t::get(page_id.space())) {
- ib::error() << "ibuf cursor restoration fails!"
- " ibuf record inserted to page "
- << page_id
- << " in file " << s->chain.start->name;
- s->release();
-
- ib::error() << BUG_REPORT_MSG;
-
- rec_print_old(stderr, btr_pcur_get_rec(pcur));
- rec_print_old(stderr, pcur->old_rec);
- dtuple_print(stderr, search_tuple);
- }
-
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
- return false;
+ offset&= uint32_t(bitmap.physical_size() - 1);
+ byte *map_byte= &bitmap.page.frame[PAGE_DATA + offset / 2];
+ /* We must reset IBUF_BITMAP_BUFFERED, but at the same time we will also
+ reset IBUF_BITMAP_FREE (and IBUF_BITMAP_IBUF, which should be clear). */
+ byte b= byte(*map_byte & ((offset & 1) ? byte{0xf} : byte{0xf0}));
+ mtr->write<1,mtr_t::MAYBE_NOP>(bitmap, map_byte, b);
}
-/**
-Delete a change buffer record.
-@param[in] page_id page identifier
-@param[in,out] pcur persistent cursor positioned on the record
-@param[in] search_tuple search key for (space,page_no)
-@param[in,out] mtr mini-transaction
-@return whether mtr was committed (due to pessimistic operation) */
-static MY_ATTRIBUTE((warn_unused_result, nonnull))
-bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
- const dtuple_t* search_tuple, mtr_t* mtr)
+/** Move to the next change buffer record. */
+ATTRIBUTE_COLD static dberr_t ibuf_move_to_next(btr_cur_t *cur, mtr_t *mtr)
{
- dberr_t err;
-
- ut_ad(ibuf_inside(mtr));
- ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
- ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur))
- == page_id.page_no());
- ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur))
- == page_id.space());
-
- switch (btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
- BTR_CREATE_FLAG, mtr)) {
- case DB_FAIL:
- break;
- case DB_SUCCESS:
- if (page_is_empty(btr_pcur_get_page(pcur))) {
- /* If a B-tree page is empty, it must be the root page
- and the whole B-tree must be empty. InnoDB does not
- allow empty B-tree pages other than the root. */
- ut_d(const page_t* root = btr_pcur_get_page(pcur));
-
- ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(root)
- == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
- /* ibuf.empty is protected by the root page latch.
- Before the deletion, it had to be FALSE. */
- ut_ad(!ibuf.empty);
- ibuf.empty = true;
- }
- /* fall through */
- default:
- return(FALSE);
- }
-
- /* We have to resort to a pessimistic delete from ibuf.
- Delete-mark the record so that it will not be applied again,
- in case the server crashes before the pessimistic delete is
- made persistent. */
- btr_rec_set_deleted<true>(btr_pcur_get_block(pcur),
- btr_pcur_get_rec(pcur), mtr);
-
- btr_pcur_store_position(pcur, mtr);
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
-
- ibuf_mtr_start(mtr);
- mysql_mutex_lock(&ibuf_mutex);
- mtr_x_lock_index(ibuf.index, mtr);
-
- if (!ibuf_restore_pos(page_id, search_tuple,
- BTR_PURGE_TREE_ALREADY_LATCHED, pcur, mtr)) {
- mysql_mutex_unlock(&ibuf_mutex);
- goto func_exit;
- }
-
- if (buf_block_t* ibuf_root = ibuf_tree_root_get(mtr)) {
- btr_cur_pessimistic_delete(&err, TRUE,
- btr_pcur_get_btr_cur(pcur),
- BTR_CREATE_FLAG, false, mtr);
- ut_a(err == DB_SUCCESS);
-
- ibuf_size_update(ibuf_root->page.frame);
- ibuf.empty = page_is_empty(ibuf_root->page.frame);
- }
-
- mysql_mutex_unlock(&ibuf_mutex);
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
-
-func_exit:
- ut_ad(mtr->has_committed());
- btr_pcur_close(pcur);
-
- return(TRUE);
-}
+ if (!page_cur_move_to_next(&cur->page_cur))
+ return DB_CORRUPTION;
+ if (!page_cur_is_after_last(&cur->page_cur))
+ return DB_SUCCESS;
-/** Check whether buffered changes exist for a page.
-@param[in] id page identifier
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return whether buffered changes exist */
-bool ibuf_page_exists(const page_id_t id, ulint zip_size)
-{
- ut_ad(!fsp_is_system_temporary(id.space()));
+ /* The following is adapted from btr_pcur_move_to_next_page(),
+ but we will not release any latches. */
- const ulint physical_size = zip_size ? zip_size : srv_page_size;
+ const buf_block_t &block= *cur->page_cur.block;
+ const uint32_t next_page_no= btr_page_get_next(block.page.frame);
+ switch (next_page_no) {
+ case 0:
+ case 1:
+ return DB_CORRUPTION;
+ case FIL_NULL:
+ return DB_SUCCESS;
+ }
- if (ibuf_fixed_addr_page(id, physical_size)
- || fsp_descr_page(id, physical_size)) {
- return false;
- }
+ if (UNIV_UNLIKELY(next_page_no == block.page.id().page_no()))
+ return DB_CORRUPTION;
- mtr_t mtr;
- bool bitmap_bits = false;
+ dberr_t err;
+ buf_block_t *next=
+ btr_block_get(*cur->index(), next_page_no, BTR_MODIFY_LEAF, mtr, &err);
+ if (!next)
+ return err;
- ibuf_mtr_start(&mtr);
- if (const buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
- id, zip_size, &mtr)) {
- bitmap_bits = ibuf_bitmap_page_get_bits(
- bitmap_page->page.frame, id, zip_size,
- IBUF_BITMAP_BUFFERED, &mtr) != 0;
- }
- ibuf_mtr_commit(&mtr);
- return bitmap_bits;
-}
+ if (UNIV_UNLIKELY(memcmp_aligned<4>(next->page.frame + FIL_PAGE_PREV,
+ block.page.frame + FIL_PAGE_OFFSET, 4)))
+ return DB_CORRUPTION;
-/** Reset the bits in the bitmap page for the given block and page id.
-@param b X-latched secondary index page (nullptr to discard changes)
-@param page_id page identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param mtr mini-transaction */
-static void ibuf_reset_bitmap(buf_block_t *b, page_id_t page_id,
- ulint zip_size, mtr_t *mtr)
-{
- buf_block_t *bitmap= ibuf_bitmap_get_map_page(page_id, zip_size, mtr);
- if (!bitmap)
- return;
-
- const ulint physical_size = zip_size ? zip_size : srv_page_size;
- /* FIXME: update the bitmap byte only once! */
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_BUFFERED>(bitmap, page_id,
- physical_size, false, mtr);
-
- if (b)
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>(bitmap, page_id, physical_size,
- ibuf_index_page_calc_free(b),
- mtr);
+ page_cur_set_before_first(next, &cur->page_cur);
+ return page_cur_move_to_next(&cur->page_cur) ? DB_SUCCESS : DB_CORRUPTION;
}
-/** When an index page is read from a disk to the buffer pool, this function
-applies any buffered operations to the page and deletes the entries from the
-insert buffer. If the page is not read, but created in the buffer pool, this
-function deletes its buffered entries from the insert buffer; there can
-exist entries for such a page if the page belonged to an index which
-subsequently was dropped.
-@param block X-latched page to try to apply changes to, or NULL to discard
-@param page_id page identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return error code */
-dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
- const page_id_t page_id,
- ulint zip_size)
+/** Apply changes to a block. */
+ATTRIBUTE_COLD
+static dberr_t ibuf_merge(fil_space_t *space, btr_cur_t *cur, mtr_t *mtr)
{
- if (trx_sys_hdr_page(page_id)) {
- return DB_SUCCESS;
- }
-
- ut_ad(!block || page_id == block->page.id());
- ut_ad(!block || block->page.frame);
- ut_ad(!block || !block->page.is_ibuf_exist());
- ut_ad(!block || !block->page.is_reinit());
- ut_ad(!trx_sys_hdr_page(page_id));
- ut_ad(page_id < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
-
- const ulint physical_size = zip_size ? zip_size : srv_page_size;
-
- if (ibuf_fixed_addr_page(page_id, physical_size)
- || fsp_descr_page(page_id, physical_size)) {
- return DB_SUCCESS;
- }
-
- btr_pcur_t pcur;
-#ifdef UNIV_IBUF_DEBUG
- ulint volume = 0;
-#endif /* UNIV_IBUF_DEBUG */
- dberr_t err = DB_SUCCESS;
- mtr_t mtr;
-
- fil_space_t* space = fil_space_t::get(page_id.space());
-
- if (UNIV_UNLIKELY(!space)) {
- block = nullptr;
- } else {
- ulint bitmap_bits = 0;
+ if (btr_cur_get_rec(cur)[4])
+ return DB_CORRUPTION;
- ibuf_mtr_start(&mtr);
+ const uint32_t space_id= mach_read_from_4(btr_cur_get_rec(cur));
+ const uint32_t page_no= mach_read_from_4(btr_cur_get_rec(cur) + 5);
- buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
- page_id, zip_size, &mtr);
+ buf_block_t *block= space && page_no < space->size
+ ? buf_page_get_gen(page_id_t{space_id, page_no}, space->zip_size(),
+ RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr)
+ : nullptr;
- if (bitmap_page
- && fil_page_get_type(bitmap_page->page.frame)
- != FIL_PAGE_TYPE_ALLOCATED) {
- bitmap_bits = ibuf_bitmap_page_get_bits(
- bitmap_page->page.frame, page_id, zip_size,
- IBUF_BITMAP_BUFFERED, &mtr);
- }
+ buf_block_t *bitmap= block
+ ? buf_page_get_gen(page_id_t(space_id,
+ uint32_t(page_no &
+ ~(block->physical_size() - 1)) + 1),
+ block->zip_size(), RW_X_LATCH, nullptr,
+ BUF_GET_POSSIBLY_FREED, mtr)
+ : nullptr;
- ibuf_mtr_commit(&mtr);
-
- if (bitmap_bits
- && DB_SUCCESS
- == fseg_page_is_allocated(space, page_id.page_no())) {
- ibuf_mtr_start(&mtr);
- mtr.set_named_space(space);
- ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
- ibuf_mtr_commit(&mtr);
- bitmap_bits = 0;
- if (!block
- || btr_page_get_index_id(block->page.frame)
- != DICT_IBUF_ID_MIN + IBUF_SPACE_ID) {
- ibuf_delete_recs(page_id);
- }
- }
+ if (!block);
+ else if (fil_page_get_type(block->page.frame) != FIL_PAGE_INDEX ||
+ !page_is_leaf(block->page.frame) ||
+ DB_SUCCESS == fseg_page_is_allocated(space, page_no))
+ block= nullptr;
- if (!bitmap_bits) {
- /* No changes are buffered for this page. */
- space->release();
- return DB_SUCCESS;
- }
- }
-
- if (!block) {
- } else if (!fil_page_index_page_check(block->page.frame)
- || !page_is_leaf(block->page.frame)) {
- space->set_corrupted();
- err = DB_CORRUPTION;
- block = nullptr;
- } else {
- /* Move the ownership of the x-latch on the page to this OS
- thread, so that we can acquire a second x-latch on it. This
- is needed for the insert operations to the index page to pass
- the debug checks. */
-
- block->page.lock.claim_ownership();
- }
-
- mem_heap_t* heap = mem_heap_create(512);
-
- const dtuple_t* search_tuple = ibuf_search_tuple_build(
- page_id.space(), page_id.page_no(), heap);
-
- /* Counts for merged & discarded operations. */
- ulint mops[IBUF_OP_COUNT];
- ulint dops[IBUF_OP_COUNT];
-
- memset(mops, 0, sizeof(mops));
- memset(dops, 0, sizeof(dops));
- pcur.btr_cur.page_cur.index = ibuf.index;
-
-loop:
- ibuf_mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for this
- index page */
- if (btr_pcur_open_on_user_rec(search_tuple,
- BTR_MODIFY_LEAF, &pcur, &mtr)
- != DB_SUCCESS) {
- err = DB_CORRUPTION;
- goto reset_bit;
- }
-
- if (block) {
- block->page.fix();
- block->page.lock.x_lock_recursive();
- mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
- }
-
- if (space) {
- mtr.set_named_space(space);
- }
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_on_page(&pcur));
- goto reset_bit;
- }
-
- for (;;) {
- rec_t* rec;
-
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ do
+ {
+ rec_t *rec= cur->page_cur.rec;
+ ulint n_fields= rec_get_n_fields_old(rec);
- rec = btr_pcur_get_rec(&pcur);
+ if (n_fields <= IBUF_REC_FIELD_USER + 1 || rec[4])
+ return DB_CORRUPTION;
- /* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(&mtr, rec) != page_id.page_no()
- || ibuf_rec_get_space(&mtr, rec) != page_id.space()) {
+ n_fields-= IBUF_REC_FIELD_USER;
- if (block != NULL) {
- page_header_reset_last_insert(block, &mtr);
- }
+ ulint types_len, not_redundant;
- goto reset_bit;
- }
+ if (rec_get_1byte_offs_flag(rec))
+ {
+ if (rec_1_get_field_end_info(rec, 0) != 4 ||
+ rec_1_get_field_end_info(rec, 1) != 5 ||
+ rec_1_get_field_end_info(rec, 2) != 9)
+ return DB_CORRUPTION;
+ types_len= rec_1_get_field_end_info(rec, 3);
+ }
+ else
+ {
+ if (rec_2_get_field_end_info(rec, 0) != 4 ||
+ rec_2_get_field_end_info(rec, 1) != 5 ||
+ rec_2_get_field_end_info(rec, 2) != 9)
+ return DB_CORRUPTION;
+ types_len= rec_2_get_field_end_info(rec, 3);
+ }
- if (err) {
- fputs("InnoDB: Discarding record\n ", stderr);
- rec_print_old(stderr, rec);
- fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
- } else if (block != NULL && !rec_get_deleted_flag(rec, 0)) {
- /* Now we have at pcur a record which should be
- applied on the index page; NOTE that the call below
- copies pointers to fields in rec, and we must
- keep the latch to the rec page until the
- insertion is finished! */
- dtuple_t* entry;
- trx_id_t max_trx_id;
- dict_index_t* dummy_index;
- ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec);
-
- max_trx_id = page_get_max_trx_id(page_align(rec));
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- max_trx_id, &mtr);
-
- ut_ad(page_validate(page_align(rec), ibuf.index));
-
- entry = ibuf_build_entry_from_ibuf_rec(
- &mtr, rec, heap, &dummy_index);
- ut_ad(!dummy_index->table->space);
- dummy_index->table->space = space;
- dummy_index->table->space_id = space->id;
-
- ut_ad(page_validate(block->page.frame, dummy_index));
-
- switch (op) {
- case IBUF_OP_INSERT:
-#ifdef UNIV_IBUF_DEBUG
- volume += rec_get_converted_size(
- dummy_index, entry, 0);
-
- volume += page_dir_calc_reserved_space(1);
-
- ut_a(volume <= (4U << srv_page_size_shift)
- / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-#endif
- ibuf_insert_to_index_page(
- entry, block, dummy_index, &mtr);
- break;
-
- case IBUF_OP_DELETE_MARK:
- ibuf_set_del_mark(
- entry, block, dummy_index, &mtr);
- break;
-
- case IBUF_OP_DELETE:
- ibuf_delete(entry, block, dummy_index, &mtr);
- /* Because ibuf_delete() will latch an
- insert buffer bitmap page, commit mtr
- before latching any further pages.
- Store and restore the cursor position. */
- ut_ad(rec == btr_pcur_get_rec(&pcur));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(ibuf_rec_get_page_no(&mtr, rec)
- == page_id.page_no());
- ut_ad(ibuf_rec_get_space(&mtr, rec)
- == page_id.space());
-
- /* Mark the change buffer record processed,
- so that it will not be merged again in case
- the server crashes between the following
- mtr_commit() and the subsequent mtr_commit()
- of deleting the change buffer record. */
- btr_rec_set_deleted<true>(
- btr_pcur_get_block(&pcur),
- btr_pcur_get_rec(&pcur), &mtr);
-
- btr_pcur_store_position(&pcur, &mtr);
- ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- ibuf_mtr_start(&mtr);
- mtr.set_named_space(space);
-
- block->page.lock.x_lock_recursive();
- block->fix();
- mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
-
- if (!ibuf_restore_pos(page_id, search_tuple,
- BTR_MODIFY_LEAF,
- &pcur, &mtr)) {
-
- ut_ad(mtr.has_committed());
- mops[op]++;
- ibuf_dummy_index_free(dummy_index);
- goto loop;
- }
-
- break;
- default:
- ut_error;
- }
-
- mops[op]++;
-
- ibuf_dummy_index_free(dummy_index);
- } else {
- dops[ibuf_rec_get_op_type(&mtr, rec)]++;
- }
+ if (types_len < 9 || (types_len - 9) / 6 != n_fields)
+ return DB_CORRUPTION;
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(page_id, &pcur, search_tuple, &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
+ ibuf_op op= IBUF_OP_INSERT;
+ const ulint info_len= (types_len - 9) % 6;
- ut_ad(mtr.has_committed());
- goto loop;
- } else if (btr_pcur_is_after_last_on_page(&pcur)) {
- ibuf_mtr_commit(&mtr);
- goto loop;
- }
- }
+ switch (info_len) {
+ default:
+ return DB_CORRUPTION;
+ case 0: case 1:
+ not_redundant= info_len;
+ break;
+ case 4:
+ not_redundant= rec[9 + 3];
+ if (rec[9 + 2] > IBUF_OP_DELETE || not_redundant > 1)
+ return DB_CORRUPTION;
+ op= static_cast<ibuf_op>(rec[9 + 2]);
+ }
-reset_bit:
- if (space) {
- ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
- }
+ const byte *const types= rec + 9 + info_len;
- ibuf_mtr_commit(&mtr);
- ut_free(pcur.old_rec_buf);
+ if (ibuf_rec_get_space(rec) != space_id ||
+ ibuf_rec_get_page_no(rec) != page_no)
+ break;
- if (space) {
- space->release();
- }
+ if (!rec_get_deleted_flag(rec, 0))
+ {
+ /* Delete-mark the record so that it will not be applied again if
+ the server is killed before the completion of ibuf_upgrade(). */
+ btr_rec_set_deleted<true>(cur->page_cur.block, rec, mtr);
+
+ if (block)
+ {
+ page_header_reset_last_insert(block, mtr);
+ page_update_max_trx_id(block, buf_block_get_page_zip(block),
+ page_get_max_trx_id(page_align(rec)), mtr);
+ dict_index_t *index;
+ mem_heap_t *heap = mem_heap_create(512);
+ dtuple_t *entry= ibuf_entry_build(rec, not_redundant, n_fields,
+ types, heap, index);
+ dict_table_t *table= index->table;
+ ut_ad(!table->space);
+ table->space= space;
+ table->space_id= space_id;
+
+ switch (op) {
+ case IBUF_OP_INSERT:
+ ibuf_insert_to_index_page(entry, block, index, mtr);
+ break;
+ case IBUF_OP_DELETE_MARK:
+ ibuf_set_del_mark(entry, block, index, mtr);
+ break;
+ case IBUF_OP_DELETE:
+ ibuf_delete(entry, block, index, mtr);
+ break;
+ }
+
+ mem_heap_free(heap);
+ dict_mem_index_free(index);
+ dict_mem_table_free(table);
+ }
+ }
- mem_heap_free(heap);
+ if (dberr_t err= ibuf_move_to_next(cur, mtr))
+ return err;
+ }
+ while (!page_cur_is_after_last(&cur->page_cur));
- ibuf.n_merges++;
- ibuf_add_ops(ibuf.n_merged_ops, mops);
- ibuf_add_ops(ibuf.n_discarded_ops, dops);
+ if (bitmap)
+ ibuf_reset(*bitmap, page_no, mtr);
- return err;
+ return DB_SUCCESS;
}
-/** Delete all change buffer entries for a tablespace,
-in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead.
-@param[in] space missing or to-be-discarded tablespace */
-void ibuf_delete_for_discarded_space(uint32_t space)
+static dberr_t ibuf_open(btr_cur_t *cur, mtr_t *mtr)
{
- if (UNIV_UNLIKELY(!ibuf.index)) return;
-
- btr_pcur_t pcur;
- const rec_t* ibuf_rec;
- mtr_t mtr;
-
- /* Counts for discarded operations. */
- ulint dops[IBUF_OP_COUNT];
-
- dfield_t dfield[IBUF_REC_FIELD_METADATA];
- dtuple_t search_tuple {0,IBUF_REC_FIELD_METADATA,
- IBUF_REC_FIELD_METADATA,dfield,0
- ,nullptr
-#ifdef UNIV_DEBUG
- ,DATA_TUPLE_MAGIC_N
-#endif /* UNIV_DEBUG */
- };
- byte space_id[4];
- mach_write_to_4(space_id, space);
-
- dfield_set_data(&dfield[0], space_id, 4);
- dfield_set_data(&dfield[1], field_ref_zero, 1);
- dfield_set_data(&dfield[2], field_ref_zero, 4);
- dtuple_set_types_binary(&search_tuple, IBUF_REC_FIELD_METADATA);
- /* Use page number 0 to build the search tuple so that we get the
- cursor positioned at the first entry for this space id */
-
- memset(dops, 0, sizeof(dops));
- pcur.btr_cur.page_cur.index = ibuf.index;
-
-loop:
- log_free_check();
- ibuf_mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for the
- space */
- if (btr_pcur_open_on_user_rec(&search_tuple,
- BTR_MODIFY_LEAF, &pcur, &mtr)
- != DB_SUCCESS) {
- goto leave_loop;
- }
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_on_page(&pcur));
- goto leave_loop;
- }
-
- for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
-
- ibuf_rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this space */
- if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) {
-
- goto leave_loop;
- }
-
- uint32_t page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec);
+ ut_ad(mtr->get_savepoint() == 1);
- dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
+ uint32_t page= FSP_IBUF_TREE_ROOT_PAGE_NO;
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(page_id_t(space, page_no),
- &pcur, &search_tuple, &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
+ for (ulint height= ULINT_UNDEFINED;;)
+ {
+ dberr_t err;
+ buf_block_t* block= btr_block_get(*cur->index(), page, RW_X_LATCH, mtr,
+ &err);
+ ut_ad(!block == (err != DB_SUCCESS));
- ut_ad(mtr.has_committed());
-clear:
- ut_free(pcur.old_rec_buf);
- goto loop;
- }
+ if (!block)
+ return err;
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- ibuf_mtr_commit(&mtr);
- goto clear;
- }
- }
+ page_cur_set_before_first(block, &cur->page_cur);
+ const uint32_t l= btr_page_get_level(block->page.frame);
-leave_loop:
- ibuf_mtr_commit(&mtr);
- ut_free(pcur.old_rec_buf);
+ if (height == ULINT_UNDEFINED)
+ height= l;
+ else
+ {
+ /* Release the parent page latch. */
+ ut_ad(mtr->get_savepoint() == 3);
+ mtr->rollback_to_savepoint(1, 2);
- ibuf_add_ops(ibuf.n_discarded_ops, dops);
-}
+ if (UNIV_UNLIKELY(height != l))
+ return DB_CORRUPTION;
+ }
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return true if empty */
-bool
-ibuf_is_empty(void)
-/*===============*/
-{
- mtr_t mtr;
+ if (!height)
+ return ibuf_move_to_next(cur, mtr);
- ibuf_mtr_start(&mtr);
+ height--;
- ut_d(mysql_mutex_lock(&ibuf_mutex));
- const buf_block_t* root = ibuf_tree_root_get(&mtr);
- bool is_empty = root && page_is_empty(root->page.frame);
- ut_ad(!root || is_empty == ibuf.empty);
- ut_d(mysql_mutex_unlock(&ibuf_mutex));
- ibuf_mtr_commit(&mtr);
+ if (!page_cur_move_to_next(&cur->page_cur))
+ return DB_CORRUPTION;
- return(is_empty);
+ const rec_t *ptr= cur->page_cur.rec;
+ const ulint n_fields= rec_get_n_fields_old(ptr);
+ if (n_fields <= IBUF_REC_FIELD_USER)
+ return DB_CORRUPTION;
+ ulint len;
+ ptr+= rec_get_nth_field_offs_old(ptr, n_fields - 1, &len);
+ if (len != 4)
+ return DB_CORRUPTION;
+ page= mach_read_from_4(ptr);
+ }
}
-/******************************************************************//**
-Prints info of ibuf. */
-void
-ibuf_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
+ATTRIBUTE_COLD dberr_t ibuf_upgrade()
{
- if (UNIV_UNLIKELY(!ibuf.index)) return;
-
- mysql_mutex_lock(&ibuf_mutex);
- if (ibuf.empty)
+ if (srv_read_only_mode)
{
- mysql_mutex_unlock(&ibuf_mutex);
- return;
+ sql_print_error("InnoDB: innodb_read_only_mode prevents an upgrade");
+ return DB_READ_ONLY;
}
- const ulint size= ibuf.size;
- const ulint free_list_len= ibuf.free_list_len;
- const ulint seg_size= ibuf.seg_size;
- mysql_mutex_unlock(&ibuf_mutex);
-
- fprintf(file,
- "-------------\n"
- "INSERT BUFFER\n"
- "-------------\n"
- "size " ULINTPF ", free list len " ULINTPF ","
- " seg size " ULINTPF ", " ULINTPF " merges\n",
- size, free_list_len, seg_size, ulint{ibuf.n_merges});
- ibuf_print_ops("merged operations:\n", ibuf.n_merged_ops, file);
- ibuf_print_ops("discarded operations:\n", ibuf.n_discarded_ops, file);
-}
-
-/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
-@param[in] trx transaction
-@param[in,out] space tablespace being imported
-@return DB_SUCCESS or error code */
-dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
-{
- ut_ad(trx->mysql_thd);
- ut_ad(space->purpose == FIL_TYPE_IMPORT);
+ sql_print_information("InnoDB: Upgrading the change buffer");
- const unsigned zip_size = space->zip_size();
- const unsigned physical_size = space->physical_size();
-
- uint32_t size= std::min(space->free_limit, space->size);
-
- if (size == 0) {
- return(DB_TABLE_NOT_FOUND);
- }
+#ifdef BTR_CUR_HASH_ADAPT
+ const bool ahi= btr_search_enabled;
+ if (ahi)
+ btr_search_disable();
+#endif
- mtr_t mtr;
+ dict_table_t *ibuf_table= dict_table_t::create({C_STRING_WITH_LEN("ibuf")},
+ fil_system.sys_space,
+ 1, 0, 0, 0);
+ dict_index_t *ibuf_index=
+ dict_mem_index_create(ibuf_table, "CLUST_IND", DICT_CLUSTERED, 1);
+ ibuf_index->id= ibuf_index_id;
+ ibuf_index->n_uniq= REC_MAX_N_FIELDS;
+ ibuf_index->lock.SRW_LOCK_INIT(index_tree_rw_lock_key);
+ ibuf_index->page= FSP_IBUF_TREE_ROOT_PAGE_NO;
+ ut_d(ibuf_index->is_dummy= true);
+ ut_d(ibuf_index->cached= true);
+
+ size_t spaces=0, pages= 0;
+ dberr_t err;
+ mtr_t mtr;
+ mtr.start();
+ mtr_x_lock_index(ibuf_index, &mtr);
- /* The two bitmap pages (allocation bitmap and ibuf bitmap) repeat
- every page_size pages. For example if page_size is 16 KiB, then the
- two bitmap pages repeat every 16 KiB * 16384 = 256 MiB. In the loop
- below page_no is measured in number of pages since the beginning of
- the space, as usual. */
+ {
+ btr_cur_t cur;
+ uint32_t prev_space_id= ~0U;
+ fil_space_t *space= nullptr;
+ cur.page_cur.index= ibuf_index;
+ log_free_check();
+ err= ibuf_open(&cur, &mtr);
+
+ while (err == DB_SUCCESS && !page_cur_is_after_last(&cur.page_cur))
+ {
+ const uint32_t space_id= ibuf_rec_get_space(cur.page_cur.rec);
+ if (space_id != prev_space_id)
+ {
+ if (space)
+ space->release();
+ prev_space_id= space_id;
+ space= fil_space_t::get(space_id);
+ if (space)
+ mtr.set_named_space(space);
+ spaces++;
+ }
+ pages++;
+ err= ibuf_merge(space, &cur, &mtr);
+ if (err == DB_SUCCESS)
+ {
+ /* Move to the next user index page. We buffer-fix the current
+ change buffer leaf page to prevent it from being evicted
+ before we have started a new mini-transaction. */
+ cur.page_cur.block->fix();
+ mtr.commit();
+
+ if (recv_sys.report(time(nullptr)))
+ {
+ sql_print_information("InnoDB: merged changes to"
+ " %zu tablespaces, %zu pages", spaces, pages);
+ service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "merged changes to"
+ " %zu tablespaces, %zu pages",
+ spaces, pages);
+ }
+
+ log_free_check();
+ mtr.start();
+ mtr.page_lock(cur.page_cur.block, RW_X_LATCH);
+ if (space)
+ mtr.set_named_space(space);
+ }
+ }
+ mtr.commit();
+ if (space)
+ space->release();
+ }
- for (uint32_t page_no = 0; page_no < size; page_no += physical_size) {
- if (trx_is_interrupted(trx)) {
- return(DB_INTERRUPTED);
- }
+ if (err == DB_SUCCESS)
+ {
+ mtr.start();
+ if (buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_X_LATCH,
+ nullptr, BUF_GET, &mtr, &err))
+ {
+ page_create(root, &mtr, false);
+ mtr.write<2,mtr_t::MAYBE_NOP>(*root, PAGE_HEADER + PAGE_LEVEL +
+ root->page.frame, 0U);
+ }
+ mtr.commit();
- mtr_start(&mtr);
+ while (err == DB_SUCCESS)
+ err= ibuf_remove_free_page(mtr);
- buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
- page_id_t(space->id, page_no), zip_size, &mtr);
- if (!bitmap_page) {
- mtr.commit();
- return DB_CORRUPTION;
- }
+ if (err == DB_SUCCESS_LOCKED_REC)
+ err= DB_SUCCESS;
+ }
- if (buf_is_zeroes(span<const byte>(bitmap_page->page.frame,
- physical_size))) {
- /* This means we got all-zero page instead of
- ibuf bitmap page. The subsequent page should be
- all-zero pages. */
-#ifdef UNIV_DEBUG
- for (uint32_t curr_page = page_no + 1;
- curr_page < physical_size; curr_page++) {
-
- buf_block_t* block = buf_page_get(
- page_id_t(space->id, curr_page),
- zip_size, RW_S_LATCH, &mtr);
- page_t* page = buf_block_get_frame(block);
- ut_ad(buf_is_zeroes(span<const byte>(
- page,
- physical_size)));
- }
-#endif /* UNIV_DEBUG */
- mtr_commit(&mtr);
- continue;
- }
+#ifdef BTR_CUR_HASH_ADAPT
+ if (ahi)
+ btr_search_enable();
+#endif
- for (uint32_t i = FSP_IBUF_BITMAP_OFFSET + 1; i < physical_size;
- i++) {
- const uint32_t offset = page_no + i;
- const page_id_t cur_page_id(space->id, offset);
-
- if (ibuf_bitmap_page_get_bits(
- bitmap_page->page.frame,
- cur_page_id, zip_size,
- IBUF_BITMAP_IBUF, &mtr)) {
-
- mtr_commit(&mtr);
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_ERROR,
- ER_INNODB_INDEX_CORRUPT,
- "File %s page %u"
- " is wrongly flagged to belong to the"
- " insert buffer",
- space->chain.start->name, offset);
- return(DB_CORRUPTION);
- }
-
- if (ibuf_bitmap_page_get_bits(
- bitmap_page->page.frame,
- cur_page_id, zip_size,
- IBUF_BITMAP_BUFFERED, &mtr)) {
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_WARN,
- ER_INNODB_INDEX_CORRUPT,
- "Buffered changes"
- " for file %s page %u are lost",
- space->chain.start->name, offset);
-
- /* Tolerate this error, so that
- slightly corrupted tables can be
- imported and dumped. Clear the bit. */
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_BUFFERED>(
- bitmap_page, cur_page_id,
- physical_size, false, &mtr);
- }
- }
+ ibuf_index->lock.free();
+ dict_mem_index_free(ibuf_index);
+ dict_mem_table_free(ibuf_table);
- mtr_commit(&mtr);
- }
+ if (err)
+ sql_print_error("InnoDB: Unable to upgrade the change buffer");
+ else
+ sql_print_information("InnoDB: Upgraded the change buffer: "
+ "%zu tablespaces, %zu pages", spaces, pages);
- return(DB_SUCCESS);
+ return err;
}
-void ibuf_set_bitmap_for_bulk_load(buf_block_t *block, mtr_t *mtr, bool reset)
+dberr_t ibuf_upgrade_needed()
{
- ut_a(page_is_leaf(block->page.frame));
- const page_id_t id{block->page.id()};
- const auto zip_size= block->zip_size();
+ mtr_t mtr;
+ mtr.start();
+ mtr.x_lock_space(fil_system.sys_space);
+ dberr_t err;
+ const buf_block_t *header_page=
+ buf_page_get_gen(ibuf_header, 0, RW_S_LATCH, nullptr, BUF_GET, &mtr, &err);
- if (buf_block_t *bitmap_page= ibuf_bitmap_get_map_page(id, zip_size, mtr))
+ if (!header_page)
{
- if (ibuf_bitmap_page_get_bits(bitmap_page->page.frame, id, zip_size,
- IBUF_BITMAP_BUFFERED, mtr))
- ibuf_delete_recs(id);
-
- ulint free_val= reset ? 0 : ibuf_index_page_calc_free(block);
- /* FIXME: update the bitmap byte only once! */
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>
- (bitmap_page, id, block->physical_size(), free_val, mtr);
- ibuf_bitmap_page_set_bits<IBUF_BITMAP_BUFFERED>
- (bitmap_page, id, block->physical_size(), false, mtr);
+ err_exit:
+ sql_print_error("InnoDB: The change buffer is corrupted");
+ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO)
+ err= DB_SUCCESS;
+ func_exit:
+ mtr.commit();
+ return err;
+ }
+
+ const buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_S_LATCH, nullptr,
+ BUF_GET, &mtr, &err);
+ if (!root)
+ goto err_exit;
+
+ if (UNIV_LIKELY(!page_has_siblings(root->page.frame)) &&
+ UNIV_LIKELY(!memcmp(root->page.frame + FIL_PAGE_TYPE, field_ref_zero,
+ srv_page_size -
+ (FIL_PAGE_DATA_END + FIL_PAGE_TYPE))))
+ /* the change buffer was removed; no need to upgrade */;
+ else if (page_is_comp(root->page.frame) ||
+ btr_page_get_index_id(root->page.frame) != ibuf_index_id ||
+ fil_page_get_type(root->page.frame) != FIL_PAGE_INDEX)
+ {
+ err= DB_CORRUPTION;
+ goto err_exit;
}
+ else if (srv_read_only_mode)
+ {
+ sql_print_error("InnoDB: innodb_read_only=ON prevents an upgrade"
+ " of the change buffer");
+ err= DB_READ_ONLY;
+ }
+ else if (srv_force_recovery != SRV_FORCE_NO_LOG_REDO)
+ err= DB_FAIL;
+
+ goto func_exit;
}
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a56598d3620..bfcc559cf5f 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -56,12 +56,8 @@ is acceptable for the program to die with a clear assert failure. */
#define BTR_MAX_LEVELS 100
#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
- btr_latch_mode((latch_mode) & ~(BTR_INSERT \
- | BTR_DELETE_MARK \
- | BTR_RTREE_UNDO_INS \
+ btr_latch_mode((latch_mode) & ~(BTR_RTREE_UNDO_INS \
| BTR_RTREE_DELETE_MARK \
- | BTR_DELETE \
- | BTR_IGNORE_SEC_UNIQUE \
| BTR_ALREADY_S_LATCHED \
| BTR_LATCH_FOR_INSERT \
| BTR_LATCH_FOR_DELETE))
@@ -79,6 +75,14 @@ btr_root_adjust_on_import(
const dict_index_t* index) /*!< in: index tree */
MY_ATTRIBUTE((warn_unused_result));
+/** Check a file segment header within a B-tree root page.
+@param offset file segment header offset
+@param block B-tree root page
+@param space tablespace
+@return whether the segment header is valid */
+bool btr_root_fseg_validate(ulint offset, const buf_block_t &block,
+ const fil_space_t &space);
+
/** Report a decryption failure. */
ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
@@ -86,12 +90,11 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
@param[in] index index tree
@param[in] page page number
@param[in] mode latch mode
-@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
- uint32_t page, ulint mode, bool merge,
+ uint32_t page, ulint mode,
mtr_t *mtr, dberr_t *err= nullptr);
/**************************************************************//**
@@ -241,15 +244,7 @@ btr_root_raise_and_insert(
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
+/** Reorganize an index page.
@param cursor page cursor
@param mtr mini-transaction
@return error code
@@ -347,6 +342,7 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
@@ -450,15 +446,8 @@ btr_root_block_get(
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err); /*!< out: error code */
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
+/** Reorganize an index page.
@return error code
@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */
dberr_t btr_page_reorganize_block(
@@ -529,9 +518,10 @@ btr_lift_page_up(
must not be empty: use
btr_discard_only_page_on_level if the last
record from the page should be removed */
+ que_thr_t* thr, /*!< in/out: query thread for SPATIAL INDEX */
mtr_t* mtr, /*!< in/out: mini-transaction */
dberr_t* err) /*!< out: error code */
- __attribute__((nonnull));
+ __attribute__((nonnull(1,2,4,5)));
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index f6abc9f5e52..dc64054eb3e 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -56,11 +56,7 @@ enum {
BTR_KEEP_POS_FLAG = 8,
/** the caller is creating the index or wants to bypass the
index->info.online creation log */
- BTR_CREATE_FLAG = 16,
- /** the caller of btr_cur_optimistic_update() or
- btr_cur_update_in_place() will take care of
- updating IBUF_BITMAP_FREE */
- BTR_KEEP_IBUF_BITMAP = 32
+ BTR_CREATE_FLAG = 16
};
#include "que0types.h"
@@ -213,14 +209,8 @@ btr_cur_pessimistic_insert(
See if there is enough place in the page modification log to log
an update-in-place.
-@retval false if out of space; IBUF_BITMAP_FREE will be reset
-outside mtr if the page was recompressed
-@retval true if enough place;
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
-a secondary index leaf page. This has to be done either within the
-same mini-transaction, or by invoking ibuf_reset_free_bits() before
-mtr_commit(mtr). */
+@retval false if out of space
+@retval true if enough place */
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
@@ -262,7 +252,7 @@ Updates a record when the update causes no size changes in its fields.
@return locking or undo log related error code, or
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
+on a ROW_FORMAT=COMPRESSED page */
dberr_t
btr_cur_update_in_place(
/*====================*/
@@ -669,28 +659,13 @@ enum btr_cur_method {
reference is stored in the field
hash_node, and might be necessary to
update */
- BTR_CUR_BINARY, /*!< success using the binary search */
- BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
- the insert buffer */
- BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
- mark in the insert/delete buffer */
- BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
- the insert/delete buffer */
- BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
+ BTR_CUR_BINARY /*!< success using the binary search */
};
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
struct btr_cur_t {
page_cur_t page_cur; /*!< page cursor */
- purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
- /*------------------------------*/
- que_thr_t* thr; /*!< this field is only used
- when search_leaf()
- is called for an index entry
- insertion: the calling query
- thread is passed here to be
- used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
search_leaf() to pass information: */
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index fc829e7857a..966247ffa00 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -69,7 +69,7 @@ enum btr_latch_mode {
Used in btr_pcur_move_backward_from_page(). */
BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF,
/** Modify the previous record.
- Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */
+ Used in btr_pcur_move_backward_from_page(). */
BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF,
/** Start modifying the entire B-tree. */
BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF,
@@ -77,24 +77,8 @@ enum btr_latch_mode {
Only used by rtr_search_to_nth_level(). */
BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE,
- /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually
- exclusive. */
- /** The search tuple will be inserted to the secondary index
- at the searched position. When the leaf page is not in the
- buffer pool, try to use the change buffer. */
- BTR_INSERT = 64,
-
- /** Try to delete mark a secondary index leaf page record at
- the searched position using the change buffer when the page is
- not in the buffer pool. */
- BTR_DELETE_MARK = 128,
-
- /** Try to purge the record using the change buffer when the
- secondary index leaf page is not in the buffer pool. */
- BTR_DELETE = BTR_INSERT | BTR_DELETE_MARK,
-
/** The caller is already holding dict_index_t::lock S-latch. */
- BTR_ALREADY_S_LATCHED = 256,
+ BTR_ALREADY_S_LATCHED = 16,
/** Search and S-latch a leaf page, assuming that the
dict_index_t::lock S-latch is being held. */
BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF
@@ -111,28 +95,15 @@ enum btr_latch_mode {
BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
| BTR_ALREADY_S_LATCHED,
- /** Attempt to delete-mark a secondary index record. */
- BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK,
- /** Attempt to delete-mark a secondary index record
- while holding the dict_index_t::lock S-latch. */
- BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED = BTR_DELETE_MARK_LEAF
- | BTR_ALREADY_S_LATCHED,
- /** Attempt to purge a secondary index record. */
- BTR_PURGE_LEAF = BTR_MODIFY_LEAF | BTR_DELETE,
- /** Attempt to purge a secondary index record
- while holding the dict_index_t::lock S-latch. */
- BTR_PURGE_LEAF_ALREADY_S_LATCHED = BTR_PURGE_LEAF
- | BTR_ALREADY_S_LATCHED,
-
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to delete record only. It is used to optimize
block->lock range.*/
- BTR_LATCH_FOR_DELETE = 512,
+ BTR_LATCH_FOR_DELETE = 32,
/** In the case of BTR_MODIFY_TREE, the caller specifies
the intention to insert a record only. It is used to optimize
block->lock range.*/
- BTR_LATCH_FOR_INSERT = 1024,
+ BTR_LATCH_FOR_INSERT = 64,
/** Attempt to delete a record in the tree. */
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
@@ -143,12 +114,8 @@ enum btr_latch_mode {
/** Attempt to insert a record into the tree. */
BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT,
- /** This flag ORed to BTR_INSERT says that we can ignore possible
- UNIQUE definition on secondary indexes when we decide if we can use
- the insert buffer to speed up inserts */
- BTR_IGNORE_SEC_UNIQUE = 2048,
/** Rollback in spatial index */
- BTR_RTREE_UNDO_INS = 4096,
+ BTR_RTREE_UNDO_INS = 128,
/** Try to delete mark a spatial index record */
- BTR_RTREE_DELETE_MARK = 8192
+ BTR_RTREE_DELETE_MARK = 256
};
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 55c7a504a6c..420d4a388e8 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,10 +48,6 @@ Created 11/5/1995 Heikki Tuuri
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */
-#define BUF_GET_IF_IN_POOL_OR_WATCH 15
- /*!< Get the page only if it's in the
- buffer pool, if not then set a watch
- on the page. */
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
@@ -194,11 +190,9 @@ buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction
@param[out] err DB_SUCCESS or error code
-@param[in] allow_ibuf_merge Allow change buffer merge while
-reading the pages from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
@@ -208,9 +202,8 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
- dberr_t* err = NULL,
- bool allow_ibuf_merge = false)
- MY_ATTRIBUTE((nonnull(6), warn_unused_result));
+ dberr_t* err = NULL)
+ MY_ATTRIBUTE((nonnull(6)));
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@@ -218,14 +211,10 @@ buf_page_get_gen(
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL if a
block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
-@param[in] allow_ibuf_merge Allow change buffer merge to happen
-while reading the page from file
-then it makes sure that it does merging of change buffer changes while
-reading the page from file.
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
@@ -235,8 +224,7 @@ buf_page_get_low(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
- dberr_t* err,
- bool allow_ibuf_merge);
+ dberr_t* err);
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
@@ -539,18 +527,16 @@ public:
static constexpr uint32_t REMOVE_HASH= 2;
/** smallest state() of a buffer page that is freed in the tablespace */
static constexpr uint32_t FREED= 3;
+ /* unused state: 1U<<29 */
/** smallest state() for a block that belongs to buf_pool.LRU */
- static constexpr uint32_t UNFIXED= 1U << 29;
- /** smallest state() of a block for which buffered changes may exist */
- static constexpr uint32_t IBUF_EXIST= 2U << 29;
+ static constexpr uint32_t UNFIXED= 2U << 29;
/** smallest state() of a (re)initialized page (no doublewrite needed) */
static constexpr uint32_t REINIT= 3U << 29;
/** smallest state() for an io-fixed block */
static constexpr uint32_t READ_FIX= 4U << 29;
+ /* unused state: 5U<<29 */
/** smallest state() for a write-fixed block */
- static constexpr uint32_t WRITE_FIX= 5U << 29;
- /** smallest state() for a write-fixed block with buffered changes */
- static constexpr uint32_t WRITE_FIX_IBUF= 6U << 29;
+ static constexpr uint32_t WRITE_FIX= 6U << 29;
/** smallest state() for a write-fixed block (no doublewrite was used) */
static constexpr uint32_t WRITE_FIX_REINIT= 7U << 29;
/** buf_pool.LRU status mask in state() */
@@ -562,8 +548,7 @@ public:
byte *frame;
/* @} */
/** ROW_FORMAT=COMPRESSED page; zip.data (but not the data it points to)
- is also protected by buf_pool.mutex;
- !frame && !zip.data means an active buf_pool.watch */
+ is also protected by buf_pool.mutex */
page_zip_des_t zip;
#ifdef UNIV_DEBUG
/** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */
@@ -696,13 +681,6 @@ public:
bool is_freed() const
{ const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
- bool is_ibuf_exist() const
- {
- const auto s= state();
- ut_ad(s >= UNFIXED);
- ut_ad(s < READ_FIX);
- return (s & LRU_MASK) == IBUF_EXIST;
- }
bool is_reinit() const { return !(~state() & REINIT); }
void set_reinit(uint32_t prev_state)
@@ -713,29 +691,10 @@ public:
ut_ad(s < prev_state + UNFIXED);
}
- void set_ibuf_exist()
- {
- ut_ad(lock.is_write_locked());
- ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
- const auto s= state();
- ut_ad(s >= UNFIXED);
- ut_ad(s < READ_FIX);
- ut_ad(s < IBUF_EXIST || s >= REINIT);
- zip.fix.fetch_add(IBUF_EXIST - (LRU_MASK & s));
- }
- void clear_ibuf_exist()
- {
- ut_ad(lock.is_write_locked());
- ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
- ut_d(const auto s=) zip.fix.fetch_sub(IBUF_EXIST - UNFIXED);
- ut_ad(s >= IBUF_EXIST);
- ut_ad(s < REINIT);
- }
-
uint32_t read_unfix(uint32_t s)
{
ut_ad(lock.is_write_locked());
- ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
+ ut_ad(s == UNFIXED + 1 || s == REINIT + 1);
uint32_t old_state= zip.fix.fetch_add(s - READ_FIX);
ut_ad(old_state >= READ_FIX);
ut_ad(old_state < WRITE_FIX);
@@ -824,7 +783,7 @@ public:
uint32_t fix(uint32_t count= 1)
{
ut_ad(count);
- ut_ad(count < IBUF_EXIST);
+ ut_ad(count < REINIT);
uint32_t f= zip.fix.fetch_add(count);
ut_ad(f >= FREED);
ut_ad(!((f ^ (f + 1)) & LRU_MASK));
@@ -1424,78 +1383,10 @@ public:
public:
/** @return whether the buffer pool contains a page
- @tparam allow_watch whether to allow watch_is_sentinel()
@param page_id page identifier
@param chain hash table chain for page_id.fold() */
- template<bool allow_watch= false>
- TRANSACTIONAL_INLINE
- bool page_hash_contains(const page_id_t page_id, hash_chain &chain)
- {
- transactional_shared_lock_guard<page_hash_latch> g
- {page_hash.lock_get(chain)};
- buf_page_t *bpage= page_hash.get(page_id, chain);
- if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)])
- {
- ut_ad(!bpage->in_zip_hash);
- ut_ad(!bpage->zip.data);
- if (!allow_watch)
- bpage= nullptr;
- }
- return bpage;
- }
-
- /** Determine if a block is a sentinel for a buffer pool watch.
- @param bpage page descriptor
- @return whether bpage a sentinel for a buffer pool watch */
- bool watch_is_sentinel(const buf_page_t &bpage)
- {
-#ifdef SAFE_MUTEX
- DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
- page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
- is_locked());
-#endif /* SAFE_MUTEX */
- ut_ad(bpage.in_file());
- if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)])
- return false;
- ut_ad(!bpage.in_zip_hash);
- ut_ad(!bpage.zip.data);
- return true;
- }
-
- /** Check if a watched page has been read.
- This may only be called after !watch_set() and before invoking watch_unset().
- @param id page identifier
- @return whether the page was read to the buffer pool */
- TRANSACTIONAL_INLINE
- bool watch_occurred(const page_id_t id)
- {
- hash_chain &chain= page_hash.cell_get(id.fold());
- transactional_shared_lock_guard<page_hash_latch> g
- {page_hash.lock_get(chain)};
- /* The page must exist because watch_set() increments buf_fix_count. */
- return !watch_is_sentinel(*page_hash.get(id, chain));
- }
-
- /** Register a watch for a page identifier.
- @param id page identifier
- @param chain page_hash.cell_get(id.fold())
- @return a buffer page corresponding to id
- @retval nullptr if the block was not present in page_hash */
- buf_page_t *watch_set(const page_id_t id, hash_chain &chain);
-
- /** Stop watching whether a page has been read in.
- watch_set(id) must have returned nullptr before.
- @param id page identifier
- @param chain unlocked hash table chain */
- void watch_unset(const page_id_t id, hash_chain &chain);
-
- /** Remove the sentinel block for the watch before replacing it with a
- real block. watch_unset() or watch_occurred() will notice
- that the block has been replaced with the real block.
- @param w sentinel
- @param chain locked hash table chain
- @return w->state() */
- inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain);
+ TRANSACTIONAL_TARGET
+ bool page_hash_contains(const page_id_t page_id, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
TPOOL_SUPPRESS_TSAN
@@ -1882,9 +1773,6 @@ public:
# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
#endif
- /** Sentinels to detect if pages are read into the buffer pool while
- a delete-buffering operation is pending. Protected by mutex. */
- buf_page_t watch[innodb_purge_threads_MAX + 1];
/** Reserve a buffer. */
buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }
diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl
index 4516a24803c..24f7352ca4c 100644
--- a/storage/innobase/include/buf0buf.inl
+++ b/storage/innobase/include/buf0buf.inl
@@ -90,7 +90,7 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage)
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
inline buf_block_t *buf_block_alloc()
{
- return buf_LRU_get_free_block(false);
+ return buf_LRU_get_free_block(have_no_mutex);
}
/********************************************************************//**
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index aec08e77f54..d8ce8333eb1 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -62,6 +62,17 @@ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED);
@retval NULL if the free list is empty */
buf_block_t* buf_LRU_get_free_only();
+/** How to acquire a block */
+enum buf_LRU_get {
+ /** The caller is not holding buf_pool.mutex */
+ have_no_mutex= 0,
+ /** The caller is holding buf_pool.mutex */
+ have_mutex,
+ /** The caller is not holding buf_pool.mutex and is OK if a block
+ cannot be allocated. */
+ have_no_mutex_soft
+};
+
/** Get a block from the buf_pool.free list.
If the list is empty, blocks will be moved from the end of buf_pool.LRU
to buf_pool.free.
@@ -83,9 +94,10 @@ we put it to free list to be used.
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
-@param have_mutex whether buf_pool.mutex is already being held
-@return the free control block, in state BUF_BLOCK_MEMORY */
-buf_block_t* buf_LRU_get_free_block(bool have_mutex)
+@param get how to allocate the block
+@return the free control block, in state BUF_BLOCK_MEMORY
+@retval nullptr if get==have_no_mutex_soft and memory was not available */
+buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
MY_ATTRIBUTE((malloc,warn_unused_result));
/** @return whether the unzip_LRU list should be used for evicting a victim
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index 4ec8938c689..ebf0f60ffe5 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,8 +24,7 @@ The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
-#ifndef buf0rea_h
-#define buf0rea_h
+#pragma once
#include "buf0buf.h"
@@ -33,15 +32,17 @@ Created 11/5/1995 Heikki Tuuri
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-@param page_id page id
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@retval DB_SUCCESS if the page was read and is not corrupted
+@param page_id page id
+@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@param chain buf_pool.page_hash cell for page_id
+@retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size);
+dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
+ buf_pool_t::hash_chain &chain);
/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
@@ -57,21 +58,14 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
+mechanism is not activated. NOTE: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
+end up waiting for these latches!
@param[in] page_id page id of a page which the current thread
wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] ibuf whether we are inside ibuf routine
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value! */
-ulint
-buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf);
+@return number of page read requests issued */
+ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size);
/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
@@ -92,26 +86,12 @@ only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
@param[in] page_id page id of a page which the current thread wants to access
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] ibuf whether if we are inside ibuf routine
@return number of page read requests issued */
-ulint
-buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf);
+ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size);
/** Issue read requests for pages that need to be recovered.
@param space_id tablespace identifier
@param page_nos page numbers to read, in ascending order */
void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos);
-
-/** @name Modes used in read-ahead @{ */
-/** read only pages belonging to the insert buffer tree */
-#define BUF_READ_IBUF_PAGES_ONLY 131
-/** read any page */
-#define BUF_READ_ANY_PAGE 132
-/* @} */
-
-#endif
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index 3d63ddb767c..d4885186087 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +33,6 @@ Created 1/16/1996 Heikki Tuuri
/** @return whether a length is actually stored in a field */
#define len_is_stored(len) (len != UNIV_SQL_NULL && len != UNIV_SQL_DEFAULT)
-extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
@@ -196,14 +195,6 @@ constexpr uint8_t DATA_MBR_LEN= uint8_t(SPDIMS * 2 * sizeof(double));
/*-------------------------------------------*/
-/* This many bytes we need to store the type information affecting the
-alphabetical order for a single field and decide the storage size of an
-SQL null*/
-#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
-/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
-store the charset-collation number; one byte is left unused, though */
-#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
-
/* Maximum multi-byte character length in bytes, plus 1 */
#define DATA_MBMAX 8
@@ -344,13 +335,11 @@ charset-collation code.
DATA_BINARY_TYPE etc.
@param[in] charset_coll character-set collation code
@return precise type, including the charset-collation code */
-UNIV_INLINE
-uint32_t
-dtype_form_prtype(ulint old_prtype, ulint charset_coll)
+inline uint32_t dtype_form_prtype(ulint old_prtype, ulint charset_coll)
{
- ut_ad(old_prtype < 256 * 256);
- ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
- return(uint32_t(old_prtype + (charset_coll << 16)));
+ ut_ad(old_prtype <= 0xffff);
+ ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
+ return uint32_t(old_prtype | (charset_coll << 16));
}
/*********************************************************************//**
@@ -439,40 +428,6 @@ dtype_get_sql_null_size(
const dtype_t* type, /*!< in: type */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for the stored order info */
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len);/*!< in: prefix length to
- replace type->len, or 0 */
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for stored type order info */
-
/*********************************************************************//**
Validates a data type structure.
@return TRUE if ok */
@@ -494,8 +449,6 @@ struct dict_col_t;
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
dtype_set()
-dtype_read_for_order_and_null_size()
-dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
struct dtype_t{
diff --git a/storage/innobase/include/data0type.inl b/storage/innobase/include/data0type.inl
index 329cee5d190..add6c211bb9 100644
--- a/storage/innobase/include/data0type.inl
+++ b/storage/innobase/include/data0type.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -183,126 +183,6 @@ dtype_get_mbmaxlen(
return type->mbmaxlen;
}
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len)/*!< in: prefix length to
- replace type->len, or 0 */
-{
- compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- ulint len;
-
- ut_ad(type);
- ut_ad(type->mtype >= DATA_VARCHAR);
- ut_ad(type->mtype <= DATA_MTYPE_MAX);
-
- buf[0] = (byte)(type->mtype & 0xFFUL);
-
- if (type->prtype & DATA_BINARY_TYPE) {
- buf[0] |= 128;
- }
-
- /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
- buf[0] |= 64;
- }
- */
-
- buf[1] = (byte)(type->prtype & 0xFFUL);
-
- len = prefix_len ? prefix_len : type->len;
-
- mach_write_to_2(buf + 2, len & 0xFFFFUL);
-
- ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM);
- mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
-
- if (type->prtype & DATA_NOT_NULL) {
- buf[4] |= 128;
- }
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the < 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
- compile_time_assert(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- type->prtype = dtype_form_prtype(type->prtype,
- data_mysql_default_charset_coll);
- dtype_set_mblen(type);
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
- compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- if (buf[4] & 128) {
- type->prtype |= DATA_NOT_NULL;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- ulint charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
-
- if (dtype_is_string_type(type->mtype)) {
- ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
-
- if (charset_coll == 0) {
- /* This insert buffer record was inserted with MySQL
- version < 4.1.2, and the charset-collation code was not
- explicitly stored to dtype->prtype at that time. It
- must be the default charset-collation of this MySQL
- installation. */
-
- charset_coll = data_mysql_default_charset_coll;
- }
-
- type->prtype = dtype_form_prtype(type->prtype, charset_coll);
- }
- dtype_set_mblen(type);
-}
-
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
@return fixed size, or 0 */
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index a65287476ef..68400d2095d 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -44,39 +44,6 @@ dict_hdr_get_new_id(
(not assigned if NULL) */
uint32_t* space_id); /*!< out: space id
(not assigned if NULL) */
-/** Update dict_sys.row_id in the dictionary header file page. */
-void dict_hdr_flush_row_id(row_id_t id);
-/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
-inline row_id_t dict_sys_t::get_new_row_id()
-{
- row_id_t id= row_id.fetch_add(1);
- if (!(id % ROW_ID_WRITE_MARGIN))
- dict_hdr_flush_row_id(id);
- return id;
-}
-
-/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
-inline void dict_sys_t::update_row_id(row_id_t id)
-{
- row_id_t sys_id= row_id;
- while (id >= sys_id)
- {
- if (!row_id.compare_exchange_strong(sys_id, id))
- continue;
- if (!(id % ROW_ID_WRITE_MARGIN))
- dict_hdr_flush_row_id(id);
- break;
- }
-}
-
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-inline void dict_sys_write_row_id(byte *field, row_id_t row_id)
-{
- static_assert(DATA_ROW_ID_LEN == 6, "compatibility");
- mach_write_to_6(field, row_id);
-}
-
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
@@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; }
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
-#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
+//#define DICT_HDR_ROW_ID 0 /* Was: latest assigned DB_ROW_ID */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0 */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 8daa07160a3..628ad8366af 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -2,7 +2,7 @@
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -634,8 +634,6 @@ dict_table_get_next_index(
#define dict_index_is_auto_gen_clust(index) (index)->is_gen_clust()
#define dict_index_is_unique(index) (index)->is_unique()
#define dict_index_is_spatial(index) (index)->is_spatial()
-#define dict_index_is_ibuf(index) (index)->is_ibuf()
-#define dict_index_is_sec_or_ibuf(index) !(index)->is_primary()
#define dict_index_has_virtual(index) (index)->has_virtual()
/** Get all the FTS indexes on a table.
@@ -650,7 +648,7 @@ dict_table_get_all_fts_indexes(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
-@return number of user-defined (e.g., not ROW_ID) non-virtual
+@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned
@@ -1372,27 +1370,10 @@ private:
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
/** hash table of temporary table IDs */
hash_table_t temp_id_hash;
- /** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
- (FIXME: remove this, and move to dict_table_t) */
- Atomic_relaxed<row_id_t> row_id;
- /** The synchronization interval of row_id */
- static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
public:
/** Diagnostic message for exceeding the lock_wait() timeout */
static const char fatal_msg[];
- /** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
- inline row_id_t get_new_row_id();
-
- /** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
- inline void update_row_id(row_id_t id);
-
- /** Recover the global DB_ROW_ID sequence on database startup */
- void recover_row_id(row_id_t id)
- {
- row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
- }
-
/** @return a new temporary table ID */
table_id_t acquire_temporary_table_id()
{
diff --git a/storage/innobase/include/dict0dict.inl b/storage/innobase/include/dict0dict.inl
index 4cc3eae96ab..ead22a21757 100644
--- a/storage/innobase/include/dict0dict.inl
+++ b/storage/innobase/include/dict0dict.inl
@@ -244,7 +244,7 @@ dict_table_get_next_index(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
-@return number of user-defined (e.g., not ROW_ID) non-virtual
+@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index f7d33d5b43b..bd55848a776 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,9 +39,7 @@ Created 4/24/1996 Heikki Tuuri
/** A stack of table names related through foreign key constraints */
typedef std::deque<const char*, ut_allocator<const char*> > dict_names_t;
-/** Check each tablespace found in the data dictionary.
-Then look at each table defined in SYS_TABLES that has a space_id > 0
-to find all the file-per-table tablespaces.
+/** Open each tablespace found in the data dictionary.
In a crash recovery we already have some tablespace objects created from
processing the REDO log. We will compare the
@@ -50,7 +48,7 @@ tablespace file. In addition, more validation will be done if recovery
was needed and force_recovery is not set.
We also scan the biggest space id, and store it to fil_system. */
-void dict_check_tablespaces_and_store_max_id();
+void dict_load_tablespaces();
/** Make sure the data_file_name is saved in dict_table_t if needed.
@param[in,out] table Table object */
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index c76262ff5be..bbbda57b05d 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -64,7 +64,6 @@ combination of types */
auto-generated clustered indexes,
also DICT_UNIQUE will be set */
#define DICT_UNIQUE 2 /*!< unique index */
-#define DICT_IBUF 8 /*!< insert buffer tree */
#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
in SYS_INDEXES.TYPE */
#define DICT_FTS 32 /* FTS index; can't be combined with the
@@ -995,7 +994,7 @@ struct dict_index_t {
# define DICT_INDEX_MERGE_THRESHOLD_DEFAULT 50
unsigned type:DICT_IT_BITS;
/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
- DICT_IBUF, DICT_CORRUPT) */
+ DICT_CORRUPT) */
#define MAX_KEY_LENGTH_BITS 12
unsigned trx_id_offset:MAX_KEY_LENGTH_BITS;
/*!< position of the trx id column
@@ -1184,12 +1183,8 @@ public:
/** @return whether instant ALTER TABLE is in effect */
inline bool is_instant() const;
- /** @return whether the index is the primary key index
- (not the clustered index of the change buffer) */
- bool is_primary() const
- {
- return DICT_CLUSTERED == (type & (DICT_CLUSTERED | DICT_IBUF));
- }
+ /** @return whether the index is the primary key index */
+ bool is_primary() const { return is_clust(); }
/** @return whether this is a generated clustered index */
bool is_gen_clust() const { return type == DICT_CLUSTERED; }
@@ -1203,16 +1198,13 @@ public:
/** @return whether this is a spatial index */
bool is_spatial() const { return UNIV_UNLIKELY(type & DICT_SPATIAL); }
- /** @return whether this is the change buffer */
- bool is_ibuf() const { return UNIV_UNLIKELY(type & DICT_IBUF); }
-
/** @return whether this index requires locking */
- bool has_locking() const { return !is_ibuf(); }
+ static constexpr bool has_locking() { return true; }
/** @return whether this is a normal B-tree index
(not SPATIAL or FULLTEXT, and not flagged as corrupted) */
bool is_btree() const {
- return UNIV_LIKELY(!(type & (DICT_IBUF | DICT_SPATIAL
+ return UNIV_LIKELY(!(type & (DICT_SPATIAL
| DICT_FTS | DICT_CORRUPT)));
}
@@ -2355,6 +2347,8 @@ private:
Atomic_relaxed<pthread_t> lock_mutex_owner{0};
#endif
public:
+ /** The next DB_ROW_ID value */
+ Atomic_counter<uint64_t> row_id{0};
/** Autoinc counter value to give to the next inserted row. */
uint64_t autoinc;
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index ec50e8cd951..f6169227433 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -48,10 +48,6 @@ struct dict_add_v_col_t;
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-/* The ibuf table and indexes's ID are assigned as the number
-DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
-
typedef ib_id_t table_id_t;
typedef ib_id_t index_id_t;
@@ -136,13 +132,6 @@ struct table_name_t
inline bool is_temporary() const;
};
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Dump the change buffer at startup */
-extern my_bool ibuf_dump;
-/** Flag to control insert buffer debugging. */
-extern uint ibuf_debug;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
/** Shift for spatial status */
#define SPATIAL_STATUS_SHIFT 12
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 483d594c6b9..f53279ecb88 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,35 +51,6 @@ using space_list_t= ilist<fil_space_t, space_list_tag_t>;
// Forward declaration
extern my_bool srv_use_doublewrite_buf;
-/** Possible values of innodb_flush_method */
-enum srv_flush_t
-{
- /** fsync, the default */
- SRV_FSYNC= 0,
- /** open log files in O_DSYNC mode */
- SRV_O_DSYNC,
- /** do not call os_file_flush() when writing data files, but do flush
- after writing to log files */
- SRV_LITTLESYNC,
- /** do not flush after writing */
- SRV_NOSYNC,
- /** invoke os_file_set_nocache() on data files. This implies using
- unbuffered I/O but still fdatasync(), because some filesystems might
- not flush meta-data on write completion */
- SRV_O_DIRECT,
- /** Like O_DIRECT, but skip fdatasync(), assuming that the data is
- durable on write completion */
- SRV_O_DIRECT_NO_FSYNC
-#ifdef _WIN32
- /** Traditional Windows appoach to open all files without caching,
- and do FileFlushBuffers() */
- ,SRV_ALL_O_DIRECT_FSYNC
-#endif
-};
-
-/** innodb_flush_method */
-extern ulong srv_file_flush_method;
-
/** Undo tablespaces starts with space_id. */
extern uint32_t srv_undo_space_id_start;
/** The number of UNDO tablespaces that are open and ready to use. */
@@ -631,6 +602,8 @@ private:
}
public:
+ /** Reopen all files on set_write_through() or set_buffered(). */
+ static void reopen_all();
/** Try to close a file to adhere to the innodb_open_files limit.
@param print_info whether to diagnose why a file cannot be closed
@return whether a file was closed */
@@ -1276,11 +1249,11 @@ constexpr uint16_t FIL_PAGE_RTREE= 17854;
constexpr uint16_t FIL_PAGE_UNDO_LOG= 2;
/** Index node (of file-in-file metadata) */
constexpr uint16_t FIL_PAGE_INODE= 3;
-/** Insert buffer free list */
+/** Former change buffer free list */
constexpr uint16_t FIL_PAGE_IBUF_FREE_LIST= 4;
/** Freshly allocated page */
constexpr uint16_t FIL_PAGE_TYPE_ALLOCATED= 0;
-/** Change buffer bitmap (pages n*innodb_page_size+1) */
+/** Former change buffer bitmap pages (pages n*innodb_page_size+1) */
constexpr uint16_t FIL_PAGE_IBUF_BITMAP= 5;
/** System page */
constexpr uint16_t FIL_PAGE_TYPE_SYS= 6;
@@ -1421,6 +1394,20 @@ public:
fil_space_t* temp_space; /*!< The innodb_temporary tablespace */
/** Map of fil_space_t::id to fil_space_t* */
hash_table_t spaces;
+
+ /** whether each write to data files is durable (O_DSYNC) */
+ my_bool write_through;
+ /** whether data files are buffered (not O_DIRECT) */
+ my_bool buffered;
+
+ /** Try to enable or disable write-through of data files */
+ void set_write_through(bool write_through);
+ /** Try to enable or disable file system caching of data files */
+ void set_buffered(bool buffered);
+
+ TPOOL_SUPPRESS_TSAN bool is_write_through() const { return write_through; }
+ TPOOL_SUPPRESS_TSAN bool is_buffered() const { return buffered; }
+
/** tablespaces for which fil_space_t::needs_flush() holds */
sized_ilist<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
/** number of currently open files; protected by mutex */
@@ -1578,12 +1565,7 @@ template<bool have_reference> inline void fil_space_t::flush()
mysql_mutex_assert_not_owner(&fil_system.mutex);
ut_ad(!have_reference || (pending() & PENDING));
ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT);
- if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
- {
- ut_ad(!is_in_unflushed_spaces);
- ut_ad(!needs_flush());
- }
- else if (have_reference)
+ if (have_reference)
flush_low();
else
{
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 9a23e840380..757ead55d03 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -157,28 +157,20 @@ this many file pages */
/* This has been replaced with either srv_page_size or page_zip->size. */
/** @name The space low address page map
-The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
+The 2 pages at FSP_XDES_OFFSET are repeated
every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
/* @{ */
/*--------------------------------------*/
#define FSP_XDES_OFFSET 0U /*!< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1U /* !< insert buffer bitmap */
- /* The ibuf bitmap pages are the ones whose
- page number is the number above plus a
- multiple of XDES_DESCRIBED_PER_PAGE */
-
#define FSP_FIRST_INODE_PAGE_NO 2U /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< insert buffer
+#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< former change buffer
header page, in
tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< insert buffer
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< former change buffer
B-tree root page in
tablespace 0 */
- /* The ibuf tree root page number in
- tablespace 0; its fseg inode is on the page
- number FSP_FIRST_INODE_PAGE_NO */
#define FSP_TRX_SYS_PAGE_NO 5U /*!< transaction
system header, in
tablespace 0 */
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index b07261ce042..724a764d848 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -62,40 +62,45 @@ Created 2013/03/27 Jimmy Yang and Allen Lai
/** Search for a spatial index leaf page record.
@param cur cursor
+@param thr query thread
@param tuple search tuple
@param latch_mode latching mode
@param mtr mini-transaction
@param mode search mode */
-dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+dberr_t rtr_search_leaf(btr_cur_t *cur, que_thr_t *thr, const dtuple_t *tuple,
btr_latch_mode latch_mode, mtr_t *mtr,
page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+ MY_ATTRIBUTE((nonnull(1,3,5), warn_unused_result));
/** Search for inserting a spatial index leaf page record.
@param cur cursor
@param tuple search tuple
@param latch_mode latching mode
@param mtr mini-transaction */
-inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+inline dberr_t rtr_insert_leaf(btr_cur_t *cur, que_thr_t *thr,
+ const dtuple_t *tuple,
btr_latch_mode latch_mode, mtr_t *mtr)
{
- return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT);
+ return rtr_search_leaf(cur, thr, tuple, latch_mode, mtr,
+ PAGE_CUR_RTREE_INSERT);
}
/** Search for a spatial index leaf page record.
-@param pcur cursor
+@param pcur cursor
+@param thr query thread
@param tuple search tuple
@param mode search mode
@param mtr mini-transaction */
-dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, que_thr_t *thr,
+ const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cur, mtr_t *mtr)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,
+ const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr,
+ page_cur_mode_t mode, ulint level)
+ MY_ATTRIBUTE((nonnull(1,3,5), warn_unused_result));
/**********************************************************************//**
Builds a Rtree node pointer out of a physical record and a page number.
@@ -132,7 +137,29 @@ rtr_page_split_and_insert(
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
- dberr_t* err); /*!< out: error code */
+ dberr_t* err, /*!< out: error code */
+ que_thr_t* thr); /*!< in: query thread */
+
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts the tuple.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return inserted record */
+rec_t*
+rtr_root_raise_and_insert(
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ rec_offs** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr, /*!< in: mtr */
+ dberr_t* err, /*!< out: error code */
+ que_thr_t* thr); /*!< in: query thread */
/**************************************************************//**
Sets the child node mbr in a node pointer. */
@@ -243,8 +270,8 @@ rtr_create_rtr_info(
bool init_matches, /*!< in: Whether to initiate the
"matches" structure for collecting
matched leaf records */
- btr_cur_t* cursor, /*!< in: tree search cursor */
- dict_index_t* index); /*!< in: index struct */
+ que_thr_t* thr, /*!< in/out: query thread */
+ btr_cur_t* cursor); /*!< in: tree search cursor */
/********************************************************************//**
Update a btr_cur_t with rtr_info */
@@ -299,8 +326,10 @@ rtr_get_mbr_from_tuple(
about parent nodes in search
@param[in,out] cursor cursor on node pointer record,
its page x-latched
+@param[in,out] thr query thread
@return whether the cursor was successfully positioned */
-bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor)
+bool rtr_page_get_father(mtr_t *mtr, btr_cur_t *sea_cur, btr_cur_t *cursor,
+ que_thr_t *thr)
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
/************************************************************//**
@@ -312,11 +341,12 @@ rtr_page_get_father_block(
/*======================*/
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
- mtr_t* mtr, /*!< in: mtr */
btr_cur_t* sea_cur,/*!< in: search cursor, contains information
about parent nodes in search */
- btr_cur_t* cursor);/*!< out: cursor on node pointer record,
+ btr_cur_t* cursor, /*!< out: cursor on node pointer record,
its page x-latched */
+ que_thr_t* thr, /*!< in/out: query thread */
+ mtr_t* mtr); /*!< in/out: mtr */
/**************************************************************//**
Store the parent path cursor
@return number of cursor stored */
@@ -337,6 +367,7 @@ bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ que_thr_t* thr, /*!< in/out: query thread */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/gis0rtree.inl b/storage/innobase/include/gis0rtree.inl
index 5101eeb6f7a..460496d1978 100644
--- a/storage/innobase/include/gis0rtree.inl
+++ b/storage/innobase/include/gis0rtree.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -240,6 +240,9 @@ rtr_info_reinit_in_cursor(
bool need_prdt) /*!< in: Whether predicate lock is
needed */
{
+ que_thr_t* thr = cursor->rtr_info->thr;
+ ut_ad(thr);
rtr_clean_rtr_info(cursor->rtr_info, false);
rtr_init_rtr_info(cursor->rtr_info, need_prdt, cursor, index, true);
+ cursor->rtr_info->thr = thr;
}
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index c246b2ef513..d1ff331fe21 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,420 +16,40 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
-/**************************************************//**
-@file include/ibuf0ibuf.h
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0ibuf_h
-#define ibuf0ibuf_h
-
-#include "mtr0mtr.h"
-#include "dict0mem.h"
-#include "fsp0fsp.h"
-
-/** Default value for maximum on-disk size of change buffer in terms
-of percentage of the buffer pool. */
-#define CHANGE_BUFFER_DEFAULT_SIZE (25)
-
-/* Possible operations buffered in the insert/whatever buffer. See
-ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
-typedef enum {
- IBUF_OP_INSERT = 0,
- IBUF_OP_DELETE_MARK = 1,
- IBUF_OP_DELETE = 2,
-
- /* Number of different operation types. */
- IBUF_OP_COUNT = 3
-} ibuf_op_t;
-
-/** Combinations of operations that can be buffered.
-@see innodb_change_buffering_names */
-enum ibuf_use_t {
- IBUF_USE_NONE = 0,
- IBUF_USE_INSERT, /* insert */
- IBUF_USE_DELETE_MARK, /* delete */
- IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
- IBUF_USE_DELETE, /* delete+purge */
- IBUF_USE_ALL /* insert+delete+purge */
-};
-
-/** Operations that can currently be buffered. */
-extern ulong innodb_change_buffering;
-
-/** Insert buffer struct */
-struct ibuf_t{
- Atomic_relaxed<ulint> size; /*!< current size of the ibuf index
- tree, in pages */
- Atomic_relaxed<ulint> max_size; /*!< recommended maximum size of the
- ibuf index tree, in pages */
- ulint seg_size; /*!< allocated pages of the file
- segment containing ibuf header and
- tree */
- bool empty; /*!< Protected by the page
- latch of the root page of the
- insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). true
- if and only if the insert
- buffer tree is empty. */
- ulint free_list_len; /*!< length of the free list */
- ulint height; /*!< tree height */
- dict_index_t* index; /*!< insert buffer index */
-
- /** number of pages merged */
- Atomic_counter<ulint> n_merges;
- Atomic_counter<ulint> n_merged_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- merged to index pages */
- Atomic_counter<ulint> n_discarded_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- discarded without merging due to the
- tablespace being deleted or the
- index being dropped */
-};
-
-/** The insert buffer control structure */
-extern ibuf_t ibuf;
-
-/* The purpose of the insert buffer is to reduce random disk access.
-When we wish to insert a record into a non-unique secondary index and
-the B-tree leaf page where the record belongs to is not in the buffer
-pool, we insert the record into the insert buffer B-tree, indexed by
-(space_id, page_no). When the page is eventually read into the buffer
-pool, we look up the insert buffer B-tree for any modifications to the
-page, and apply these upon the completion of the read operation. This
-is called the insert buffer merge. */
-
-/* The insert buffer merge must always succeed. To guarantee this,
-the insert buffer subsystem keeps track of the free space in pages for
-which it can buffer operations. Two bits per page in the insert
-buffer bitmap indicate the available space in coarse increments. The
-free bits in the insert buffer bitmap must never exceed the free space
-on a page. It is safe to decrement or reset the bits in the bitmap in
-a mini-transaction that is committed before the mini-transaction that
-affects the free space. It is unsafe to increment the bits in a
-separately committed mini-transaction, because in crash recovery, the
-free bits could momentarily be set too high. */
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup.
-@return DB_SUCCESS or failure */
-dberr_t
-ibuf_init_at_db_start(void);
-/*=======================*/
-/*********************************************************************//**
-Updates the max_size value for ibuf. */
-void
-ibuf_max_size_update(
-/*=================*/
- ulint new_val); /*!< in: new value in terms of
- percentage of the buffer pool size */
-/*********************************************************************//**
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-void
-ibuf_update_max_tablespace_id(void);
-/*===============================*/
-/***************************************************************//**
-Starts an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_start(
-/*===========*/
- mtr_t* mtr) /*!< out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Commits an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_commit(
-/*============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space. */
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block); /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase);/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-/**********************************************************************//**
-Updates the free bits for an uncompressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-void
-ibuf_update_free_bits_low(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- ulint max_ins_size, /*!< in: value of
- maximum insert size
- with reorganize before
- the latest operation
- performed to the page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for a compressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-void
-ibuf_update_free_bits_zip(
-/*======================*/
- buf_block_t* block, /*!< in/out: index page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for the two pages to reflect the present state.
-Does this in the mtr given, which means that the latching order rules
-virtually prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- buf_block_t* block1, /*!< in: index page */
- buf_block_t* block2, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique); /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INLINE
-ibool
-ibuf_inside(
-/*========*/
- const mtr_t* mtr) /*!< in: mini-transaction */
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Checks if a page address is an ibuf bitmap page (level 3 page) address.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return TRUE if a bitmap page */
-inline bool ibuf_bitmap_page(const page_id_t page_id, ulint zip_size)
-{
- ut_ad(ut_is_2pow(zip_size));
- ulint size = zip_size ? zip_size : srv_page_size;
- return (page_id.page_no() & (size - 1)) == FSP_IBUF_BITMAP_OFFSET;
-}
-
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==true.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] x_latch FALSE if relaxed check (avoid latching the
-bitmap page)
-@param[in,out] mtr mtr which will contain an x-latch to the
-bitmap page if the page is not one of the fixed address ibuf pages, or NULL,
-in which case a new transaction is created.
-@return true if level 2 or level 3 page */
-bool
-ibuf_page_low(
- const page_id_t page_id,
- ulint zip_size,
-#ifdef UNIV_DEBUG
- bool x_latch,
-#endif /* UNIV_DEBUG */
- mtr_t* mtr)
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifdef UNIV_DEBUG
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==true.
-@param[in] page_id tablespace/page identifier
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] mtr mini-transaction or NULL
-@return TRUE if level 2 or level 3 page */
-# define ibuf_page(page_id, zip_size, mtr) \
- ibuf_page_low(page_id, zip_size, true, mtr)
-
-#else /* UNIV_DEBUG */
-
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==true.
-@param[in] page_id tablespace/page identifier
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] mtr mini-transaction or NULL
-@return TRUE if level 2 or level 3 page */
-# define ibuf_page(page_id, zip_size, mtr) \
- ibuf_page_low(page_id, zip_size, mtr)
-
-#endif /* UNIV_DEBUG */
-/***********************************************************************//**
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-void
-ibuf_free_excess_pages(void);
-/*========================*/
-
-/** Buffer an operation in the change buffer, instead of applying it
-directly to the file page, if this is possible. Does not do it if the index
-is clustered or unique.
-@param[in] op operation type
-@param[in] entry index entry to insert
-@param[in,out] index index where to insert
-@param[in] page_id page id where to insert
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in,out] thr query thread
-@return true if success */
-bool
-ibuf_insert(
- ibuf_op_t op,
- const dtuple_t* entry,
- dict_index_t* index,
- const page_id_t page_id,
- ulint zip_size,
- que_thr_t* thr);
-
-/** Check whether buffered changes exist for a page.
-@param[in] id page identifier
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return whether buffered changes exist */
-bool ibuf_page_exists(const page_id_t id, ulint zip_size);
-
-/** When an index page is read from a disk to the buffer pool, this function
-applies any buffered operations to the page and deletes the entries from the
-insert buffer. If the page is not read, but created in the buffer pool, this
-function deletes its buffered entries from the insert buffer; there can
-exist entries for such a page if the page belonged to an index which
-subsequently was dropped.
-@param block X-latched page to try to apply changes to, or NULL to discard
-@param page_id page identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return error code */
-dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
- const page_id_t page_id,
- ulint zip_size);
-
-/** Delete all change buffer entries for a tablespace,
-in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead.
-@param[in] space missing or to-be-discarded tablespace */
-void ibuf_delete_for_discarded_space(uint32_t space);
-
-/** Contract the change buffer by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read
-@retval 0 if ibuf.empty */
-ulint ibuf_contract();
-
-/** Contracts insert buffer trees by reading pages referring to space_id
-to the buffer pool.
-@returns number of pages merged.*/
-ulint
-ibuf_merge_space(
-/*=============*/
- ulint space); /*!< in: space id */
-
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return true if empty */
-bool
-ibuf_is_empty(void);
-/*===============*/
-/******************************************************************//**
-Prints info of ibuf. */
-void
-ibuf_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-/********************************************************************
-Read the first two bytes from a record's fourth field (counter field in new
-records; something else in older records).
-@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
-ulint
-ibuf_rec_get_counter(
-/*=================*/
- const rec_t* rec); /*!< in: ibuf record */
-/******************************************************************//**
-Closes insert buffer and frees the data structures. */
-void
-ibuf_close(void);
-/*============*/
-
-/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
-@param[in] trx transaction
-@param[in,out] space tablespace being imported
-@return DB_SUCCESS or error code */
-dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** Update free bits and buffered bits for bulk loaded page.
-@param block secondary index leaf page
-@param mtr mini-transaction
-@param reset whether the page is full */
-void ibuf_set_bitmap_for_bulk_load(buf_block_t *block, mtr_t *mtr, bool reset);
-
-#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
-#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
-
-/* The ibuf header page currently contains only the file segment header
-for the file segment from which the pages for the ibuf tree are allocated */
-#define IBUF_HEADER PAGE_DATA
-#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
-
-/* The insert buffer tree itself is always located in space 0. */
-#define IBUF_SPACE_ID static_cast<ulint>(0)
-
-#include "ibuf0ibuf.inl"
-
-#endif
+#include "db0err.h"
+
+/* The purpose of the change buffer was to reduce random disk access.
+When we wished to
+(1) insert a record into a non-unique secondary index,
+(2) delete-mark a secondary index record,
+(3) delete a secondary index record as part of purge (but not ROLLBACK),
+and the B-tree leaf page where the record belonged was not in the buffer
+pool, we inserted a record into the change buffer B-tree, indexed by
+the page identifier. When the page was eventually read into the buffer
+pool, we looked up the change buffer B-tree for any modifications to the
+page, and applied these upon the completion of the read operation. This
+was called the insert buffer merge.
+
+There was a hash index of the change buffer B-tree, implemented as the
+"change buffer bitmap". Bits in these bitmap pages indicated roughly how
+full the page was, and whether any records for the page identifier
+existed in the change buffer. The "free" bits had to be updated as part of
+operations that modified secondary index leaf pages.
+
+Because the change buffer has been removed, we will no longer update
+any change buffer bitmap pages. Instead, on database startup, we will
+check if an upgrade needs to be performed, and apply any buffered
+changes if that is the case. Finally, the change buffer will be
+transformed to a format that will not be recognized by earlier
+versions of MariaDB Server, to prevent downgrades from causing
+corruption (due to the removed updates of the bitmap pages) when the
+change buffer might be enabled. */
+
+/** Check if ibuf_upgrade() is needed as part of server startup.
+@return error code
+@retval DB_SUCCESS if no upgrade is needed
+@retval DB_FAIL if the change buffer is not empty (need ibuf_upgrade()) */
+dberr_t ibuf_upgrade_needed();
+
+/** Upgrade the change buffer after all redo log has been applied. */
+dberr_t ibuf_upgrade();
diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl
deleted file mode 100644
index 003bf22a047..00000000000
--- a/storage/innobase/include/ibuf0ibuf.inl
+++ /dev/null
@@ -1,282 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2021, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0ibuf.ic
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "page0page.h"
-#include "page0zip.h"
-#include "fsp0types.h"
-#include "buf0lru.h"
-
-/** An index page must contain at least srv_page_size /
-IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
-buffer inserts to this page. If there is this much of free space, the
-corresponding bits are set in the ibuf bitmap. */
-#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
-
-/***************************************************************//**
-Starts an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_start(
-/*===========*/
- mtr_t* mtr) /*!< out: mini-transaction */
-{
- mtr_start(mtr);
- mtr->enter_ibuf();
-
- if (high_level_read_only || srv_read_only_mode) {
- mtr_set_log_mode(mtr, MTR_LOG_NO_REDO);
- }
-
-}
-/***************************************************************//**
-Commits an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_commit(
-/*============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mtr->is_inside_ibuf());
- ut_d(mtr->exit_ibuf());
-
- mtr_commit(mtr);
-}
-
-/************************************************************************//**
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val); /*!< in: value to set: < 4 */
-#ifdef UNIV_IBUF_DEBUG
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
-#else /* UNIV_IBUF_DEBUG */
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
-#endif /* UNIV_IBUF_DEBUG */
-
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique) /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-{
- if (index->type & (DICT_CLUSTERED | DICT_IBUF | DICT_SPATIAL) ||
- !innodb_change_buffering || !ibuf.max_size)
- return false;
- if (!ignore_sec_unique && index->is_unique())
- return false;
- if (index->table->quiesce != QUIESCE_NONE)
- return false;
- for (unsigned i= 0; i < index->n_fields; i++)
- if (index->fields[i].descending)
- return false;
- return true;
-}
-
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INLINE
-ibool
-ibuf_inside(
-/*========*/
- const mtr_t* mtr) /*!< in: mini-transaction */
-{
- return(mtr->is_inside_ibuf());
-}
-
-/** Translates the free space on a page to a value in the ibuf bitmap.
-@param[in] page_size page size in bytes
-@param[in] max_ins_size maximum insert size after reorganize for
-the page
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_bits(
- ulint page_size,
- ulint max_ins_size)
-{
- ulint n;
- ut_ad(ut_is_2pow(page_size));
- ut_ad(page_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
-
- n = max_ins_size / (page_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-
- if (n == 3) {
- n = 2;
- }
-
- if (n > 3) {
- n = 3;
- }
-
- return(n);
-}
-
-/*********************************************************************//**
-Translates the free space on a compressed page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_zip(
-/*==========================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint max_ins_size;
- const page_zip_des_t* page_zip;
- lint zip_max_ins;
-
- ut_ad(block->page.zip.data);
-
- /* Consider the maximum insert size on the uncompressed page
- without reorganizing the page. We must not assume anything
- about the compression ratio. If zip_max_ins > max_ins_size and
- there is 1/4 garbage on the page, recompression after the
- reorganize could fail, in theory. So, let us guarantee that
- merging a buffered insert to a compressed page will always
- succeed without reorganizing or recompressing the page, just
- by using the page modification log. */
- max_ins_size = page_get_max_insert_size(
- buf_block_get_frame(block), 1);
-
- page_zip = buf_block_get_page_zip(block);
- zip_max_ins = page_zip_max_ins_size(page_zip,
- FALSE/* not clustered */);
-
- if (zip_max_ins < 0) {
- return(0);
- } else if (max_ins_size > (ulint) zip_max_ins) {
- max_ins_size = (ulint) zip_max_ins;
- }
-
- return(ibuf_index_page_calc_free_bits(block->physical_size(),
- max_ins_size));
-}
-
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free(
-/*======================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- if (!block->page.zip.data) {
- ulint max_ins_size;
-
- max_ins_size = page_get_max_insert_size_after_reorganize(
- buf_block_get_frame(block), 1);
-
- return(ibuf_index_page_calc_free_bits(
- block->physical_size(), max_ins_size));
- } else {
- return(ibuf_index_page_calc_free_zip(block));
- }
-}
-
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase)/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-{
- ulint before;
- ulint after;
-
- ut_ad(buf_block_get_page_zip(block) == NULL);
-
- before = ibuf_index_page_calc_free_bits(
- srv_page_size, max_ins_size);
-
- if (max_ins_size >= increase) {
- compile_time_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX);
- after = ibuf_index_page_calc_free_bits(
- srv_page_size, max_ins_size - increase);
-#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(block));
-#endif
- } else {
- after = ibuf_index_page_calc_free(block);
- }
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(&block->page);
- }
-
- if (before > after) {
- ibuf_set_free_bits(block, after, before);
- }
-}
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 09e4ece8894..8afa92abc93 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -275,6 +275,8 @@ public:
bool log_maybe_unbuffered;
# endif
#endif
+ /** whether each write to ib_logfile0 is durable (O_DSYNC) */
+ my_bool log_write_through;
/** Fields involved in checkpoints @{ */
lsn_t log_capacity; /*!< capacity of the log; if
@@ -362,6 +364,8 @@ public:
/** Try to enable or disable file system caching (update log_buffered) */
void set_buffered(bool buffered);
#endif
+ /** Try to enable or disable durable writes (update log_write_through) */
+ void set_write_through(bool write_through);
void attach(log_file_t file, os_offset_t size);
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index e787d81e8c2..c0b79f1a76d 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -417,16 +417,6 @@ public:
/** The recovery system */
extern recv_sys_t recv_sys;
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this will be set if
-recv_sys.pages becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-TRUE means that recovery is running and no operations on the log files
-are allowed yet: the variable name is misleading. */
-extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index b8df6d9f63e..5576560dca8 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -308,15 +308,6 @@ public:
@retval 0 if the transaction only modified temporary tablespaces */
lsn_t commit_lsn() const { ut_ad(has_committed()); return m_commit_lsn; }
- /** Note that we are inside the change buffer code. */
- void enter_ibuf() { m_inside_ibuf= true; }
-
- /** Note that we have exited from the change buffer code. */
- void exit_ibuf() { m_inside_ibuf= false; }
-
- /** @return true if we are inside the change buffer code */
- bool is_inside_ibuf() const { return m_inside_ibuf; }
-
/** Note that some pages have been freed */
void set_trim_pages() { m_trim_pages= true; }
@@ -745,10 +736,6 @@ private:
/** whether log_sys.latch is locked exclusively */
uint16_t m_latch_ex:1;
- /** whether change buffer is latched; only needed in non-debug builds
- to suppress some read-ahead operations, @see ibuf_inside() */
- uint16_t m_inside_ibuf:1;
-
/** whether the pages has been trimmed */
uint16_t m_trim_pages:1;
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 28aa30565e4..279138acd79 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -117,11 +117,6 @@ succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
@@ -151,11 +146,6 @@ page_cur_insert_rec_low(
Inserts a record next to page cursor on a compressed and uncompressed
page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to inserted record
@return nullptr on failure */
rec_t*
diff --git a/storage/innobase/include/page0cur.inl b/storage/innobase/include/page0cur.inl
index 7c4eafa266a..a73c31a7bff 100644
--- a/storage/innobase/include/page0cur.inl
+++ b/storage/innobase/include/page0cur.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -155,11 +155,6 @@ succeed, i.e., enough space available, NULL otherwise. The cursor stays at
the same logical position, but the physical position may change if it is
pointing to a compressed page that was reorganized.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 2978656b508..38373f6bb19 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -95,7 +95,7 @@ this byte can be garbage. */
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
/** The largest DB_TRX_ID that may have modified a record on the page;
-Defined only in secondary index leaf pages and in change buffer leaf pages.
+Defined only in secondary index leaf pages.
Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
#define PAGE_MAX_TRX_ID 18
/** The AUTO_INCREMENT value (on persistent clustered index root pages). */
@@ -901,11 +901,6 @@ MY_ATTRIBUTE((nonnull, warn_unused_result))
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_t::commit().
-
@return error code */
dberr_t
page_copy_rec_list_end_no_locks(
@@ -920,11 +915,6 @@ Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_t::commit().
-
@return pointer to the original successor of the infimum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -942,11 +932,6 @@ Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to the original predecessor of the supremum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 4332990619e..501ef31a8f9 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -2,7 +2,7 @@
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -213,9 +213,9 @@ page_zip_max_ins_size(
/**********************************************************************//**
Determine if enough space is available in the modification log.
-@return TRUE if page_zip_write_rec() will succeed */
+@return true if page_zip_write_rec() will succeed */
UNIV_INLINE
-ibool
+bool
page_zip_available(
/*===============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
@@ -323,10 +323,6 @@ Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, redo log will be written.
The function btr_page_reorganize() should be preferred whenever possible.
-IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
-non-clustered index, the caller must update the insert buffer free
-bits in the same mini-transaction in such a way that the modification
-will be redo-logged.
@return error code
@retval DB_FAIL on overflow; the block_zip will be left intact */
dberr_t
diff --git a/storage/innobase/include/page0zip.inl b/storage/innobase/include/page0zip.inl
index afc877c3720..edcd4ab48fa 100644
--- a/storage/innobase/include/page0zip.inl
+++ b/storage/innobase/include/page0zip.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -269,7 +269,7 @@ page_zip_max_ins_size(
Determine if enough space is available in the modification log.
@return true if enough space is available */
UNIV_INLINE
-ibool
+bool
page_zip_available(
/*===============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
diff --git a/storage/innobase/include/rem0rec.inl b/storage/innobase/include/rem0rec.inl
index 46c209cbdec..da7337a3b82 100644
--- a/storage/innobase/include/rem0rec.inl
+++ b/storage/innobase/include/rem0rec.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1095,9 +1095,7 @@ rec_get_converted_size(
ut_ad(dtuple_check_typed(dtuple));
#ifdef UNIV_DEBUG
- if (dict_index_is_ibuf(index)) {
- ut_ad(dtuple->n_fields > 1);
- } else if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
== REC_STATUS_NODE_PTR) {
ut_ad(dtuple->n_fields - 1
== dict_index_get_n_unique_in_tree_nonleaf(index));
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index b1390fd1ef1..686bbaa7384 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2019, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,39 +37,6 @@ Created 3/14/1997 Heikki Tuuri
#include <queue>
class MDL_ticket;
-/** Determines if it is possible to remove a secondary index entry.
-Removal is possible if the secondary index entry does not refer to any
-not delete marked version of a clustered index record where DB_TRX_ID
-is newer than the purge view.
-
-NOTE: This function should only be called by the purge thread, only
-while holding a latch on the leaf page of the secondary index entry
-(or keeping the buffer pool watch on the page). It is possible that
-this function first returns true and then false, if a user transaction
-inserts a record that the secondary index entry would refer to.
-However, in that case, the user transaction would also re-insert the
-secondary index entry after purge has removed it and released the leaf
-page latch.
-@param[in,out] node row purge node
-@param[in] index secondary index
-@param[in] entry secondary index entry
-@param[in,out] sec_pcur secondary index cursor or NULL
- if it is called for purge buffering
- operation.
-@param[in,out] sec_mtr mini-transaction which holds
- secondary index entry or NULL if it is
- called for purge buffering operation.
-@param[in] is_tree true=pessimistic purge,
- false=optimistic (leaf-page only)
-@return true if the secondary index record can be purged */
-bool
-row_purge_poss_sec(
- purge_node_t* node,
- dict_index_t* index,
- const dtuple_t* entry,
- btr_pcur_t* sec_pcur=NULL,
- mtr_t* sec_mtr=NULL,
- bool is_tree=false);
/***************************************************************
Does the purge operation.
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index a1350740e2a..a26924d08a0 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +28,6 @@ Created 4/20/1996 Heikki Tuuri
#define row0row_h
#include "que0types.h"
-#include "ibuf0ibuf.h"
#include "trx0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
@@ -344,23 +343,10 @@ row_parse_int(
ulint mtype,
bool unsigned_type);
-/** Result of row_search_index_entry */
-enum row_search_result {
- ROW_FOUND = 0, /*!< the record was found */
- ROW_NOT_FOUND, /*!< record not found */
- ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or
- BTR_DELETE_MARK was specified, the
- secondary index leaf page was not in
- the buffer pool, and the operation was
- enqueued in the insert/delete buffer */
- ROW_NOT_DELETED_REF /*!< BTR_DELETE was specified, and
- row_purge_poss_sec() failed */
-};
-
/***************************************************************//**
Searches an index record.
-@return whether the record was found or buffered */
-enum row_search_result
+@return whether the record was found */
+bool
row_search_index_entry(
/*===================*/
const dtuple_t* entry, /*!< in: index entry */
@@ -398,22 +384,17 @@ row_raw_format(
in bytes */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+#include "dict0mem.h"
+
/** Prepare to start a mini-transaction to modify an index.
@param[in,out] mtr mini-transaction
-@param[in,out] index possibly secondary index
-@param[in] pessimistic whether this is a pessimistic operation */
-inline
-void
-row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
+@param[in,out] index possibly secondary index */
+inline void row_mtr_start(mtr_t* mtr, dict_index_t* index)
{
mtr->start();
switch (index->table->space_id) {
- case IBUF_SPACE_ID:
- if (pessimistic
- && !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
- ibuf_free_excess_pages();
- }
+ case 0:
break;
case SRV_TMP_SPACE_ID:
mtr->set_log_mode(MTR_LOG_NO_REDO);
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 51f3049b81a..4672ce00a36 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -2,7 +2,7 @@
Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -228,12 +228,8 @@ enum monitor_id_t {
MONITOR_MODULE_BUF_PAGE,
MONITOR_INDEX_LEAF_PAGE_READ,
MONITOR_INDEX_NON_LEAF_PAGE_READ,
- MONITOR_INDEX_IBUF_LEAF_PAGE_READ,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ,
MONITOR_UNDO_LOG_PAGE_READ,
MONITOR_INODE_PAGE_READ,
- MONITOR_IBUF_FREELIST_PAGE_READ,
- MONITOR_IBUF_BITMAP_PAGE_READ,
MONITOR_SYSTEM_PAGE_READ,
MONITOR_TRX_SYSTEM_PAGE_READ,
MONITOR_FSP_HDR_PAGE_READ,
@@ -244,12 +240,8 @@ enum monitor_id_t {
MONITOR_OTHER_PAGE_READ,
MONITOR_INDEX_LEAF_PAGE_WRITTEN,
MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN,
- MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN,
MONITOR_UNDO_LOG_PAGE_WRITTEN,
MONITOR_INODE_PAGE_WRITTEN,
- MONITOR_IBUF_FREELIST_PAGE_WRITTEN,
- MONITOR_IBUF_BITMAP_PAGE_WRITTEN,
MONITOR_SYSTEM_PAGE_WRITTEN,
MONITOR_TRX_SYSTEM_PAGE_WRITTEN,
MONITOR_FSP_HDR_PAGE_WRITTEN,
@@ -347,17 +339,6 @@ enum monitor_id_t {
MONITOR_MODULE_FIL_SYSTEM,
MONITOR_OVLD_N_FILE_OPENED,
- /* InnoDB Change Buffer related counters */
- MONITOR_MODULE_IBUF_SYSTEM,
- MONITOR_OVLD_IBUF_MERGE_INSERT,
- MONITOR_OVLD_IBUF_MERGE_DELETE,
- MONITOR_OVLD_IBUF_MERGE_PURGE,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE,
- MONITOR_OVLD_IBUF_MERGES,
- MONITOR_OVLD_IBUF_SIZE,
-
/* Counters for server operations */
MONITOR_MODULE_SERVER,
MONITOR_MASTER_THREAD_SLEEP,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index b85fa518384..52e5a724efd 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -165,9 +165,9 @@ extern char* srv_data_home;
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
extern my_bool srv_read_only_mode;
-/** Set if InnoDB operates in read-only mode or innodb-force-recovery
-is greater than SRV_FORCE_NO_IBUF_MERGE. */
-extern my_bool high_level_read_only;
+/** Set if innodb_read_only is set or innodb_force_recovery
+is SRV_FORCE_NO_UNDO_LOG_SCAN or greater. */
+extern bool high_level_read_only;
/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
@@ -270,8 +270,6 @@ extern double srv_defragment_fill_factor;
extern uint srv_defragment_frequency;
extern ulonglong srv_defragment_interval;
-extern uint srv_change_buffer_max_size;
-
/* Number of IO operations per second the server can do */
extern ulong srv_io_capacity;
@@ -296,7 +294,7 @@ extern ulong srv_flushing_avg_loops;
extern ulong srv_force_recovery;
-/** innodb_fast_shutdown=1 skips purge and change buffer merge.
+/** innodb_fast_shutdown=1 skips purge.
innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint).
innodb_fast_shutdown=3 is a clean shutdown that skips the rollback
of active transaction (to be done on restart). */
@@ -580,11 +578,6 @@ void srv_monitor_task(void*);
void srv_master_callback(void*);
-/**
-Complete the shutdown tasks such as background DROP TABLE,
-and optionally change buffer merge (on innodb_fast_shutdown=0). */
-void srv_shutdown(bool ibuf_merge);
-
} /* extern "C" */
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/sux_lock.h b/storage/innobase/include/sux_lock.h
index 2c0167ac651..7a7f93b6787 100644
--- a/storage/innobase/include/sux_lock.h
+++ b/storage/innobase/include/sux_lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2020, 2022, MariaDB Corporation.
+Copyright (c) 2020, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -149,7 +149,7 @@ private:
#endif
public:
- /** In crash recovery or the change buffer, claim the ownership
+ /** In crash recovery, claim the ownership
of the exclusive block lock to the current thread */
void claim_ownership() { set_new_owner(pthread_self()); }
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 152e794ac6a..81eb5471a7b 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -778,13 +778,17 @@ public:
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
- uint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
- bool check_foreigns; /*!< normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
+ /** TRX_ISO_REPEATABLE_READ, ... */
+ unsigned isolation_level:2;
+ /** normally set; "SET foreign_key_checks=0" can be issued to suppress
+ foreign key checks, in table imports, for example */
+ unsigned check_foreigns:1;
+ /** normally set; "SET unique_checks=0, foreign_key_checks=0"
+ enables bulk insert into an empty table */
+ unsigned check_unique_secondary:1;
+
/** whether an insert into an empty table is active */
- bool bulk_insert;
+ unsigned bulk_insert:1;
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
commit between multiple storage engines and the binary log. When
@@ -798,13 +802,6 @@ public:
/** whether this is holding the prepare mutex */
bool active_commit_ordered;
/*------------------------------*/
- bool check_unique_secondary;
- /*!< normally TRUE, but if the user
- wants to speed up inserts by
- suppressing unique key checks
- for secondary indexes when we decide
- if we can use the insert buffer for
- them, we set this FALSE */
bool flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 3474a903f6c..4728e7ef2bf 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -488,10 +488,10 @@ completely purged and trx_purge_free_segment() has started freeing it */
/** Transaction end identifier (if the log is in a history list),
or 0 if the transaction has not been committed */
#define TRX_UNDO_TRX_NO 8
-/** Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
+/* Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
surviving user records, this used to be called TRX_UNDO_DEL_MARKS.
-This field is redundant; it is only being read by some debug assertions.
+This field was removed in MariaDB 11.0.
The value 1 indicates that purge needs to process the undo log segment.
The value 0 indicates that all of it has been processed, and
@@ -500,7 +500,7 @@ trx_purge_free_segment() has been invoked, so the log is not safe to access.
Before MariaDB 10.3.1, a log segment may carry the value 0 even before
trx_purge_free_segment() was called, for those undo log records for
which purge would not result in removing delete-marked records. */
-#define TRX_UNDO_NEEDS_PURGE 16
+/*#define TRX_UNDO_NEEDS_PURGE 16*/
#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
of this log on the header page; purge
may remove undo log record from the
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index d06343fcabe..93352b279b1 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -142,7 +142,6 @@ using the call command. */
assertions. */
#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
#define UNIV_HASH_DEBUG /* debug HASH_ macros */
-#define UNIV_IBUF_DEBUG /* debug the insert buffer */
#define UNIV_PERF_DEBUG /* debug flag that enables
light weight performance
related stuff. */
@@ -475,9 +474,6 @@ extern mysql_pfs_key_t fts_cache_mutex_key;
extern mysql_pfs_key_t fts_cache_init_mutex_key;
extern mysql_pfs_key_t fts_delete_mutex_key;
extern mysql_pfs_key_t fts_doc_id_mutex_key;
-extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
-extern mysql_pfs_key_t ibuf_mutex_key;
-extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
extern mysql_pfs_key_t recalc_pool_mutex_key;
extern mysql_pfs_key_t purge_sys_pq_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index ff5508d489d..02c6649bc33 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -401,6 +401,31 @@ void log_t::set_buffered(bool buffered)
}
#endif
+ /** Try to enable or disable durable writes (update log_write_through) */
+void log_t::set_write_through(bool write_through)
+{
+ if (is_pmem() || high_level_read_only)
+ return;
+ log_resize_acquire();
+ if (!resize_in_progress() && is_opened() &&
+ bool(log_write_through) != write_through)
+ {
+ os_file_close_func(log.m_file);
+ log.m_file= OS_FILE_CLOSED;
+ std::string path{get_log_file_path()};
+ log_write_through= write_through;
+ bool success;
+ log.m_file= os_file_create_func(path.c_str(),
+ OS_FILE_OPEN, OS_FILE_NORMAL, OS_LOG_FILE,
+ false, &success);
+ ut_a(log.m_file != OS_FILE_CLOSED);
+ sql_print_information(log_write_through
+ ? "InnoDB: Log writes write through"
+ : "InnoDB: Log writes may be cached");
+ }
+ log_resize_release();
+}
+
/** Start resizing the log and release the exclusive latch.
@param size requested new file_size
@return whether the resizing was started successfully */
@@ -852,7 +877,7 @@ bool log_t::flush(lsn_t lsn) noexcept
{
ut_ad(lsn >= get_flushed_lsn());
flush_lock.set_pending(lsn);
- const bool success{srv_file_flush_method == SRV_O_DSYNC || log.flush()};
+ const bool success{log_write_through || log.flush()};
if (UNIV_LIKELY(success))
{
flushed_to_disk_lsn.store(lsn, std::memory_order_release);
@@ -888,15 +913,6 @@ void log_write_up_to(lsn_t lsn, bool durable,
{
ut_ad(!srv_read_only_mode);
ut_ad(lsn != LSN_MAX);
-
- if (UNIV_UNLIKELY(recv_no_ibuf_operations))
- {
- /* A non-final batch of recovery is active no writes to the log
- are allowed yet. */
- ut_a(!callback);
- return;
- }
-
ut_ad(lsn <= log_sys.get_lsn());
#ifdef HAVE_PMEM
@@ -922,6 +938,7 @@ repeat:
if (write_lock.acquire(lsn, durable ? nullptr : callback) ==
group_commit_lock::ACQUIRED)
{
+ ut_ad(!recv_no_log_write || srv_operation != SRV_OPERATION_NORMAL);
log_sys.latch.wr_lock(SRW_LOCK_CALL);
pending_write_lsn= write_lock.release(log_sys.write_buf<true>());
}
@@ -1054,11 +1071,9 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
ib::info() << "Starting shutdown...";
- /* Wait until the master thread and all other operations are idle: our
+ /* Wait until the master task and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
- bool do_srv_shutdown = false;
if (srv_master_timer) {
- do_srv_shutdown = srv_fast_shutdown < 2;
srv_master_timer.reset();
}
@@ -1075,11 +1090,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
}
srv_monitor_timer.reset();
- if (do_srv_shutdown) {
- srv_shutdown(srv_fast_shutdown == 0);
- }
-
-
loop:
ut_ad(lock_sys.is_initialised() || !srv_was_started);
ut_ad(log_sys.is_initialised() || !srv_was_started);
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 793f7b327c8..3443369af6c 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,6 @@ Created 9/20/1997 Heikki Tuuri
#include "page0page.h"
#include "page0cur.h"
#include "trx0undo.h"
-#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
@@ -71,17 +70,6 @@ number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
bool recv_lsn_checks_on;
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-true means that recovery is running and no operations on the log file
-are allowed yet: the variable name is misleading. */
-bool recv_no_ibuf_operations;
-
/** The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
@@ -739,7 +727,7 @@ static struct
retry:
log_sys.latch.wr_unlock();
bool fail= false;
- buf_block_t *free_block= buf_LRU_get_free_block(false);
+ buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
log_sys.latch.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&recv_sys.mutex);
@@ -1026,22 +1014,13 @@ FIXME: Rely on recv_sys.pages! */
class mlog_init_t
{
public:
- /** A page initialization operation that was parsed from
- the redo log */
- struct init {
- /** log sequence number of the page initialization */
- lsn_t lsn;
- /** Whether btr_page_create() avoided a read of the page.
-
- At the end of the last recovery batch, mark_ibuf_exist()
- will mark pages for which this flag is set. */
- bool created;
- };
+ /** log sequence number of the page initialization */
+ lsn_t lsn;
private:
- typedef std::map<const page_id_t, init,
+ typedef std::map<const page_id_t, lsn_t,
std::less<const page_id_t>,
- ut_allocator<std::pair<const page_id_t, init> > >
+ ut_allocator<std::pair<const page_id_t, lsn_t> > >
map;
/** Map of page initialization operations.
FIXME: Merge this to recv_sys.pages! */
@@ -1054,13 +1033,11 @@ public:
bool add(const page_id_t page_id, lsn_t lsn)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
- const init init = { lsn, false };
- std::pair<map::iterator, bool> p = inits.insert(
- map::value_type(page_id, init));
- ut_ad(!p.first->second.created);
+ std::pair<map::iterator, bool> p = inits.emplace(
+ map::value_type(page_id, lsn));
if (p.second) return true;
- if (p.first->second.lsn >= init.lsn) return false;
- p.first->second = init;
+ if (p.first->second >= lsn) return false;
+ p.first->second = lsn;
return true;
}
@@ -1070,7 +1047,7 @@ public:
@param[in,out] init initialize log or load log
@return the latest page initialization;
not valid after releasing recv_sys.mutex. */
- init& last(page_id_t page_id)
+ lsn_t last(page_id_t page_id)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
return inits.find(page_id)->second;
@@ -1084,69 +1061,7 @@ public:
{
mysql_mutex_assert_owner(&recv_sys.mutex);
auto i= inits.find(page_id);
- return i != inits.end() && i->second.lsn > lsn;
- }
-
- /** At the end of each recovery batch, reset the 'created' flags. */
- void reset()
- {
- mysql_mutex_assert_owner(&recv_sys.mutex);
- ut_ad(recv_no_ibuf_operations);
- for (map::value_type& i : inits) {
- i.second.created = false;
- }
- }
-
- /** On the last recovery batch, mark whether there exist
- buffered changes for the pages that were initialized
- by buf_page_create() and still reside in the buffer pool.
- @param[in,out] mtr dummy mini-transaction */
- void mark_ibuf_exist(mtr_t& mtr)
- {
- mysql_mutex_assert_owner(&recv_sys.mutex);
- mtr.start();
-
- for (const map::value_type& i : inits) {
- if (!i.second.created) {
- continue;
- }
- if (buf_block_t* block = buf_page_get_low(
- i.first, 0, RW_X_LATCH, nullptr,
- BUF_GET_IF_IN_POOL,
- &mtr, nullptr, false)) {
- if (UNIV_LIKELY_NULL(block->page.zip.data)) {
- switch (fil_page_get_type(
- block->page.zip.data)) {
- case FIL_PAGE_INDEX:
- case FIL_PAGE_RTREE:
- if (page_zip_decompress(
- &block->page.zip,
- block->page.frame,
- true)) {
- break;
- }
- ib::error() << "corrupted "
- << block->page.id();
- }
- }
- if (recv_no_ibuf_operations) {
- mtr.commit();
- mtr.start();
- continue;
- }
- mysql_mutex_unlock(&recv_sys.mutex);
- if (ibuf_page_exists(block->page.id(),
- block->zip_size())) {
- block->page.set_ibuf_exist();
- }
- mtr.commit();
- mtr.start();
- mysql_mutex_lock(&recv_sys.mutex);
- }
- }
-
- mtr.commit();
- clear();
+ return i != inits.end() && i->second > lsn;
}
/** Clear the data structure */
@@ -2890,19 +2805,17 @@ lsn of a log record.
@param[in,out] mtr mini-transaction
@param[in,out] p recovery address
@param[in,out] space tablespace, or NULL if not looked up yet
-@param[in,out] init page initialization operation, or NULL
+@param[in,out] init_lsn page initialization LSN, or 0
@return the recovered page
@retval nullptr on failure */
static buf_block_t *recv_recover_page(buf_block_t *block, mtr_t &mtr,
const recv_sys_t::map::iterator &p,
fil_space_t *space= nullptr,
- mlog_init_t::init *init= nullptr)
+ lsn_t init_lsn= 0)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
ut_ad(recv_sys.apply_log_recs);
ut_ad(recv_needed_recovery);
- ut_ad(!init || init->created);
- ut_ad(!init || init->lsn);
ut_ad(block->page.id() == p->first);
ut_ad(!p->second.is_being_processed());
ut_ad(!space || space->id == block->page.id().space());
@@ -2923,13 +2836,12 @@ static buf_block_t *recv_recover_page(buf_block_t *block, mtr_t &mtr,
byte *frame = UNIV_LIKELY_NULL(block->page.zip.data)
? block->page.zip.data
: block->page.frame;
- const lsn_t page_lsn = init
+ const lsn_t page_lsn = init_lsn
? 0
: mach_read_from_8(frame + FIL_PAGE_LSN);
bool free_page = false;
lsn_t start_lsn = 0, end_lsn = 0;
ut_d(lsn_t recv_start_lsn = 0);
- const lsn_t init_lsn = init ? init->lsn : 0;
bool skipped_after_init = false;
@@ -3057,8 +2969,7 @@ static buf_block_t *recv_recover_page(buf_block_t *block, mtr_t &mtr,
set_start_lsn:
if ((a == log_phys_t::APPLIED_CORRUPTED
|| recv_sys.is_corrupt_log()) && !srv_force_recovery) {
- if (init) {
- init->created = false;
+ if (init_lsn) {
if (space || block->page.id().page_no()) {
block->page.lock.x_lock_recursive();
}
@@ -3098,12 +3009,9 @@ set_start_lsn:
UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page);
buf_pool.page_cleaner_wakeup();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- } else if (free_page && init) {
+ } else if (free_page && init_lsn) {
/* There have been no operations that modify the page.
- Any buffered changes must not be merged. A subsequent
- buf_page_create() from a user thread should discard
- any buffered changes. */
- init->created = false;
+ Any buffered changes will be merged in ibuf_upgrade(). */
ut_ad(!mtr.has_modifications());
block->page.set_freed(block->page.state());
}
@@ -3237,10 +3145,12 @@ func_exit:
}
/** Read pages for which log needs to be applied.
-@param page_id first page identifier to read
-@param i iterator to recv_sys.pages */
+@param page_id first page identifier to read
+@param i iterator to recv_sys.pages
+@param last_batch whether it is possible to write more redo log */
TRANSACTIONAL_TARGET
-static void recv_read_in_area(page_id_t page_id, recv_sys_t::map::iterator i)
+static void recv_read_in_area(page_id_t page_id, recv_sys_t::map::iterator i,
+ bool last_batch)
{
uint32_t page_nos[32];
ut_ad(page_id == i->first);
@@ -3260,7 +3170,9 @@ static void recv_read_in_area(page_id_t page_id, recv_sys_t::map::iterator i)
if (p != page_nos)
{
mysql_mutex_unlock(&recv_sys.mutex);
+ if (!last_batch) log_sys.latch.wr_unlock();
buf_read_recv_pages(page_id.space(), {page_nos, p});
+ if (!last_batch) log_sys.latch.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&recv_sys.mutex);
}
}
@@ -3282,11 +3194,11 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
page_recv_t &recs= p->second;
ut_ad(recs.state == page_recv_t::RECV_WILL_NOT_READ);
buf_block_t* block= nullptr;
- mlog_init_t::init &i= mlog_init.last(page_id);
+ const lsn_t init_lsn= mlog_init.last(page_id);
const lsn_t end_lsn= recs.log.last()->lsn;
- if (end_lsn < i.lsn)
+ if (end_lsn < init_lsn)
DBUG_LOG("ib_log", "skip log for page " << page_id
- << " LSN " << end_lsn << " < " << i.lsn);
+ << " LSN " << end_lsn << " < " << init_lsn);
fil_space_t *space= fil_space_t::get(page_id.space());
mtr.start();
@@ -3326,9 +3238,8 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
}
ut_ad(&recs == &pages.find(page_id)->second);
- i.created= true;
map::iterator r= p++;
- block= recv_recover_page(block, mtr, r, space, &i);
+ block= recv_recover_page(block, mtr, r, space, init_lsn);
ut_ad(mtr.has_committed());
if (block)
@@ -3354,7 +3265,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
@retval nullptr if the page cannot be initialized based on log records */
buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
{
- buf_block_t *free_block= buf_LRU_get_free_block(false);
+ buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= nullptr;
mysql_mutex_lock(&mutex);
@@ -3441,10 +3352,6 @@ void recv_sys_t::apply(bool last_batch)
}
}
- recv_no_ibuf_operations = !last_batch ||
- srv_operation == SRV_OPERATION_RESTORE ||
- srv_operation == SRV_OPERATION_RESTORE_EXPORT;
-
mtr_t mtr;
if (!pages.empty())
@@ -3492,7 +3399,7 @@ void recv_sys_t::apply(bool last_batch)
if (!last_batch)
log_sys.latch.wr_unlock();
- buf_block_t *free_block= buf_LRU_get_free_block(false);
+ buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
if (!last_batch)
log_sys.latch.wr_lock(SRW_LOCK_CALL);
@@ -3545,7 +3452,7 @@ next_free_block:
mysql_mutex_unlock(&mutex);
if (!last_batch)
log_sys.latch.wr_unlock();
- free_block= buf_LRU_get_free_block(false);
+ free_block= buf_LRU_get_free_block(have_no_mutex);
if (!last_batch)
log_sys.latch.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&mutex);
@@ -3554,7 +3461,7 @@ next_free_block:
ut_ad(p == pages.end() || p->first > page_id);
continue;
case page_recv_t::RECV_NOT_PROCESSED:
- recv_read_in_area(page_id, p);
+ recv_read_in_area(page_id, p, last_batch);
}
p= pages.lower_bound(page_id);
/* Ensure that progress will be made. */
@@ -3607,14 +3514,8 @@ next_free_block:
}
}
- if (last_batch)
- /* We skipped this in buf_page_create(). */
- mlog_init.mark_ibuf_exist(mtr);
- else
- {
- mlog_init.reset();
+ if (!last_batch)
log_sys.latch.wr_unlock();
- }
mysql_mutex_unlock(&mutex);
@@ -4330,7 +4231,6 @@ err_exit:
mysql_mutex_lock(&recv_sys.mutex);
recv_sys.apply_log_recs = true;
- recv_no_ibuf_operations = false;
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
if (srv_operation == SRV_OPERATION_NORMAL) {
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index b866460feb5..6d31a55e8ed 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -177,7 +177,6 @@ void mtr_t::start()
m_made_dirty= false;
m_latch_ex= false;
- m_inside_ibuf= false;
m_modifications= false;
m_log_mode= MTR_LOG_ALL;
ut_d(m_user_space_id= TRX_SYS_SPACE);
@@ -309,7 +308,6 @@ void mtr_t::release()
void mtr_t::commit()
{
ut_ad(is_active());
- ut_ad(!is_inside_ibuf());
/* This is a dirty read, for debugging. */
ut_ad(!m_modifications || !recv_no_log_write);
@@ -494,7 +492,6 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end)
void mtr_t::commit_shrink(fil_space_t &space)
{
ut_ad(is_active());
- ut_ad(!is_inside_ibuf());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(m_made_dirty);
@@ -601,7 +598,6 @@ void mtr_t::commit_shrink(fil_space_t &space)
bool mtr_t::commit_file(fil_space_t &space, const char *name)
{
ut_ad(is_active());
- ut_ad(!is_inside_ibuf());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(!m_made_dirty);
@@ -712,7 +708,6 @@ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(log_sys.latch.is_write_locked());
#endif
ut_ad(is_active());
- ut_ad(!is_inside_ibuf());
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(!m_made_dirty);
ut_ad(m_memo.empty());
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index d4cfb6207bf..98bf4fdb8ca 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -65,7 +65,9 @@ Created 10/21/1995 Heikki Tuuri
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#ifdef _WIN32
-#include <winioctl.h>
+# include <winioctl.h>
+#elif !defined O_DSYNC
+# define O_DSYNC O_SYNC
#endif
// my_test_if_atomic_write() , my_win_secattr()
@@ -931,6 +933,8 @@ bool
os_file_flush_func(
os_file_t file)
{
+ if (UNIV_UNLIKELY(my_disable_sync)) return true;
+
int ret;
ret = os_file_sync_posix(file);
@@ -981,40 +985,19 @@ os_file_create_simple_func(
*success = false;
- int create_flag;
- const char* mode_str = NULL;
+ int create_flag = O_RDONLY;
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
- if (create_mode == OS_FILE_OPEN) {
- mode_str = "OPEN";
-
- if (access_type == OS_FILE_READ_ONLY) {
-
- create_flag = O_RDONLY;
-
- } else if (read_only) {
-
- create_flag = O_RDONLY;
-
- } else {
+ if (read_only) {
+ } else if (create_mode == OS_FILE_OPEN) {
+ if (access_type != OS_FILE_READ_ONLY) {
create_flag = O_RDWR;
}
-
- } else if (read_only) {
-
- mode_str = "OPEN";
- create_flag = O_RDONLY;
-
} else if (create_mode == OS_FILE_CREATE) {
-
- mode_str = "CREATE";
create_flag = O_RDWR | O_CREAT | O_EXCL;
-
} else if (create_mode == OS_FILE_CREATE_PATH) {
-
- mode_str = "CREATE PATH";
/* Create subdirs along the path if needed. */
*success = os_file_create_subdirs_if_needed(name);
@@ -1040,40 +1023,38 @@ os_file_create_simple_func(
return(OS_FILE_CLOSED);
}
- bool retry;
+ create_flag |= O_CLOEXEC;
+ if (fil_system.is_write_through()) create_flag |= O_DSYNC;
+#ifdef O_DIRECT
+ int direct_flag = fil_system.is_buffered() ? 0 : O_DIRECT;
+#else
+ constexpr int direct_flag = 0;
+#endif
- do {
- file = open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+ for (;;) {
+ file = open(name, create_flag | direct_flag, os_innodb_umask);
if (file == -1) {
+#ifdef O_DIRECT
+ if (direct_flag && errno == EINVAL) {
+ direct_flag = 0;
+ continue;
+ }
+#endif
+
*success = false;
- retry = os_file_handle_error(
+ if (!os_file_handle_error(
name,
create_mode == OS_FILE_OPEN
- ? "open" : "create");
+ ? "open" : "create")) {
+ break;
+ }
} else {
*success = true;
- retry = false;
- }
-
- } while (retry);
-
- /* This function is always called for data files, we should disable
- OS caching (O_DIRECT) here as we do in os_file_create_func(), so
- we open the same file in the same mode, see man page of open(2). */
- if (!srv_read_only_mode && *success) {
- switch (srv_file_flush_method) {
- case SRV_O_DSYNC:
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
- os_file_set_nocache(file, name, mode_str);
- break;
- default:
break;
}
}
-#ifndef _WIN32
if (!read_only
&& *success
&& access_type == OS_FILE_READ_WRITE
@@ -1084,7 +1065,6 @@ os_file_create_simple_func(
close(file);
file = -1;
}
-#endif /* !_WIN32 */
return(file);
}
@@ -1156,8 +1136,10 @@ os_file_create_func(
return(OS_FILE_CLOSED);
);
- int create_flag;
- const char* mode_str = NULL;
+ int create_flag = O_RDONLY | O_CLOEXEC;
+#ifdef O_DIRECT
+ const char* mode_str = "OPEN";
+#endif
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
? true : false;
@@ -1167,30 +1149,21 @@ os_file_create_func(
create_mode &= ulint(~(OS_FILE_ON_ERROR_NO_EXIT
| OS_FILE_ON_ERROR_SILENT));
- if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RAW
- || create_mode == OS_FILE_OPEN_RETRY) {
-
- mode_str = "OPEN";
-
- create_flag = read_only ? O_RDONLY : O_RDWR;
-
- } else if (read_only) {
-
- mode_str = "OPEN";
-
- create_flag = O_RDONLY;
-
+ if (read_only) {
+ } else if (create_mode == OS_FILE_OPEN
+ || create_mode == OS_FILE_OPEN_RAW
+ || create_mode == OS_FILE_OPEN_RETRY) {
+ create_flag = O_RDWR | O_CLOEXEC;
} else if (create_mode == OS_FILE_CREATE) {
-
+#ifdef O_DIRECT
mode_str = "CREATE";
- create_flag = O_RDWR | O_CREAT | O_EXCL;
-
+#endif
+ create_flag = O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC;
} else if (create_mode == OS_FILE_OVERWRITE) {
-
+#ifdef O_DIRECT
mode_str = "OVERWRITE";
- create_flag = O_RDWR | O_CREAT | O_TRUNC;
-
+#endif
+ create_flag = O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC;
} else {
ib::error()
<< "Unknown file create mode (" << create_mode << ")"
@@ -1205,25 +1178,36 @@ os_file_create_func(
ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
- /* We let O_DSYNC only affect log files */
+ create_flag |= O_CLOEXEC;
- if (!read_only
- && type == OS_LOG_FILE
- && srv_file_flush_method == SRV_O_DSYNC) {
-#ifdef O_DSYNC
- create_flag |= O_DSYNC;
+#ifdef O_DIRECT
+ int direct_flag = type == OS_DATA_FILE && create_mode != OS_FILE_CREATE
+ && !fil_system.is_buffered()
+ ? O_DIRECT : 0;
#else
- create_flag |= O_SYNC;
+ constexpr int direct_flag = 0;
#endif
+
+ if (read_only) {
+ } else if ((type == OS_LOG_FILE)
+ ? log_sys.log_write_through
+ : fil_system.is_write_through()) {
+ create_flag |= O_DSYNC;
}
os_file_t file;
- bool retry;
- do {
- file = open(name, create_flag | O_CLOEXEC, os_innodb_umask);
+ for (;;) {
+ file = open(name, create_flag | direct_flag, os_innodb_umask);
if (file == -1) {
+#ifdef O_DIRECT
+ if (direct_flag && errno == EINVAL) {
+ direct_flag = 0;
+ continue;
+ }
+#endif
+
const char* operation;
operation = (create_mode == OS_FILE_CREATE
@@ -1232,39 +1216,30 @@ os_file_create_func(
*success = false;
if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ if (os_file_handle_error_no_exit(
+ name, operation, on_error_silent))
+ continue;
} else {
- retry = os_file_handle_error(name, operation);
+ if (os_file_handle_error(name, operation))
+ continue;
}
+
+ return file;
} else {
*success = true;
- retry = false;
+ break;
}
-
- } while (retry);
-
- if (!*success) {
- return file;
}
#if (defined __sun__ && defined DIRECTIO_ON) || defined O_DIRECT
- if (type == OS_DATA_FILE) {
- switch (srv_file_flush_method) {
- case SRV_O_DSYNC:
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
+ if (type == OS_DATA_FILE && create_mode == OS_FILE_CREATE
+ && !fil_system.is_buffered()) {
# ifdef __linux__
use_o_direct:
# endif
- os_file_set_nocache(file, name, mode_str);
- break;
- default:
- break;
- }
- }
+ os_file_set_nocache(file, name, mode_str);
# ifdef __linux__
- else if (type == OS_LOG_FILE && !log_sys.is_opened()) {
+ } else if (type == OS_LOG_FILE && !log_sys.is_opened()) {
struct stat st;
char b[20 + sizeof "/sys/dev/block/" ":"
"/../queue/physical_block_size"];
@@ -1316,11 +1291,10 @@ skip_o_direct:
log_sys.log_buffered= true;
log_sys.set_block_size(512);
}
- }
# endif
+ }
#endif
-#ifndef _WIN32
if (!read_only
&& create_mode != OS_FILE_OPEN_RAW
&& !my_disable_locking
@@ -1348,7 +1322,6 @@ skip_o_direct:
close(file);
file = -1;
}
-#endif /* !_WIN32 */
return(file);
}
@@ -1786,6 +1759,9 @@ Flushes the write buffers of a given file to the disk.
@return true if success */
bool os_file_flush_func(os_file_t file)
{
+ if (UNIV_UNLIKELY(my_disable_sync))
+ return true;
+
++os_n_fsyncs;
static bool disable_datasync;
@@ -2011,6 +1987,11 @@ os_file_create_simple_func(
return(OS_FILE_CLOSED);
}
+ if (fil_system.is_write_through())
+ attributes |= FILE_FLAG_WRITE_THROUGH;
+ if (!fil_system.is_buffered())
+ attributes |= FILE_FLAG_NO_BUFFERING;
+
bool retry;
do {
@@ -2182,27 +2163,16 @@ os_file_create_func(
if (!log_sys.is_opened() && !log_sys.log_buffered) {
attributes|= FILE_FLAG_NO_BUFFERING;
}
- if (srv_file_flush_method == SRV_O_DSYNC)
+ if (log_sys.log_write_through)
attributes|= FILE_FLAG_WRITE_THROUGH;
- }
- else if (type == OS_DATA_FILE)
- {
- switch (srv_file_flush_method)
- {
- case SRV_FSYNC:
- case SRV_LITTLESYNC:
- case SRV_NOSYNC:
- break;
- default:
+ } else {
+ if (type == OS_DATA_FILE && !fil_system.is_buffered())
attributes|= FILE_FLAG_NO_BUFFERING;
- }
+ if (fil_system.is_write_through())
+ attributes|= FILE_FLAG_WRITE_THROUGH;
}
- DWORD access = GENERIC_READ;
-
- if (!read_only) {
- access |= GENERIC_WRITE;
- }
+ DWORD access = read_only ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE;
for (;;) {
const char *operation;
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index b019694b9f6..8d3a44d630d 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1369,8 +1369,7 @@ page_cur_insert_rec_low(
ut_ad(!!page_is_comp(block->page.frame) == !!rec_offs_comp(offsets));
ut_ad(fil_page_index_page_check(block->page.frame));
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame) ==
- index->id ||
- mtr->is_inside_ibuf());
+ index->id || index->is_dummy);
ut_ad(page_dir_get_n_slots(block->page.frame) >= 2);
ut_ad(!page_rec_is_supremum(cur->rec));
@@ -1769,11 +1768,6 @@ static inline void page_zip_dir_add_slot(buf_block_t *block,
Inserts a record next to page cursor on a compressed and uncompressed
page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to inserted record
@return nullptr on failure */
rec_t*
@@ -1797,8 +1791,7 @@ page_cur_insert_rec_zip(
ut_ad(rec_offs_comp(offsets));
ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX ||
fil_page_get_type(page) == FIL_PAGE_RTREE);
- ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + page) ==
- index->id || mtr->is_inside_ibuf());
+ ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + page) == index->id);
ut_ad(!page_get_instant(page));
ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
@@ -2265,8 +2258,7 @@ page_cur_delete_rec(
== index->table->not_redundant());
ut_ad(fil_page_index_page_check(block->page.frame));
ut_ad(mach_read_from_8(PAGE_HEADER + PAGE_INDEX_ID + block->page.frame)
- == index->id
- || mtr->is_inside_ibuf());
+ == index->id);
ut_ad(mtr->is_named_space(index->table->space));
/* The record must not be the supremum or infimum record. */
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index 258d47a5451..1060e702db4 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -336,17 +336,13 @@ page_create_zip(
/* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for
temporary tables. */
ut_ad(max_trx_id == 0 || !index->table->is_temporary());
- /* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID
+ /* In secondary indexes, PAGE_MAX_TRX_ID
must be zero on non-leaf pages. max_trx_id can be 0 when the
- index consists of an empty root (leaf) page. */
- ut_ad(max_trx_id == 0
- || level == 0
- || !dict_index_is_sec_or_ibuf(index)
- || index->table->is_temporary());
- /* In the clustered index, PAGE_ROOT_AUTOINC or
+ index consists of an empty root (leaf) page.
+
+ In the clustered index, PAGE_ROOT_AUTOINC or
PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
- ut_ad(level == 0 || max_trx_id == 0
- || !dict_index_is_sec_or_ibuf(index)
+ ut_ad(max_trx_id == 0 || level == 0 || index->is_primary()
|| index->table->is_temporary());
buf_block_modify_clock_inc(block);
@@ -390,8 +386,7 @@ page_create_empty(
same temp-table in parallel.
max_trx_id is ignored for temp tables because it not required
for MVCC. */
- if (dict_index_is_sec_or_ibuf(index)
- && !index->table->is_temporary()
+ if (!index->is_primary() && !index->table->is_temporary()
&& page_is_leaf(block->page.frame)) {
max_trx_id = page_get_max_trx_id(block->page.frame);
ut_ad(max_trx_id);
@@ -435,11 +430,6 @@ page_create_empty(
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page or compress the page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return error code */
dberr_t
page_copy_rec_list_end_no_locks(
@@ -507,11 +497,6 @@ Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_t::commit().
-
@return pointer to the original successor of the infimum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -603,8 +588,7 @@ err_exit:
same temp-table in parallel.
max_trx_id is ignored for temp tables because it not required
for MVCC. */
- if (dict_index_is_sec_or_ibuf(index)
- && page_is_leaf(page)
+ if (!index->is_primary() && page_is_leaf(page)
&& !index->table->is_temporary()) {
ut_ad(!was_empty || page_dir_get_n_heap(new_page)
== PAGE_HEAP_NO_USER_LOW
@@ -677,11 +661,6 @@ Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
@return pointer to the original predecessor of the supremum record on new_block
@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */
rec_t*
@@ -2057,7 +2036,7 @@ func_exit2:
max_trx_id is ignored for temp tables because it not required
for MVCC. */
if (!page_is_leaf(page) || page_is_empty(page)
- || !dict_index_is_sec_or_ibuf(index)
+ || index->is_primary()
|| index->table->is_temporary()) {
} else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) {
trx_id_t max_trx_id = page_get_max_trx_id(page);
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 56b58dd87d0..aff01764be6 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -2,7 +2,7 @@
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -408,8 +408,6 @@ inline void mtr_t::zmemcpy(const buf_block_t &b, void *dest, const void *str,
static void page_zip_compress_write_log(buf_block_t *block,
dict_index_t *index, mtr_t *mtr)
{
- ut_ad(!index->is_ibuf());
-
if (!mtr->is_logged())
return;
@@ -463,8 +461,7 @@ page_zip_get_n_prev_extern(
ut_ad(page_is_leaf(page));
ut_ad(page_is_comp(page));
ut_ad(dict_table_is_comp(index->table));
- ut_ad(dict_index_is_clust(index));
- ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(index->is_primary());
heap_no = rec_get_heap_no_new(rec);
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
@@ -1282,7 +1279,6 @@ page_zip_compress(
ut_ad(page_simple_validate_new((page_t*) page));
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(dict_table_is_comp(index->table));
- ut_ad(!dict_index_is_ibuf(index));
MEM_CHECK_DEFINED(page, srv_page_size);
@@ -4374,10 +4370,6 @@ Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, redo log will be written.
The function btr_page_reorganize() should be preferred whenever possible.
-IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
-non-clustered index, the caller must update the insert buffer free
-bits in the same mini-transaction in such a way that the modification
-will be redo-logged.
@return error code
@retval DB_FAIL on overflow; the block_zip will be left intact */
dberr_t
@@ -4398,7 +4390,6 @@ page_zip_reorganize(
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
ut_ad(block->page.zip.data);
ut_ad(page_is_comp(page));
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(!index->table->is_temporary());
/* Note that page_zip_validate(page_zip, page, index) may fail here. */
MEM_CHECK_DEFINED(page, srv_page_size);
@@ -4505,7 +4496,6 @@ page_zip_copy_recs(
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr->memo_contains_page_flagged(src, MTR_MEMO_PAGE_X_FIX));
- ut_ad(!dict_index_is_ibuf(index));
ut_ad(!index->table->is_temporary());
#ifdef UNIV_ZIP_DEBUG
/* The B-tree operations that call this function may set
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index c2b2bc7120d..e48cad01530 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2020, 2022, MariaDB Corporation.
+Copyright (c) 2020, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -401,8 +401,8 @@ int cmp_dtuple_rec_with_match_low(const dtuple_t *dtuple, const rec_t *rec,
ut_ad(!dfield_is_ext(dtuple_field));
- ret = cmp_data(type->mtype, type->prtype, !index->is_ibuf()
- && index->fields[cur_field].descending,
+ ret = cmp_data(type->mtype, type->prtype,
+ index->fields[cur_field].descending,
dtuple_b_ptr, dtuple_f_len,
rec_b_ptr, rec_f_len);
if (ret) {
@@ -480,7 +480,6 @@ cmp_dtuple_rec_with_match_bytes(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!(REC_INFO_MIN_REC_FLAG
& dtuple_get_info_bits(dtuple)));
- ut_ad(!index->is_ibuf());
if (UNIV_UNLIKELY(REC_INFO_MIN_REC_FLAG
& rec_get_info_bits(rec, rec_offs_comp(offsets)))) {
@@ -832,32 +831,21 @@ cmp_rec_rec(
dict_index_get_n_unique_in_tree(index));
for (; cur_field < n_fields; cur_field++) {
- ulint mtype;
- ulint prtype;
- bool descending;
-
- if (UNIV_UNLIKELY(dict_index_is_ibuf(index))) {
- /* This is for the insert buffer B-tree. */
- mtype = DATA_BINARY;
+ const dict_field_t* field = dict_index_get_nth_field(
+ index, cur_field);
+ bool descending = field->descending;
+ ulint mtype = field->col->mtype;
+ ulint prtype = field->col->prtype;
+
+ if (UNIV_LIKELY(!index->is_spatial())) {
+ } else if (cur_field == 0) {
+ ut_ad(DATA_GEOMETRY_MTYPE(mtype));
+ prtype |= DATA_GIS_MBR;
+ } else if (!page_rec_is_leaf(rec2)) {
+ /* Compare the child page number. */
+ ut_ad(cur_field == 1);
+ mtype = DATA_SYS_CHILD;
prtype = 0;
- descending = false;
- } else {
- const dict_field_t* field = dict_index_get_nth_field(
- index, cur_field);
- descending = field->descending;
- mtype = field->col->mtype;
- prtype = field->col->prtype;
-
- if (UNIV_LIKELY(!dict_index_is_spatial(index))) {
- } else if (cur_field == 0) {
- ut_ad(DATA_GEOMETRY_MTYPE(mtype));
- prtype |= DATA_GIS_MBR;
- } else if (!page_rec_is_leaf(rec2)) {
- /* Compare the child page number. */
- ut_ad(cur_field == 1);
- mtype = DATA_SYS_CHILD;
- prtype = 0;
- }
}
/* We should never encounter an externally stored field.
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 98cf2dda900..f489669b408 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -476,7 +476,7 @@ rec_offs_make_valid(
const bool is_alter_metadata = leaf
&& rec_is_alter_metadata(rec, *index);
ut_ad((leaf && rec_is_metadata(rec, *index))
- || index->is_dummy || index->is_ibuf()
+ || index->is_dummy
|| (leaf
? rec_offs_n_fields(offsets)
<= dict_index_get_n_fields(index)
@@ -878,18 +878,15 @@ rec_get_offsets_func(
/* The infimum and supremum records carry 1 field. */
ut_ad(is_user_rec || n == 1);
ut_ad(!is_user_rec || n_core || index->is_dummy
- || dict_index_is_ibuf(index)
|| n == n_fields /* dict_stats_analyze_index_level() */
|| n - 1
== dict_index_get_n_unique_in_tree_nonleaf(index));
ut_ad(!is_user_rec || !n_core || index->is_dummy
- || dict_index_is_ibuf(index)
|| n == n_fields /* btr_pcur_restore_position() */
|| (n + (index->id == DICT_INDEXES_ID) >= n_core));
if (is_user_rec && n_core && n < index->n_fields) {
ut_ad(!index->is_dummy);
- ut_ad(!dict_index_is_ibuf(index));
n = index->n_fields;
}
}
@@ -1968,7 +1965,7 @@ rec_copy_prefix_to_buf(
or NULL */
ulint* buf_size) /*!< in/out: buffer size */
{
- ut_ad(n_fields <= index->n_fields || dict_index_is_ibuf(index));
+ ut_ad(n_fields <= index->n_fields);
ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
UNIV_PREFETCH_RW(*buf);
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 2dcc16130c3..5d7ea475d43 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -2074,7 +2074,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
nullptr, BUF_PEEK_IF_IN_POOL,
- nullptr, nullptr, false);
+ nullptr, nullptr);
uint16_t page_type;
@@ -2112,8 +2112,9 @@ row_import_cleanup(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
dberr_t err) /*!< in: error code */
{
+ dict_table_t* table = prebuilt->table;
+
if (err != DB_SUCCESS) {
- dict_table_t* table = prebuilt->table;
table->file_unreadable = true;
if (table->space) {
fil_close_tablespace(table->space_id);
@@ -2144,7 +2145,25 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
- return(err);
+ if (err != DB_SUCCESS
+ || !dict_table_get_first_index(table)->is_gen_clust()) {
+ return err;
+ }
+
+ btr_cur_t cur;
+ mtr_t mtr;
+ mtr.start();
+ err = cur.open_leaf(false, dict_table_get_first_index(table),
+ BTR_SEARCH_LEAF, &mtr);
+ if (err != DB_SUCCESS) {
+ } else if (const rec_t *rec =
+ page_rec_get_prev(btr_cur_get_rec(&cur))) {
+ if (page_rec_is_user_rec(rec))
+ table->row_id= mach_read_from_6(rec);
+ }
+ mtr.commit();
+
+ return err;
}
/*****************************************************************//**
@@ -2280,55 +2299,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
}
/*****************************************************************//**
-Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID). */
-MY_ATTRIBUTE((nonnull)) static
-void
-row_import_set_sys_max_row_id(
-/*==========================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
- handler */
- const dict_table_t* table) /*!< in: table to import */
-{
- const rec_t* rec;
- mtr_t mtr;
- btr_pcur_t pcur;
- row_id_t row_id = 0;
- dict_index_t* index;
-
- index = dict_table_get_first_index(table);
- ut_ad(index->is_primary());
- ut_ad(dict_index_is_auto_gen_clust(index));
-
- mtr_start(&mtr);
-
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
- == DB_SUCCESS) {
- rec = btr_pcur_move_to_prev_on_page(&pcur);
-
- if (!rec) {
- /* The table is corrupted. */
- } else if (page_rec_is_infimum(rec)) {
- /* The table is empty. */
- } else if (rec_is_metadata(rec, *index)) {
- /* The clustered index contains the metadata
- record only, that is, the table is empty. */
- } else {
- row_id = mach_read_from_6(rec);
- }
- }
-
- mtr_commit(&mtr);
-
- if (row_id) {
- /* Update the system row id if the imported index row id is
- greater than the max system row id. */
- dict_sys.update_row_id(row_id);
- }
-}
-
-/*****************************************************************//**
Read the a string from the meta data file.
@return DB_SUCCESS or error code. */
static
@@ -4259,8 +4229,6 @@ row_import_for_mysql(
ut_ad(trx->state == TRX_STATE_ACTIVE);
ut_ad(!table->is_readable());
- ibuf_delete_for_discarded_space(table->space_id);
-
/* Assign an undo segment for the transaction, so that the
transaction will be recovered after a crash. */
@@ -4459,12 +4427,6 @@ row_import_for_mysql(
ut_free(filepath);
- if (err == DB_SUCCESS) {
- err = ibuf_check_bitmap_on_import(trx, table->space);
- }
-
- DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
-
if (err != DB_SUCCESS) {
return row_import_cleanup(prebuilt, err);
}
@@ -4521,13 +4483,6 @@ row_import_for_mysql(
return row_import_error(prebuilt, err);
}
- /* Ensure that the next available DB_ROW_ID is not smaller than
- any DB_ROW_ID stored in the table. */
-
- if (prebuilt->clust_index_was_generated) {
- row_import_set_sys_max_row_id(prebuilt, table);
- }
-
ib::info() << "Phase III - Flush changes to disk";
/* Ensure that all pages dirtied during the IMPORT make it to disk.
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 4be1aa6c82c..24fb6eb39ce 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -31,7 +31,6 @@ Created 4/20/1996 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "mach0data.h"
-#include "ibuf0ibuf.h"
#include "que0que.h"
#include "row0upd.h"
#include "row0sel.h"
@@ -2732,8 +2731,6 @@ err_exit:
page_set_autoinc(root, auto_inc, &mtr, false);
}
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
-
#ifdef UNIV_DEBUG
{
page_t* page = btr_pcur_get_page(&pcur);
@@ -3008,7 +3005,6 @@ row_ins_sec_index_entry_low(
ut_ad(!dict_index_is_clust(index));
ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_INSERT_TREE);
- cursor.thr = thr;
cursor.rtr_info = NULL;
cursor.page_cur.index = index;
ut_ad(thr_get_trx(thr)->id != 0);
@@ -3030,9 +3026,10 @@ row_ins_sec_index_entry_low(
if (index->is_spatial()) {
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
+ rtr_info.thr = thr;
rtr_info_update_btr(&cursor, &rtr_info);
- err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr);
+ err = rtr_insert_leaf(&cursor, thr, entry, search_mode, &mtr);
if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF
&& rtr_info.mbr_adj) {
@@ -3041,6 +3038,7 @@ row_ins_sec_index_entry_low(
rtr_clean_rtr_info(&rtr_info, true);
rtr_init_rtr_info(&rtr_info, false, &cursor,
index, false);
+ rtr_info.thr = thr;
rtr_info_update_btr(&cursor, &rtr_info);
mtr.start();
if (index->table->is_temporary()) {
@@ -3048,7 +3046,7 @@ row_ins_sec_index_entry_low(
} else {
index->set_modified(mtr);
}
- err = rtr_insert_leaf(&cursor, entry,
+ err = rtr_insert_leaf(&cursor, thr, entry,
search_mode, &mtr);
}
@@ -3057,14 +3055,6 @@ row_ins_sec_index_entry_low(
goto func_exit;});
} else {
- if (!index->table->is_temporary()) {
- search_mode = btr_latch_mode(
- search_mode
- | (thr_get_trx(thr)->check_unique_secondary
- ? BTR_INSERT | BTR_IGNORE_SEC_UNIQUE
- : BTR_INSERT));
- }
-
err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode,
&mtr);
}
@@ -3076,12 +3066,6 @@ row_ins_sec_index_entry_low(
goto func_exit;
}
- if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- ut_ad(!dict_index_is_spatial(index));
- /* The insert was buffered during the search: we are done */
- goto func_exit;
- }
-
#ifdef UNIV_DEBUG
{
page_t* page = btr_cur_get_page(&cursor);
@@ -3141,13 +3125,9 @@ row_ins_sec_index_entry_low(
locked with s-locks the necessary records to
prevent any insertion of a duplicate by another
transaction. Let us now reposition the cursor and
- continue the insertion (bypassing the change buffer). */
- err = cursor.search_leaf(
- entry, PAGE_CUR_LE,
- btr_latch_mode(search_mode
- & ~(BTR_INSERT
- | BTR_IGNORE_SEC_UNIQUE)),
- &mtr);
+ continue the insertion. */
+ err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode,
+ &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
@@ -3378,11 +3358,6 @@ row_ins_sec_index_entry(
if (err == DB_FAIL) {
mem_heap_empty(heap);
- if (index->table->space == fil_system.sys_space
- && !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
- ibuf_free_excess_pages();
- }
-
/* Try then pessimistic descent to the B-tree */
log_free_check();
@@ -3606,19 +3581,6 @@ row_ins_index_entry_step(
}
/***********************************************************//**
-Allocates a row id for row and inits the node->index field. */
-UNIV_INLINE
-void
-row_ins_alloc_row_id_step(
-/*======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
- if (dict_table_get_first_index(node->table)->is_gen_clust())
- dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
-}
-
-/***********************************************************//**
Gets a row to insert from the values list. */
UNIV_INLINE
void
@@ -3698,13 +3660,18 @@ row_ins(
DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
if (node->state == INS_NODE_ALLOC_ROW_ID) {
-
- row_ins_alloc_row_id_step(node);
-
node->index = dict_table_get_first_index(node->table);
ut_ad(node->entry_list.empty() == false);
node->entry = node->entry_list.begin();
+ if (node->index->is_gen_clust()) {
+ const uint64_t db_row_id{++node->table->row_id};
+ if (db_row_id >> 48) {
+ DBUG_RETURN(DB_OUT_OF_FILE_SPACE);
+ }
+ mach_write_to_6(node->sys_buf, db_row_id);
+ }
+
if (node->ins_type == INS_SEARCHED) {
row_ins_get_row_from_select(node);
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 3302bf934da..010b347c003 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1701,22 +1701,6 @@ err_exit:
if (error) {
goto err_exit;
}
-#ifdef UNIV_DEBUG
- switch (btr_pcur_get_btr_cur(pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- /* We did not request buffering. */
- break;
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- goto flag_ok;
- }
- ut_ad(0);
-flag_ok:
-#endif /* UNIV_DEBUG */
if (page_rec_is_infimum(btr_pcur_get_rec(pcur))
|| btr_pcur_get_low_match(pcur) < index->n_uniq) {
@@ -1724,8 +1708,8 @@ flag_ok:
found, because new_table is being modified by
this thread only, and all indexes should be
updated in sync. */
- mtr->commit();
- return(DB_INDEX_CORRUPT);
+ error = DB_INDEX_CORRUPT;
+ goto err_exit;
}
btr_cur_pessimistic_delete(&error, FALSE,
@@ -1785,22 +1769,6 @@ row_log_table_apply_delete(
if (err != DB_SUCCESS) {
goto all_done;
}
-#ifdef UNIV_DEBUG
- switch (btr_pcur_get_btr_cur(&pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- /* We did not request buffering. */
- break;
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- goto flag_ok;
- }
- ut_ad(0);
-flag_ok:
-#endif /* UNIV_DEBUG */
if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
|| btr_pcur_get_low_match(&pcur) < index->n_uniq) {
@@ -1934,19 +1902,6 @@ func_exit_committed:
return error;
}
-#ifdef UNIV_DEBUG
- switch (btr_pcur_get_btr_cur(&pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- ut_ad(0);/* We did not request buffering. */
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- break;
- }
-#endif /* UNIV_DEBUG */
ut_ad(!page_rec_is_infimum(btr_pcur_get_rec(&pcur))
&& btr_pcur_get_low_match(&pcur) >= index->n_uniq);
@@ -2096,8 +2051,17 @@ func_exit_committed:
ut_free(pcur.old_rec_buf);
pcur.old_rec_buf = nullptr;
- if (ROW_FOUND != row_search_index_entry(
- entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
+ error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
+ &pcur, &mtr);
+
+ if (error != DB_SUCCESS) {
+ ut_ad(0);
+ break;
+ }
+
+ if (btr_pcur_is_before_first_on_page(&pcur)
+ || btr_pcur_get_low_match(&pcur)
+ != dtuple_get_n_fields(entry)) {
ut_ad(0);
error = DB_CORRUPTION;
break;
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 5601a786555..5a16b5d8ec4 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -150,7 +150,7 @@ public:
false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = rtr_insert_leaf(&ins_cur, dtuple,
+ error = rtr_insert_leaf(&ins_cur, nullptr, dtuple,
BTR_MODIFY_LEAF, &mtr);
/* It needs to update the MBR in the parent entry,
@@ -163,7 +163,8 @@ public:
rtr_info_update_btr(&ins_cur, &rtr_info);
mtr.start();
index->set_modified(mtr);
- error = rtr_insert_leaf(&ins_cur, dtuple,
+ error = rtr_insert_leaf(&ins_cur, nullptr,
+ dtuple,
BTR_MODIFY_TREE, &mtr);
}
@@ -186,7 +187,8 @@ public:
&ins_cur, index, false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = rtr_insert_leaf(&ins_cur, dtuple,
+ error = rtr_insert_leaf(&ins_cur, nullptr,
+ dtuple,
BTR_MODIFY_TREE, &mtr);
if (error == DB_SUCCESS) {
@@ -2221,7 +2223,7 @@ end_of_index:
next_page_no),
old_table->space->zip_size(),
RW_S_LATCH, nullptr, BUF_GET, &mtr,
- &err, false);
+ &err);
if (!block) {
goto err_exit;
}
@@ -3709,8 +3711,6 @@ row_merge_mtuple_to_dtuple(
dtuple_t* dtuple,
const mtuple_t* mtuple)
{
- ut_ad(!dict_index_is_ibuf(index));
-
memcpy(dtuple->fields, mtuple->fields,
dtuple->n_fields * sizeof *mtuple->fields);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 549d2745223..81879431096 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2000, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,7 +43,6 @@ Created 9/17/2000 Heikki Tuuri
#include "fsp0file.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "log0log.h"
#include "pars0pars.h"
@@ -2334,12 +2333,7 @@ row_discard_tablespace(
2) Purge and rollback: we assign a new table id for the
table. Since purge and rollback look for the table based on
the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree. */
-
- ibuf_delete_for_discarded_space(table->space_id);
+ their operations. */
table_id_t new_id;
@@ -2442,9 +2436,8 @@ rollback:
/* Note: The following cannot be rolled back. Rollback would see the
UPDATE of SYS_INDEXES.TABLE_ID as two operations: DELETE and INSERT.
It would invoke btr_free_if_exists() when rolling back the INSERT,
- effectively dropping all indexes of the table. Furthermore, calls like
- ibuf_delete_for_discarded_space() are already discarding data
- before the transaction is committed.
+ effectively dropping all indexes of the table. Furthermore, we are
+ already discarding data before the transaction is committed.
It would be better to remove the integrity-breaking
ALTER TABLE...DISCARD TABLESPACE operation altogether. */
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 65d26e0a733..0a2647e8d6d 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -273,10 +273,10 @@ not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
-while holding a latch on the leaf page of the secondary index entry
-(or keeping the buffer pool watch on the page). It is possible that
-this function first returns true and then false, if a user transaction
-inserts a record that the secondary index entry would refer to.
+while holding a latch on the leaf page of the secondary index entry.
+It is possible that this function first returns true and then false,
+if a user transaction inserts a record that the secondary index entry
+would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@@ -292,6 +292,7 @@ page latch.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
+static
bool
row_purge_poss_sec(
purge_node_t* node,
@@ -349,14 +350,11 @@ row_purge_remove_sec_if_poss_tree(
pcur.btr_cur.page_cur.index = index;
if (index->is_spatial()) {
- if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
- goto found;
+ if (rtr_search(entry, BTR_PURGE_TREE, &pcur, nullptr, &mtr)) {
+ goto func_exit;
}
- goto func_exit;
- }
-
- switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
- case ROW_NOT_FOUND:
+ } else if (!row_search_index_entry(entry, BTR_PURGE_TREE,
+ &pcur, &mtr)) {
/* Not found. This is a legitimate condition. In a
rollback, InnoDB will remove secondary recs that would
be purged anyway. Then the actual purge will not find
@@ -366,25 +364,13 @@ row_purge_remove_sec_if_poss_tree(
index, it will remove it. Then if/when the purge
comes to consider the secondary index record a second
time, it will not exist any more in the index. */
-
- /* fputs("PURGE:........sec entry not found\n", stderr); */
- /* dtuple_print(stderr, entry); */
goto func_exit;
- case ROW_FOUND:
- break;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
}
/* We should remove the index record if no later version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
-found:
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
/* Remove the index record, which should have been
@@ -453,24 +439,17 @@ row_purge_remove_sec_if_poss_leaf(
pcur.btr_cur.page_cur.index = index;
- /* Set the purge node for the call to row_purge_poss_sec(). */
- pcur.btr_cur.purge_node = node;
if (index->is_spatial()) {
- pcur.btr_cur.thr = NULL;
- if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) {
+ if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, nullptr,
+ &mtr)) {
goto found;
}
- goto func_exit;
- }
-
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
-
- switch (row_search_index_entry(entry, index->has_virtual()
- ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF,
- &pcur, &mtr)) {
- case ROW_FOUND:
+ } else if (btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur,
+ &mtr)
+ == DB_SUCCESS
+ && !btr_pcur_is_before_first_on_page(&pcur)
+ && btr_pcur_get_low_match(&pcur)
+ == dtuple_get_n_fields(entry)) {
found:
/* Before attempting to purge a record, check
if it is safe to do so. */
@@ -499,25 +478,18 @@ found:
if (index->is_spatial()) {
const buf_block_t* block = btr_cur_get_block(
btr_cur);
+ const page_id_t id{block->page.id()};
- if (block->page.id().page_no()
- != index->page
+ if (id.page_no() != index->page
&& page_get_n_recs(block->page.frame) < 2
- && !lock_test_prdt_page_lock(
- btr_cur->rtr_info
- && btr_cur->rtr_info->thr
- ? thr_get_trx(
- btr_cur->rtr_info->thr)
- : nullptr,
- block->page.id())) {
+ && !lock_test_prdt_page_lock(nullptr, id)){
/* this is the last record on the page,
and it has a "page" lock on it,
which means a search still depends
on it, so do not delete it */
DBUG_LOG("purge",
"skip purging last"
- " record on page "
- << block->page.id());
+ " record on page " << id);
goto func_exit;
}
}
@@ -525,25 +497,13 @@ found:
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
!= DB_FAIL;
}
+ }
- /* (The index entry is still needed,
- or the deletion succeeded) */
- /* fall through */
- case ROW_NOT_DELETED_REF:
- /* The index entry is still needed. */
- case ROW_BUFFERED:
- /* The deletion was buffered. */
- case ROW_NOT_FOUND:
- /* The index entry does not exist, nothing to do. */
func_exit:
- mtr.commit();
+ mtr.commit();
cleanup:
- btr_pcur_close(&pcur); // FIXME: do we need these? when is btr_cur->rtr_info set?
- return(success);
- }
-
- ut_error;
- return(false);
+ btr_pcur_close(&pcur);
+ return success;
}
/***********************************************************//**
@@ -596,10 +556,7 @@ Purges a delete marking of a record.
@retval false the purge needs to be suspended because of
running out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_del_mark(
-/*===============*/
- purge_node_t* node) /*!< in/out: row purge node */
+bool row_purge_del_mark(purge_node_t *node)
{
if (node->index)
{
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index a4d634f2d14..059aee6f140 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,7 +26,6 @@ Created 2012-02-08 by Sunny Bains.
#include "row0quiesce.h"
#include "row0mysql.h"
-#include "ibuf0ibuf.h"
#include "srv0start.h"
#include "trx0purge.h"
@@ -533,18 +532,6 @@ row_quiesce_table_start(
purge_sys.stop();
}
- for (ulint count = 0;
- ibuf_merge_space(table->space_id);
- ++count) {
- if (trx_is_interrupted(trx)) {
- goto aborted;
- }
- if (!(count % 20)) {
- ib::info() << "Merging change buffer entries for "
- << table->name;
- }
- }
-
while (buf_flush_list_space(table->space)) {
if (trx_is_interrupted(trx)) {
goto aborted;
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 4a00b2a430e..a7cddee0b77 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -215,28 +215,20 @@ row_build_index_entry_low(
entry = dtuple_create(heap, entry_len);
}
- if (dict_index_is_ibuf(index)) {
- dtuple_set_n_fields_cmp(entry, entry_len);
- /* There may only be externally stored columns
- in a clustered index B-tree of a user table. */
- ut_a(!ext);
- } else {
- dtuple_set_n_fields_cmp(
- entry, dict_index_get_n_unique_in_tree(index));
- if (dict_index_is_spatial(index)) {
- /* Set the MBR field */
- if (!row_build_spatial_index_key(
- index, ext,
- dtuple_get_nth_field(entry, 0),
- dtuple_get_nth_field(
- row,
- dict_index_get_nth_field(index, i)
- ->col->ind), flag, heap)) {
- return NULL;
- }
-
- i = 1;
+ dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique_in_tree(index));
+ if (index->is_spatial()) {
+ /* Set the MBR field */
+ if (!row_build_spatial_index_key(
+ index, ext,
+ dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(
+ row,
+ dict_index_get_nth_field(index, i)
+ ->col->ind), flag, heap)) {
+ return NULL;
}
+
+ i = 1;
}
for (; i < entry_len; i++) {
@@ -1262,8 +1254,8 @@ row_get_clust_rec(
/***************************************************************//**
Searches an index record.
-@return whether the record was found or buffered */
-enum row_search_result
+@return whether the record was found */
+bool
row_search_index_entry(
/*===================*/
const dtuple_t* entry, /*!< in: index entry */
@@ -1272,47 +1264,14 @@ row_search_index_entry(
be closed by the caller */
mtr_t* mtr) /*!< in: mtr */
{
- ulint n_fields;
- ulint low_match;
- rec_t* rec;
-
ut_ad(dtuple_check_typed(entry));
if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
- return ROW_NOT_FOUND;
- }
-
- switch (btr_pcur_get_btr_cur(pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- ut_ad(!(~mode & BTR_DELETE));
- return(ROW_NOT_DELETED_REF);
-
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- return(ROW_BUFFERED);
-
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- break;
- }
-
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- n_fields = dtuple_get_n_fields(entry);
-
- if (page_rec_is_infimum(rec)) {
-
- return(ROW_NOT_FOUND);
- } else if (low_match != n_fields) {
-
- return(ROW_NOT_FOUND);
+ return false;
}
- return(ROW_FOUND);
+ return !btr_pcur_is_before_first_on_page(pcur)
+ && btr_pcur_get_low_match(pcur) == dtuple_get_n_fields(entry);
}
/*******************************************************************//**
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 9ef145236a8..fa7e129752a 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -4040,7 +4040,8 @@ row_search_idx_cond_check(
ut_ad(rec_offs_validate(rec, prebuilt->index, offsets));
if (!prebuilt->idx_cond) {
- if (!handler_rowid_filter_is_active(prebuilt->pk_filter)) {
+ if (!prebuilt->pk_filter ||
+ !handler_rowid_filter_is_active(prebuilt->pk_filter)) {
return(CHECK_POS);
}
} else {
@@ -4082,7 +4083,8 @@ row_search_idx_cond_check(
switch (result) {
case CHECK_POS:
- if (handler_rowid_filter_is_active(prebuilt->pk_filter)) {
+ if (prebuilt->pk_filter &&
+ handler_rowid_filter_is_active(prebuilt->pk_filter)) {
ut_ad(!prebuilt->index->is_primary());
if (prebuilt->clust_index_was_generated) {
ulint len;
@@ -4768,14 +4770,13 @@ wait_table_again:
}
} else if (dtuple_get_n_fields(search_tuple) > 0) {
- pcur->btr_cur.thr = thr;
pcur->old_rec = nullptr;
if (index->is_spatial()) {
if (!prebuilt->rtr_info) {
prebuilt->rtr_info = rtr_create_rtr_info(
- set_also_gap_locks, true,
- btr_pcur_get_btr_cur(pcur), index);
+ set_also_gap_locks, true, thr,
+ btr_pcur_get_btr_cur(pcur));
prebuilt->rtr_info->search_tuple = search_tuple;
prebuilt->rtr_info->search_mode = mode;
rtr_info_update_btr(btr_pcur_get_btr_cur(pcur),
@@ -4788,7 +4789,8 @@ wait_table_again:
prebuilt->rtr_info->search_mode = mode;
}
- err = rtr_search_leaf(pcur, search_tuple, mode, &mtr);
+ err = rtr_search_leaf(pcur, thr, search_tuple, mode,
+ &mtr);
} else {
err = btr_pcur_open_with_no_init(search_tuple, mode,
BTR_SEARCH_LEAF,
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 50196e78092..6b4393d4113 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -40,7 +40,6 @@ Created 2/25/1997 Heikki Tuuri
#include "row0row.h"
#include "row0upd.h"
#include "que0que.h"
-#include "ibuf0ibuf.h"
#include "log0log.h"
#include "fil0fil.h"
#include <mysql/service_thd_mdl.h>
@@ -266,7 +265,7 @@ row_undo_ins_remove_sec_low(
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
pcur.btr_cur.page_cur.index = index;
- row_mtr_start(&mtr, index, !modify_leaf);
+ row_mtr_start(&mtr, index);
if (index->is_spatial()) {
mode = modify_leaf
@@ -274,8 +273,7 @@ row_undo_ins_remove_sec_low(
| BTR_RTREE_DELETE_MARK
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
- if (rtr_search(entry, mode, &pcur, &mtr)) {
+ if (rtr_search(entry, mode, &pcur, thr, &mtr)) {
goto func_exit;
}
@@ -296,28 +294,17 @@ row_undo_ins_remove_sec_low(
mtr_x_lock_index(index, &mtr);
}
- switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- case ROW_NOT_FOUND:
- break;
- case ROW_FOUND:
- found:
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
-
+ if (row_search_index_entry(entry, mode, &pcur, &mtr)) {
+found:
if (modify_leaf) {
- err = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
+ err = btr_cur_optimistic_delete(&pcur.btr_cur, 0, &mtr);
} else {
/* Passing rollback=false here, because we are
deleting a secondary index record: the distinction
only matters when deleting a record that contains
externally stored columns. */
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- false, &mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, &pcur.btr_cur,
+ 0, false, &mtr);
}
}
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 50e15e03cc9..63393d86502 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -33,7 +33,6 @@ Created 2/27/1997 Heikki Tuuri
#include "trx0purge.h"
#include "btr0btr.h"
#include "mach0data.h"
-#include "ibuf0ibuf.h"
#include "row0undo.h"
#include "row0vers.h"
#include "trx0trx.h"
@@ -491,7 +490,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
mtr_t mtr_vers;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
- row_mtr_start(&mtr, index, !modify_leaf);
+ row_mtr_start(&mtr, index);
pcur.btr_cur.page_cur.index = index;
btr_cur = btr_pcur_get_btr_cur(&pcur);
@@ -502,8 +501,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
| BTR_RTREE_DELETE_MARK
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
- btr_cur->thr = thr;
- if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, thr, &mtr))) {
goto found;
} else {
goto func_exit;
@@ -527,9 +525,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_ad(!dict_index_is_online_ddl(index));
}
- switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr),
- ROW_FOUND)) {
- case ROW_NOT_FOUND:
+ if (!row_search_index_entry(entry, mode, &pcur, &mtr)) {
/* In crash recovery, the secondary index record may
be missing if the UPDATE did not have time to insert
the secondary index records before the crash. When we
@@ -540,14 +536,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
before it has inserted all updated secondary index
records, then the undo will not find those records. */
goto func_exit;
- case ROW_FOUND:
- break;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
}
found:
@@ -685,12 +673,13 @@ row_undo_mod_del_unmark_sec_and_undo_update(
}
try_again:
- row_mtr_start(&mtr, index, mode & 8);
+ row_mtr_start(&mtr, index);
- btr_cur->thr = thr;
+ mem_heap_t* offsets_heap = nullptr;
+ rec_offs* offsets = nullptr;
if (index->is_spatial()) {
- if (!rtr_search(entry, mode, &pcur, &mtr)) {
+ if (!rtr_search(entry, mode, &pcur, thr, &mtr)) {
goto found;
}
@@ -704,17 +693,7 @@ try_again:
goto not_found;
}
- switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
- mem_heap_t* heap;
- mem_heap_t* offsets_heap;
- rec_offs* offsets;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- case ROW_NOT_FOUND:
+ if (!row_search_index_entry(entry, mode, &pcur, &mtr)) {
not_found:
if (btr_cur->up_match >= dict_index_get_n_unique(index)
|| btr_cur->low_match >= dict_index_get_n_unique(index)) {
@@ -726,7 +705,7 @@ not_found:
<< " at: " << rec_index_print(
btr_cur_get_rec(btr_cur), index);
err = DB_DUPLICATE_KEY;
- break;
+ goto func_exit;
}
ib::warn() << "Record in index " << index->name
@@ -740,8 +719,6 @@ not_found:
delete-unmark. */
big_rec_t* big_rec;
rec_t* insert_rec;
- offsets = NULL;
- offsets_heap = NULL;
err = btr_cur_optimistic_insert(
flags, btr_cur, &offsets, &offsets_heap,
@@ -770,16 +747,13 @@ not_found:
if (offsets_heap) {
mem_heap_free(offsets_heap);
}
-
- break;
- case ROW_FOUND:
+ } else {
found:
btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
- heap = mem_heap_create(
+ mem_heap_t* heap = mem_heap_create(
sizeof(upd_t)
+ dtuple_get_n_fields(entry) * sizeof(upd_field_t));
- offsets_heap = NULL;
offsets = rec_get_offsets(
btr_cur_get_rec(btr_cur),
index, nullptr, index->n_core_fields, ULINT_UNDEFINED,
@@ -818,6 +792,7 @@ found:
mem_heap_free(offsets_heap);
}
+func_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index fe88fce58a2..15a0ebb277c 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -1838,9 +1838,7 @@ row_upd_sec_index_entry(
dict_index_t* index;
dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
- btr_latch_mode mode;
ulint flags;
- enum row_search_result search_result;
ut_ad(trx->id != 0);
@@ -1868,7 +1866,6 @@ row_upd_sec_index_entry(
"before_row_upd_sec_index_entry");
mtr.start();
- mode = BTR_MODIFY_LEAF;
switch (index->table->space_id) {
case SRV_TMP_SPACE_ID:
@@ -1878,24 +1875,17 @@ row_upd_sec_index_entry(
default:
index->set_modified(mtr);
/* fall through */
- case IBUF_SPACE_ID:
+ case 0:
flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
- /* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index. */
- if (!referenced) {
- mode = BTR_DELETE_MARK_LEAF;
- }
- break;
}
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = thr;
pcur.btr_cur.page_cur.index = index;
+ const rec_t *rec;
if (index->is_spatial()) {
- mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK);
- if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ constexpr btr_latch_mode mode = btr_latch_mode(
+ BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK);
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, thr, &mtr))) {
goto found;
}
@@ -1905,20 +1895,8 @@ row_upd_sec_index_entry(
}
goto not_found;
- }
-
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
-
- switch (search_result) {
- const rec_t* rec;
- case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */
- ut_error;
- break;
- case ROW_BUFFERED:
- /* Entry was delete marked already. */
- break;
-
- case ROW_NOT_FOUND:
+ } else if (!row_search_index_entry(entry, BTR_MODIFY_LEAF,
+ &pcur, &mtr)) {
not_found:
rec = btr_pcur_get_rec(&pcur);
ib::error()
@@ -1932,8 +1910,7 @@ not_found:
ut_ad(btr_validate_index(index, 0) == DB_SUCCESS);
ut_ad(0);
#endif /* UNIV_DEBUG */
- break;
- case ROW_FOUND:
+ } else {
found:
ut_ad(err == DB_SUCCESS);
rec = btr_pcur_get_rec(&pcur);
@@ -1948,7 +1925,7 @@ found:
btr_pcur_get_block(&pcur),
btr_pcur_get_rec(&pcur), index, thr, &mtr);
if (err != DB_SUCCESS) {
- break;
+ goto close;
}
btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur),
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index d0f96ece141..987d3d185d9 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -2,7 +2,7 @@
Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,6 @@ Created 12/9/2009 Jimmy Yang
#include "buf0buf.h"
#include "dict0mem.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "mach0data.h"
#include "os0file.h"
@@ -527,23 +526,10 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf",
INDEX_NON_LEAF),
- MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf",
- INDEX_IBUF_LEAF),
-
- MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf",
- "Insert Buffer Index Non-Leaf",
- INDEX_IBUF_NON_LEAF),
-
MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG),
MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE),
- MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List",
- IBUF_FREELIST),
-
- MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap",
- IBUF_BITMAP),
-
MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM),
MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM),
@@ -566,23 +552,10 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf",
INDEX_NON_LEAF),
- MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf",
- INDEX_IBUF_LEAF),
-
- MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf",
- "Insert Buffer Index Non-Leaf",
- INDEX_IBUF_NON_LEAF),
-
MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG),
MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE),
- MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List",
- IBUF_FREELIST),
-
- MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap",
- IBUF_BITMAP),
-
MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM),
MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System",
@@ -948,57 +921,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED},
- /* ========== Counters for Change Buffer ========== */
- {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM},
-
- {"ibuf_merges_insert", "change_buffer",
- "Number of inserted records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT},
-
- {"ibuf_merges_delete_mark", "change_buffer",
- "Number of deleted records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE},
-
- {"ibuf_merges_delete", "change_buffer",
- "Number of purge records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE},
-
- {"ibuf_merges_discard_insert", "change_buffer",
- "Number of insert merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT},
-
- {"ibuf_merges_discard_delete_mark", "change_buffer",
- "Number of deleted merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE},
-
- {"ibuf_merges_discard_delete", "change_buffer",
- "Number of purge merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE},
-
- {"ibuf_merges", "change_buffer", "Number of change buffer merges",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES},
-
- {"ibuf_size", "change_buffer", "Change buffer size in pages",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE},
-
/* ========== Counters for server operations ========== */
{"module_innodb", "innodb",
"Counter for general InnoDB server wide operations and properties",
@@ -1531,38 +1453,6 @@ srv_mon_process_existing_counter(
value = fil_system.n_open;
break;
- case MONITOR_OVLD_IBUF_MERGE_INSERT:
- value = ibuf.n_merged_ops[IBUF_OP_INSERT];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DELETE:
- value = ibuf.n_merged_ops[IBUF_OP_DELETE_MARK];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_PURGE:
- value = ibuf.n_merged_ops[IBUF_OP_DELETE];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT:
- value = ibuf.n_discarded_ops[IBUF_OP_INSERT];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE:
- value = ibuf.n_discarded_ops[IBUF_OP_DELETE_MARK];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE:
- value = ibuf.n_discarded_ops[IBUF_OP_DELETE];
- break;
-
- case MONITOR_OVLD_IBUF_MERGES:
- value = ibuf.n_merges;
- break;
-
- case MONITOR_OVLD_IBUF_SIZE:
- value = ibuf.size;
- break;
-
case MONITOR_OVLD_SERVER_ACTIVITY:
value = srv_get_activity_count();
break;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index c5ccb7ee43b..d8babd40468 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -3,7 +3,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -48,7 +48,6 @@ Created 10/8/1995 Heikki Tuuri
#include "buf0lru.h"
#include "dict0boot.h"
#include "dict0load.h"
-#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "log0recv.h"
#include "mem0mem.h"
@@ -126,9 +125,9 @@ my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
my_bool srv_file_per_table;
-/** Set if InnoDB operates in read-only mode or innodb-force-recovery
-is greater than SRV_FORCE_NO_TRX_UNDO. */
-my_bool high_level_read_only;
+/** Set if innodb_read_only is set or innodb_force_recovery
+is SRV_FORCE_NO_UNDO_LOG_SCAN or greater. */
+bool high_level_read_only;
/** Sort buffer size in index creation */
ulong srv_sort_buf_size;
@@ -219,13 +218,6 @@ in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
ulong srv_read_ahead_threshold;
-/** innodb_change_buffer_max_size; maximum on-disk size of change
-buffer in terms of percentage of the buffer pool. */
-uint srv_change_buffer_max_size;
-
-ulong srv_file_flush_method;
-
-
/** copy of innodb_open_files; @see innodb_init_params() */
ulint srv_max_n_open_files;
@@ -282,7 +274,7 @@ my_bool srv_print_all_deadlocks;
INFORMATION_SCHEMA.innodb_cmp_per_index */
my_bool srv_cmp_per_index_enabled;
-/** innodb_fast_shutdown=1 skips purge and change buffer merge.
+/** innodb_fast_shutdown=1 skips the purge of transaction history.
innodb_fast_shutdown=2 effectively crashes the server (no log checkpoint).
innodb_fast_shutdown=3 is a clean shutdown that skips the rollback
of active transaction (to be done on restart). */
@@ -384,8 +376,6 @@ FILE* srv_misc_tmpfile;
ulint srv_main_active_loops;
/** Iterations of the loop bounded by the 'srv_idle' label. */
ulint srv_main_idle_loops;
-/** Iterations of the loop bounded by the 'srv_shutdown' label. */
-static ulint srv_main_shutdown_loops;
/** Log writes involving flush. */
ulint srv_log_writes_and_flush;
@@ -569,10 +559,9 @@ srv_print_master_thread_info(
FILE *file) /* in: output stream */
{
fprintf(file, "srv_master_thread loops: " ULINTPF " srv_active, "
- ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n"
+ ULINTPF " srv_idle\n"
"srv_master_thread log flush and writes: " ULINTPF "\n",
srv_main_active_loops,
- srv_main_shutdown_loops,
srv_main_idle_loops,
srv_log_writes_and_flush);
}
@@ -791,8 +780,6 @@ srv_printf_innodb_monitor(
"--------\n", file);
os_aio_print(file);
- ibuf_print(file);
-
#ifdef BTR_CUR_HASH_ADAPT
if (btr_search_enabled) {
fputs("-------------------\n"
@@ -1296,31 +1283,6 @@ static void srv_sync_log_buffer_in_background()
}
}
-/** Report progress during shutdown.
-@param last time of last output
-@param n_read number of page reads initiated for change buffer merge */
-static void srv_shutdown_print(time_t &last, ulint n_read)
-{
- time_t now= time(nullptr);
- if (now - last >= 15)
- {
- last= now;
-
- const ulint ibuf_size= ibuf.size;
- sql_print_information("Completing change buffer merge;"
- " %zu page reads initiated;"
- " %zu change buffer pages remain",
- n_read, ibuf_size);
-#if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY
- service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
- "Completing change buffer merge;"
- " %zu page reads initiated;"
- " %zu change buffer pages remain",
- n_read, ibuf_size);
-#endif
- }
-}
-
/** Perform periodic tasks whenever the server is active.
@param counter_time microsecond_interval_timer() */
static void srv_master_do_active_tasks(ulonglong counter_time)
@@ -1358,32 +1320,6 @@ static void srv_master_do_idle_tasks(ulonglong counter_time)
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
}
-/**
-Complete the shutdown tasks such as background DROP TABLE,
-and optionally change buffer merge (on innodb_fast_shutdown=0). */
-void srv_shutdown(bool ibuf_merge)
-{
- ulint n_read = 0;
- time_t now = time(NULL);
-
- do {
- ut_ad(!srv_read_only_mode);
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
- ++srv_main_shutdown_loops;
-
- if (ibuf_merge) {
- srv_main_thread_op_info = "doing insert buffer merge";
- /* Disallow the use of change buffer to
- avoid a race condition with
- ibuf_read_merge_pages() */
- ibuf_max_size_update(0);
- log_free_check();
- n_read = ibuf_contract();
- srv_shutdown_print(now, n_read);
- }
- } while (n_read);
-}
-
/** The periodic master task controlling the server. */
void srv_master_callback(void*)
{
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 2ed5ac57a0c..5266450ce10 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -3,7 +3,7 @@
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -69,7 +69,6 @@ Created 2/16/1996 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "rem0rec.h"
-#include "ibuf0ibuf.h"
#include "srv0start.h"
#include "srv0srv.h"
#include "btr0defragment.h"
@@ -95,6 +94,7 @@ Created 2/16/1996 Heikki Tuuri
#include "row0row.h"
#include "row0mysql.h"
#include "btr0pcur.h"
+#include "ibuf0ibuf.h"
#include "zlib.h"
#include "log.h"
@@ -1031,7 +1031,7 @@ srv_init_abort_low(
/** Prepare to delete the redo log file. Flush the dirty pages from all the
buffer pools. Flush the redo log buffer to the redo log file.
@return lsn upto which data pages have been flushed. */
-static lsn_t srv_prepare_to_delete_redo_log_file()
+ATTRIBUTE_COLD static lsn_t srv_prepare_to_delete_redo_log_file()
{
DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
@@ -1098,6 +1098,67 @@ same_size:
DBUG_RETURN(flushed_lsn);
}
+/** Upgrade the redo log to the latest format, or change its size
+or encryption, before starting to write any log records. */
+ATTRIBUTE_COLD static dberr_t srv_log_rebuild()
+{
+ /* Prepare to delete the old redo log file */
+ const lsn_t lsn{srv_prepare_to_delete_redo_log_file()};
+
+ DBUG_EXECUTE_IF("innodb_log_abort_1", return DB_ERROR;);
+ /* Prohibit redo log writes from any other threads until creating a
+ log checkpoint at the end of create_log_file(). */
+ ut_d(recv_no_log_write= true);
+ DBUG_ASSERT(!buf_pool.any_io_pending());
+
+ /* Close the redo log file, so that we can replace it */
+ log_sys.close_file();
+
+ DBUG_EXECUTE_IF("innodb_log_abort_5", return DB_ERROR;);
+
+ dberr_t err= create_log_file(false, lsn);
+
+ if (err == DB_SUCCESS && log_sys.resize_rename())
+ err = DB_ERROR;
+
+ return err;
+}
+
+/** Rebuild the redo log if needed. */
+static dberr_t srv_log_rebuild_if_needed()
+{
+ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO)
+ /* Completely ignore the redo log. */
+ return DB_SUCCESS;
+ if (srv_read_only_mode)
+ /* Leave the redo log alone. */
+ return DB_SUCCESS;
+
+ if (log_sys.file_size == srv_log_file_size &&
+ log_sys.format ==
+ (srv_encrypt_log ? log_t::FORMAT_ENC_10_8 : log_t::FORMAT_10_8))
+ {
+ /* No need to add or remove encryption, upgrade, or resize. */
+ delete_log_files();
+ return DB_SUCCESS;
+ }
+
+ return srv_log_rebuild();
+}
+
+ATTRIBUTE_COLD static dberr_t ibuf_log_rebuild_if_needed()
+{
+ mysql_mutex_lock(&recv_sys.mutex);
+ recv_sys.apply(true);
+ mysql_mutex_unlock(&recv_sys.mutex);
+
+ if (recv_sys.is_corrupt_log() || recv_sys.is_corrupt_fs())
+ return DB_CORRUPTION;
+
+ recv_sys.debug_free();
+ return srv_log_rebuild_if_needed();
+}
+
static tpool::task_group rollback_all_recovered_group(1);
static tpool::task rollback_all_recovered_task(trx_rollback_all_recovered,
nullptr,
@@ -1136,10 +1197,6 @@ dberr_t srv_start(bool create_new_db)
ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
#endif
-#ifdef UNIV_IBUF_DEBUG
- ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
-#endif
-
ib::info() << "Compressed tables use zlib " ZLIB_VERSION
#ifdef UNIV_ZIP_DEBUG
" with validation"
@@ -1235,11 +1292,6 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(err));
}
- if (srv_read_only_mode) {
- ib::info() << "Disabling background log and ibuf IO write"
- << " threads.";
- }
-
if (os_aio_init()) {
ib::error() << "Cannot initialize AIO sub-system";
@@ -1385,31 +1437,41 @@ dberr_t srv_start(bool create_new_db)
if (create_new_db) {
ut_ad(!srv_read_only_mode);
- mtr_start(&mtr);
+ mtr.start();
ut_ad(fil_system.sys_space->id == 0);
compile_time_assert(TRX_SYS_SPACE == 0);
- compile_time_assert(IBUF_SPACE_ID == 0);
- ut_a(fsp_header_init(fil_system.sys_space,
- uint32_t(sum_of_new_sizes), &mtr)
- == DB_SUCCESS);
-
- ulint ibuf_root = btr_create(
- DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space,
- DICT_IBUF_ID_MIN, nullptr, &mtr, &err);
-
- mtr_commit(&mtr);
-
- if (ibuf_root == FIL_NULL) {
- return srv_init_abort(err);
+ err = fsp_header_init(fil_system.sys_space,
+ uint32_t(sum_of_new_sizes), &mtr);
+ /* Allocate dummy change buffer pages for backward
+ compatibility and to prevent a downgrade. */
+ if (err != DB_SUCCESS) {
+ } else if (buf_block_t *b =
+ fseg_create(fil_system.sys_space, PAGE_DATA, &mtr,
+ &err)) {
+ ut_ad(b->page.id()
+ == page_id_t(0, FSP_IBUF_HEADER_PAGE_NO));
+ b = fseg_alloc_free_page_general(
+ b->page.frame + PAGE_DATA,
+ FSP_IBUF_TREE_ROOT_PAGE_NO, FSP_UP, false,
+ &mtr, &mtr, &err);
+ if (b) {
+ ut_ad(b->page.id() == page_id_t
+ (0, FSP_IBUF_TREE_ROOT_PAGE_NO));
+ mtr.set_modified(*b);
+ fsp_init_file_page(fil_system.sys_space, b,
+ &mtr);
+ } else {
+ ut_ad(err != DB_SUCCESS);
+ }
}
-
- ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO);
-
/* To maintain backward compatibility we create only
the first rollback segment before the double write buffer.
All the remaining rollback segments will be created later,
after the double write buffer has been created. */
- err = trx_sys_create_sys_pages(&mtr);
+ if (err == DB_SUCCESS) {
+ err = trx_sys_create_sys_pages(&mtr);
+ }
+ mtr.commit();
if (err != DB_SUCCESS) {
return(srv_init_abort(err));
@@ -1443,38 +1505,58 @@ dberr_t srv_start(bool create_new_db)
recv_sys.dblwr.pages.clear();
- if (err != DB_SUCCESS) {
- return(srv_init_abort(err));
- }
+ bool must_upgrade_ibuf = false;
switch (srv_operation) {
case SRV_OPERATION_NORMAL:
case SRV_OPERATION_RESTORE_EXPORT:
- /* Initialize the change buffer. */
- err = dict_boot();
if (err != DB_SUCCESS) {
- return(srv_init_abort(err));
+ break;
+ }
+
+ err = ibuf_upgrade_needed();
+
+ if (UNIV_UNLIKELY(err == DB_FAIL)) {
+ must_upgrade_ibuf = true;
+ err = ibuf_log_rebuild_if_needed();
}
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
+ err = dict_boot();
/* fall through */
case SRV_OPERATION_RESTORE:
- /* This must precede recv_sys.apply(true). */
+ if (err != DB_SUCCESS) {
+ break;
+ }
+
srv_undo_tablespaces_active
= trx_rseg_get_n_undo_tablespaces();
if (srv_operation != SRV_OPERATION_RESTORE) {
dict_sys.load_sys_tables();
}
- err = trx_lists_init_at_db_start();
- if (err != DB_SUCCESS) {
- return srv_init_abort(err);
+
+ if (UNIV_UNLIKELY(must_upgrade_ibuf)) {
+ dict_load_tablespaces();
+ err = ibuf_upgrade();
+ if (err != DB_SUCCESS) {
+ break;
+ }
}
+
+ err = trx_lists_init_at_db_start();
break;
- case SRV_OPERATION_RESTORE_DELTA:
- case SRV_OPERATION_BACKUP:
- case SRV_OPERATION_BACKUP_NO_DEFER:
+ default:
ut_ad("wrong mariabackup mode" == 0);
}
+ if (err != DB_SUCCESS) {
+ return srv_init_abort(err);
+ }
+
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
/* Apply the hashed log records to the
respective file pages, for the last batch of
@@ -1592,47 +1674,10 @@ dberr_t srv_start(bool create_new_db)
/* Upgrade or resize or rebuild the redo logs before
generating any dirty pages, so that the old redo log
file will not be written to. */
+ err = srv_log_rebuild_if_needed();
- if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
- /* Completely ignore the redo log. */
- } else if (srv_read_only_mode) {
- /* Leave the redo log alone. */
- } else if (log_sys.file_size == srv_log_file_size
- && log_sys.format
- == (srv_encrypt_log
- ? log_t::FORMAT_ENC_10_8
- : log_t::FORMAT_10_8)) {
- /* No need to add or remove encryption,
- upgrade, or resize. */
- delete_log_files();
- } else {
- /* Prepare to delete the old redo log file */
- const lsn_t lsn{srv_prepare_to_delete_redo_log_file()};
-
- DBUG_EXECUTE_IF("innodb_log_abort_1",
- return(srv_init_abort(DB_ERROR)););
- /* Prohibit redo log writes from any other
- threads until creating a log checkpoint at the
- end of create_log_file(). */
- ut_d(recv_no_log_write = true);
- DBUG_ASSERT(!buf_pool.any_io_pending());
-
- /* Close the redo log file, so that we can replace it */
- log_sys.close_file();
-
- DBUG_EXECUTE_IF("innodb_log_abort_5",
- return(srv_init_abort(DB_ERROR)););
- DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));
-
- err = create_log_file(false, lsn);
-
- if (err == DB_SUCCESS && log_sys.resize_rename()) {
- err = DB_ERROR;
- }
-
- if (err != DB_SUCCESS) {
- return(srv_init_abort(err));
- }
+ if (err != DB_SUCCESS) {
+ return(srv_init_abort(err));
}
recv_sys.debug_free();
@@ -1686,8 +1731,7 @@ dberr_t srv_start(bool create_new_db)
/* Bitmap page types will be reset in
buf_dblwr_check_block() without redo logging. */
block = buf_page_get(
- page_id_t(IBUF_SPACE_ID,
- FSP_IBUF_HEADER_PAGE_NO),
+ page_id_t(0, FSP_IBUF_HEADER_PAGE_NO),
0, RW_X_LATCH, &mtr);
if (UNIV_UNLIKELY(!block)) {
corrupted_old_page:
@@ -1745,21 +1789,7 @@ dberr_t srv_start(bool create_new_db)
}
if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the calls to fil_ibd_load().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used. */
- dict_check_tablespaces_and_store_max_id();
+ dict_load_tablespaces();
}
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
@@ -1841,13 +1871,6 @@ skip_monitors:
trx_sys.get_max_trx_id());
}
- if (srv_force_recovery == 0) {
- /* In the change buffer we may have even bigger tablespace
- id's, because we may have dropped those tablespaces, but
- the buffered records have not been cleaned yet. */
- ibuf_update_max_tablespace_id();
- }
-
if (!srv_read_only_mode) {
if (create_new_db) {
srv_buffer_pool_load_at_startup = FALSE;
@@ -1902,10 +1925,6 @@ void innodb_preshutdown()
return;
if (!srv_fast_shutdown && srv_operation == SRV_OPERATION_NORMAL)
{
- /* Because a slow shutdown must empty the change buffer, we had
- better prevent any further changes from being buffered. */
- innodb_change_buffering= 0;
-
if (trx_sys.is_initialised())
while (trx_sys.any_active_transactions())
std::this_thread::sleep_for(std::chrono::milliseconds(1));
@@ -1971,8 +1990,6 @@ void innodb_shutdown()
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
ut_ad(lock_sys.is_initialised() || !srv_was_started);
ut_ad(log_sys.is_initialised() || !srv_was_started);
- ut_ad(ibuf.index || !innodb_change_buffering || !srv_was_started
- || srv_force_recovery >= SRV_FORCE_NO_DDL_UNDO);
dict_stats_deinit();
@@ -1993,7 +2010,6 @@ void innodb_shutdown()
btr_search_disable();
}
#endif /* BTR_CUR_HASH_ADAPT */
- ibuf_close();
log_sys.close();
purge_sys.close();
trx_sys.close();
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 867126adc0e..841b014019b 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -262,7 +262,6 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
trx_ulogf_t* undo_header = undo_page->page.frame
+ undo->hdr_offset;
- ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
ut_ad(rseg->needs_purge > trx->id);
if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
@@ -352,8 +351,6 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page,
undo_header + TRX_UNDO_TRX_NO,
trx->rw_trx_hash_element->no);
- mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, undo_header
- + TRX_UNDO_NEEDS_PURGE, 1U);
if (rseg->last_page_no == FIL_NULL) {
rseg->last_page_no = undo->hdr_page_no;
@@ -913,12 +910,8 @@ static void trx_purge_rseg_get_next_history_log(
if (const buf_block_t* undo_page=
buf_page_get_gen(page_id_t(purge_sys.rseg->space->id, prev_log_addr.page),
0, RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr))
- {
- const byte *log_hdr= undo_page->page.frame + prev_log_addr.boffset;
-
- trx_no= mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
- ut_ad(mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE) <= 1);
- }
+ trx_no= mach_read_from_8(undo_page->page.frame + prev_log_addr.boffset +
+ TRX_UNDO_TRX_NO);
mtr.commit();
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 3fada7d34aa..1dc3c18fc09 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -446,7 +446,7 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, mtr_t *mtr)
return DB_TABLESPACE_NOT_FOUND;
dberr_t err;
const buf_block_t *rseg_hdr=
- buf_page_get_gen(rseg->page_id(), 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
+ buf_page_get_gen(rseg->page_id(), 0, RW_X_LATCH, nullptr, BUF_GET, mtr,
&err);
if (!rseg_hdr)
return err;
@@ -522,8 +522,6 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, mtr_t *mtr)
rseg->needs_purge= id;
rseg->set_last_commit(node_addr.boffset, id);
- ut_ad(mach_read_from_2(block->page.frame + node_addr.boffset +
- TRX_UNDO_NEEDS_PURGE) <= 1);
if (rseg->last_page_no != FIL_NULL)
/* There is no need to cover this operation by the purge
@@ -574,7 +572,7 @@ dberr_t trx_rseg_array_init()
for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
mtr.start();
- if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
+ if (const buf_block_t* sys = trx_sysf_get(&mtr, true)) {
if (rseg_id == 0) {
/* In case this is an upgrade from
before MariaDB 10.3.5, fetch the base
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 374a9d724bc..ab8c0b34e7d 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,7 +105,6 @@ trx_sysf_get_n_rseg_slots()
/** Initialize the transaction system when creating the database. */
dberr_t trx_sys_create_sys_pages(mtr_t *mtr)
{
- mtr->start();
mtr->x_lock_space(fil_system.sys_space);
static_assert(TRX_SYS_SPACE == 0, "compatibility");
@@ -114,11 +113,7 @@ dberr_t trx_sys_create_sys_pages(mtr_t *mtr)
buf_block_t *block= fseg_create(fil_system.sys_space,
TRX_SYS + TRX_SYS_FSEG_HEADER, mtr, &err);
if (UNIV_UNLIKELY(!block))
- {
- error:
- mtr->commit();
return err;
- }
ut_a(block->page.id() == page_id_t(0, TRX_SYS_PAGE_NO));
mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame,
@@ -138,9 +133,8 @@ dberr_t trx_sys_create_sys_pages(mtr_t *mtr)
buf_block_t *r= trx_rseg_header_create(fil_system.sys_space, 0, 0,
mtr, &err);
if (UNIV_UNLIKELY(!r))
- goto error;
+ return err;
ut_a(r->page.id() == page_id_t(0, FSP_FIRST_RSEG_PAGE_NO));
- mtr->commit();
return trx_lists_init_at_db_start();
}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index bc41a535dbe..e88f7824ba6 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -412,12 +412,12 @@ void trx_t::free()
#endif
read_view.mem_noaccess();
MEM_NOACCESS(&lock, sizeof lock);
- MEM_NOACCESS(&op_info, sizeof op_info);
- MEM_NOACCESS(&isolation_level, sizeof isolation_level);
- MEM_NOACCESS(&check_foreigns, sizeof check_foreigns);
+ MEM_NOACCESS(&op_info, sizeof op_info +
+ sizeof(unsigned) /* isolation_level,
+ check_foreigns, check_unique_secondary,
+ bulk_insert */);
MEM_NOACCESS(&is_registered, sizeof is_registered);
MEM_NOACCESS(&active_commit_ordered, sizeof active_commit_ordered);
- MEM_NOACCESS(&check_unique_secondary, sizeof check_unique_secondary);
MEM_NOACCESS(&flush_log_later, sizeof flush_log_later);
MEM_NOACCESS(&must_flush_log_later, sizeof must_flush_log_later);
MEM_NOACCESS(&duplicates, sizeof duplicates);
@@ -1155,7 +1155,7 @@ static void trx_flush_log_if_needed_low(lsn_t lsn, const trx_t *trx)
callback= &cb;
}
- log_write_up_to(lsn, srv_file_flush_method != SRV_NOSYNC &&
+ log_write_up_to(lsn, !my_disable_sync &&
(srv_flush_log_at_trx_commit & 1), callback);
}
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 33b1f93ff65..3b9c598e745 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -588,13 +588,8 @@ static uint16_t trx_undo_header_create(buf_block_t *undo_page, trx_id_t trx_id,
undo_page->page.frame) != 0))
mtr->memset(undo_page, free + TRX_UNDO_TRX_NO, 8, 0);
- /* Write TRX_UNDO_NEEDS_PURGE=1 and TRX_UNDO_LOG_START. */
- mach_write_to_2(buf, 1);
- memcpy_aligned<2>(buf + 2, start, 2);
- static_assert(TRX_UNDO_NEEDS_PURGE + 2 == TRX_UNDO_LOG_START,
- "compatibility");
- mtr->memcpy<mtr_t::MAYBE_NOP>(*undo_page, free + TRX_UNDO_NEEDS_PURGE +
- undo_page->page.frame, buf, 4);
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*undo_page, free + TRX_UNDO_LOG_START +
+ undo_page->page.frame, start, 2);
/* Initialize all fields TRX_UNDO_XID_EXISTS to TRX_UNDO_HISTORY_NODE. */
if (prev_log)
{
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt
index f55d78f0162..033e88bb8e5 100644
--- a/storage/maria/CMakeLists.txt
+++ b/storage/maria/CMakeLists.txt
@@ -135,4 +135,5 @@ IF(TARGET s3)
TARGET_LINK_LIBRARIES(aria_s3_copy aria myisam mysys mysys_ssl ${CURL_LIBRARIES} ${ZLIB_LIBRARY})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/libmarias3)
ADD_DEFINITIONS(-DWITH_S3_STORAGE_ENGINE)
+ INSTALL_MANPAGES(s3-engine aria_s3_copy.1)
ENDIF()
diff --git a/storage/maria/aria_chk.c b/storage/maria/aria_chk.c
index 7d5598f06b5..61821ec9099 100644
--- a/storage/maria/aria_chk.c
+++ b/storage/maria/aria_chk.c
@@ -15,6 +15,7 @@
/* Describe, check and repair of MARIA tables */
+#define VER "1.3"
#include "ma_fulltext.h"
#include <myisamchk.h>
#include <my_bit.h>
@@ -25,6 +26,7 @@
/* Remove next line if you want aria_chk to produce a stack trace */
#undef HAVE_BACKTRACE
#include <my_stacktrace.h>
+#include <welcome_copyright_notice.h>
static uint decode_bits;
static char **default_argv;
@@ -79,7 +81,6 @@ static char default_open_errmsg[]= "%d when opening Aria table '%s'";
static char default_close_errmsg[]= "%d when closing Aria table '%s'";
static void get_options(int *argc,char * * *argv);
-static void print_version(void);
static void usage(void);
static int maria_chk(HA_CHECK *param, char *filename);
static void descript(HA_CHECK *param, register MARIA_HA *info, char *name);
@@ -471,13 +472,6 @@ static struct my_option my_long_options[] =
};
-static void print_version(void)
-{
- printf("%s Ver 1.3 for %s on %s\n", my_progname, SYSTEM_TYPE,
- MACHINE_TYPE);
-}
-
-
static void usage(void)
{
print_version();
@@ -1624,6 +1618,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
pos=strmov(pos,"sorted index pages,");
if (!(share->state.changed & STATE_NOT_ZEROFILLED))
pos=strmov(pos,"zerofilled,");
+ if (test_all_bits(share->state.changed, (STATE_NOT_ZEROFILLED | STATE_HAS_LSN)))
+ pos=strmov(pos,"has_lsn,");
if (!(share->state.changed & STATE_NOT_MOVABLE))
pos=strmov(pos,"movable,");
if (have_control_file && (share->state.changed & STATE_MOVED))
diff --git a/storage/maria/aria_dump_log.c b/storage/maria/aria_dump_log.c
index e64c97fcda3..4317e1b6f3b 100644
--- a/storage/maria/aria_dump_log.c
+++ b/storage/maria/aria_dump_log.c
@@ -13,8 +13,11 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+#define VER "1.1"
#include "maria_def.h"
#include <my_getopt.h>
+#include <welcome_copyright_notice.h>
+
extern void translog_example_table_init();
static const char *load_default_groups[]= { "aria_dump_log",0 };
static void get_options(int *argc,char * * *argv);
@@ -64,13 +67,6 @@ static struct my_option my_long_options[] =
};
-static void print_version(void)
-{
- printf("%s Ver 1.1 for %s on %s\n",
- my_progname_short, SYSTEM_TYPE, MACHINE_TYPE);
-}
-
-
static void usage(void)
{
print_version();
diff --git a/storage/maria/aria_pack.c b/storage/maria/aria_pack.c
index 40e7e399613..eab4d512e8b 100644
--- a/storage/maria/aria_pack.c
+++ b/storage/maria/aria_pack.c
@@ -19,6 +19,7 @@
#define USE_MY_FUNC /* We need at least my_malloc */
#endif
+#define VER "1.0"
#include "maria_def.h"
#include "trnman_public.h"
#include "trnman.h"
@@ -33,6 +34,7 @@
#endif
#include <my_getopt.h>
#include <my_handler_errors.h>
+#include <welcome_copyright_notice.h>
#if SIZEOF_LONG_LONG > 4
#define BITS_SAVED 64
@@ -353,12 +355,6 @@ static struct my_option my_long_options[] =
};
-static void print_version(void)
-{
- printf("%s Ver 1.0 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE);
-}
-
-
static void usage(void)
{
print_version();
diff --git a/storage/maria/aria_read_log.c b/storage/maria/aria_read_log.c
index c0c76ed5590..85a6f4a5e97 100644
--- a/storage/maria/aria_read_log.c
+++ b/storage/maria/aria_read_log.c
@@ -139,6 +139,12 @@ int main(int argc, char **argv)
if (opt_display_only)
printf("You are using --display-only, NOTHING will be written to disk\n");
+ if (translog_get_horizon() == LSN_IMPOSSIBLE)
+ {
+ fprintf(stdout, "The transaction log is empty\n");
+ goto end;
+ }
+
lsn= translog_first_lsn_in_log();
if (lsn == LSN_ERROR)
{
@@ -147,7 +153,8 @@ int main(int argc, char **argv)
}
if (lsn == LSN_IMPOSSIBLE)
{
- fprintf(stdout, "The transaction log is empty\n");
+ fprintf(stdout, "The transaction log is empty\n");
+ goto end;
}
if (opt_start_from_checkpoint && !opt_start_from_lsn &&
last_checkpoint_lsn != LSN_IMPOSSIBLE)
@@ -300,7 +307,7 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
- printf("%s Ver 1.5 for %s on %s\n",
+ printf("%s Ver 1.6 for %s on %s\n",
my_progname_short, SYSTEM_TYPE, MACHINE_TYPE);
}
@@ -308,7 +315,7 @@ static void print_version(void)
static void usage(void)
{
print_version();
- puts("Copyright (C) 2007 MySQL AB, 2009-2011 Monty Program Ab, 2020 MariaDB Corporation");
+ puts("Copyright (C) 2007 MySQL AB, 2009-2011 Monty Program Ab, 2022 MariaDB Corporation");
puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
puts("and you are welcome to modify and redistribute it under the GPL license\n");
diff --git a/storage/maria/aria_s3_copy.1 b/storage/maria/aria_s3_copy.1
new file mode 100644
index 00000000000..5844d5a76f0
--- /dev/null
+++ b/storage/maria/aria_s3_copy.1
@@ -0,0 +1,52 @@
+.TH ARIA_S3_COPY "1" "June 2020" "aria_s3_copy Ver 1.0" "User Commands"
+.SH NAME
+aria_s3_copy \- Copy an Aria table to and from s3
+.SH DESCRIPTION
+Usage: aria_s3_copy \-\-aws\-access\-key=# \-\-aws\-secret\-access\-key=# \-\-aws\-region=# \-\-op=(from_s3 | to_s3 | delete_from_s3) [OPTIONS] tables[.MAI]
+.TP
+\fB\-?\fR, \fB\-h\fR, \fB\-\-help\fR
+Display help and exit.
+.TP
+\fB\-k\fR, \fB\-\-s3\-access\-key=name\fR
+AWS access key ID
+.TP
+\fB\-r\fR, \fB\-\-s3\-region=name\fR
+AWS region
+.TP
+\fB\-K\fR, \fB\-\-s3\-secret\-key=name\fR
+AWS secret access key ID
+.TP
+\fB\-b\fR, \fB\-\-s3\-bucket=name\fR
+AWS prefix for tables
+.TP
+\fB\-h\fR, \fB\-\-s3\-host\-name=name\fR
+Host name to S3 provider
+.TP
+\fB\-c\fR, \fB\-\-compress\fR
+Use compression
+.TP
+\fB\-o\fR, \fB\-\-op=name\fR
+Operation to execute. One of 'from_s3', 'to_s3' or 'delete_from_s3'
+.TP
+\fB\-d\fR, \fB\-\-database=name\fR
+Database for copied table (second prefix). If not given, the directory of the table file is used
+.TP
+\fB\-B\fR, \fB\-\-s3\-block\-size=#\fR
+Block size for data/index blocks in s3
+.TP
+\fB\-L\fR, \fB\-\-s3\-protocol\-version=name\fR
+Protocol used to communicate with S3. One of "Auto", "Amazon" or "Original".
+.TP
+\fB\-f\fR, \fB\-\-force\fR
+Force copy even if target exists
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Print version and exit.
+.TP
+\fB\-\-s3\-debug\fR
+Output debug log from marias3 to stdout
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Be verbose.
+.PP
+For more information, please refer to the MariaDB Knowledge Base page https://mariadb.com/kb/en/aria_s3_copy/
diff --git a/storage/maria/aria_s3_copy.cc b/storage/maria/aria_s3_copy.cc
index 77c41ba4572..5c8c2abc7db 100644
--- a/storage/maria/aria_s3_copy.cc
+++ b/storage/maria/aria_s3_copy.cc
@@ -17,6 +17,7 @@
Allow copying of Aria tables to and from S3 and also delete them from S3
*/
+#define VER "1.0"
#include <my_global.h>
#include <m_string.h>
#include "maria_def.h"
@@ -28,6 +29,7 @@
#include <zlib.h>
#include <libmarias3/marias3.h>
#include "s3_func.h"
+#include <welcome_copyright_notice.h>
static const char *op_types[]= {"to_s3", "from_s3", "delete_from_s3", NullS};
static TYPELIB op_typelib= {array_elements(op_types)-1,"", op_types, NULL};
@@ -109,12 +111,6 @@ static struct my_option my_long_options[] =
static bool get_database_from_path(char *to, size_t to_length, const char *path);
-static void print_version(void)
-{
- printf("%s Ver 1.0 for %s on %s\n", my_progname, SYSTEM_TYPE,
- MACHINE_TYPE);
-}
-
static void usage(void)
{
print_version();
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index c245dcea036..f82823dba2b 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -1094,21 +1094,52 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
}
else
{
- flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
- HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN;
+ flags= (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
+ HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN |
+ HA_DO_RANGE_FILTER_PUSHDOWN);
}
return flags;
}
-double ha_maria::scan_time()
+/*
+ Update costs that are unique for this TABLE instance
+*/
+
+void ha_maria::update_optimizer_costs(OPTIMIZER_COSTS *costs)
{
- if (file->s->data_file_type == BLOCK_RECORD)
- return (ulonglong2double(stats.data_file_length - file->s->block_size) /
- file->s->block_size) + 2;
- return handler::scan_time();
+ /*
+ Default costs for Aria with the BLOCK_RECORD format are the same as the
+ MariaDB default costs.
+ */
+ if (file->s->data_file_type != BLOCK_RECORD)
+ {
+ /*
+ Row lookups in MyISAM row format are slow because the row data is in a
+ file that is not cached. Costs are taken from ha_myisam.cc
+ */
+ costs->row_next_find_cost= 0.000063539;
+ costs->row_lookup_cost= 0.001014818;
+ }
}
+
+IO_AND_CPU_COST ha_maria::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /* file may be 0 if this is an internal temporary file that is not yet opened */
+ if (file && file->s->data_file_type != BLOCK_RECORD)
+ {
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the system (probably cached by the OS).
+ */
+ cost.io= 0;
+ }
+ return cost;
+}
+
+
/*
We need to be able to store at least 2 keys on an index page as the
splitting algorithms depends on this. (With only one key on a page
@@ -2505,10 +2536,12 @@ int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
end_range= NULL;
if (index == pushed_idx_cond_keyno)
ma_set_index_cond_func(file, handler_index_cond_check, this);
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ ma_set_rowid_filter_func(file, handler_rowid_filter_check, this);
error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
- ma_set_index_cond_func(file, NULL, 0);
+ ma_reset_index_filter_functions(file);
return error;
}
@@ -2582,18 +2615,22 @@ int ha_maria::index_next_same(uchar * buf,
int ha_maria::index_init(uint idx, bool sorted)
{
- active_index=idx;
+ active_index= idx;
if (pushed_idx_cond_keyno == idx)
ma_set_index_cond_func(file, handler_index_cond_check, this);
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ ma_set_rowid_filter_func(file, handler_rowid_filter_check, this);
return 0;
}
-
int ha_maria::index_end()
{
+ /*
+ in_range_check_pushed_down and pushed_idx_cond_keyno are reset in
+ handler::cancel_pushed_idx_cond()
+ */
active_index=MAX_KEY;
- ma_set_index_cond_func(file, NULL, 0);
- in_range_check_pushed_down= FALSE;
+ ma_reset_index_filter_functions(file);
ds_mrr.dsmrr_close();
return 0;
}
@@ -2707,8 +2744,8 @@ int ha_maria::info(uint flag)
}
}
/*
- Set data_file_name and index_file_name to point at the symlink value
- if table is symlinked (Ie; Real name is not same as generated name)
+ Set data_file_name and index_file_name to point at the symlink value
+ if table is symlinked (Ie; Real name is not same as generated name)
*/
data_file_name= index_file_name= 0;
fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
@@ -2791,7 +2828,7 @@ int ha_maria::extra(enum ha_extra_function operation)
int ha_maria::reset(void)
{
- ma_set_index_cond_func(file, NULL, 0);
+ ma_reset_index_filter_functions(file);
ds_mrr.dsmrr_close();
if (file->trn)
{
@@ -2825,8 +2862,9 @@ bool ha_maria::auto_repair(int error) const
int ha_maria::delete_all_rows()
{
THD *thd= table->in_use;
- TRN *trn= file->trn;
+ TRN *trn= file->s->now_transactional ? file->trn : (TRN*) 0;
CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("TRUNCATE in WRITE CONCURRENT");
+
#ifdef EXTRA_DEBUG
if (trn && ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
{
@@ -2840,8 +2878,7 @@ int ha_maria::delete_all_rows()
If we are under LOCK TABLES, we have to do a commit as
delete_all_rows() can't be rolled back
*/
- if (table->in_use->locked_tables_mode && trn &&
- trnman_has_locked_tables(trn))
+ if (trn && table->in_use->locked_tables_mode && trnman_has_locked_tables(trn))
{
int error;
if ((error= implicit_commit(thd, 1)))
@@ -3849,6 +3886,10 @@ bool ha_maria::is_changed() const
return file->state->changed;
}
+static void aria_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+}
+
static int ha_maria_init(void *p)
{
@@ -3881,6 +3922,7 @@ static int ha_maria_init(void *p)
maria_hton->show_status= maria_show_status;
maria_hton->prepare_for_backup= maria_prepare_for_backup;
maria_hton->end_backup= maria_end_backup;
+ maria_hton->update_optimizer_costs= aria_update_optimizer_costs;
/* TODO: decide if we support Maria being used for log tables */
maria_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES |
@@ -4181,7 +4223,8 @@ int ha_maria::multi_range_read_next(range_id_t *range_info)
ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost)
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost)
{
/*
This call is here because there is no location where this->table would
@@ -4190,7 +4233,7 @@ ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
*/
ds_mrr.init(this, table);
return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
- flags, cost);
+ flags, limit, cost);
}
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
@@ -4241,6 +4284,26 @@ Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
return NULL;
}
+bool ha_maria::rowid_filter_push(Rowid_filter* rowid_filter)
+{
+ /* This will be used in index_init() */
+ pushed_rowid_filter= rowid_filter;
+ return false;
+}
+
+
+/* Enable or disable the rowid filter depending on whether it is active */
+
+void ha_maria::rowid_filter_changed()
+{
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ ma_set_rowid_filter_func(file, handler_rowid_filter_check, this);
+ else
+ ma_set_rowid_filter_func(file, NULL, this);
+}
+
+
+
/**
Find record by unique constrain (used in temporary tables)
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
index 2b8b5dc9742..009e8ca5fe7 100644
--- a/storage/maria/ha_maria.h
+++ b/storage/maria/ha_maria.h
@@ -77,8 +77,6 @@ public:
{ return max_supported_key_length(); }
enum row_type get_row_type() const override final;
void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override final;
- virtual double scan_time() override final;
-
int open(const char *name, int mode, uint test_if_locked) override;
int close(void) override final;
int write_row(const uchar * buf) override;
@@ -114,6 +112,8 @@ public:
int remember_rnd_pos() override final;
int restart_rnd_next(uchar * buf) override final;
void position(const uchar * record) override final;
+ void update_optimizer_costs(OPTIMIZER_COSTS *costs) override final;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override final;
int info(uint) override final;
int info(uint, my_bool);
int extra(enum ha_extra_function operation) override final;
@@ -175,7 +175,8 @@ public:
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost) override final;
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost) override final;
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost) override final;
@@ -183,6 +184,8 @@ public:
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond) override final;
+ bool rowid_filter_push(Rowid_filter* rowid_filter) override;
+ void rowid_filter_changed() override;
int find_unique_row(uchar *record, uint unique_idx) override final;
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index 61fe4f9d080..ec1b0955655 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -1172,6 +1172,7 @@ static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
{
pgcache_page_no_t page= bitmap->page;
MARIA_STATE_INFO *state= &info->s->state;
+ my_bool res;
DBUG_ENTER("move_to_next_bitmap");
if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 &&
@@ -1186,7 +1187,8 @@ static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
page+= bitmap->pages_covered;
DBUG_ASSERT(page % bitmap->pages_covered == 0);
}
- DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
+ res= _ma_change_bitmap_page(info, bitmap, page);
+ DBUG_RETURN(res);
}
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index 98ef5c21e55..c56721a2359 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -5284,6 +5284,7 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info)
{
MARIA_SHARE *share= info->s;
myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
+ my_bool res;
DBUG_ENTER("_ma_scan_init_block_record");
DBUG_ASSERT(info->dfile.file == share->bitmap.file.file);
@@ -5310,7 +5311,8 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info)
_ma_scan_block_record()), we may miss recently inserted rows (bitmap page
in page cache would be too old).
*/
- DBUG_RETURN(_ma_bitmap_flush(info->s));
+ res= _ma_bitmap_flush(info->s);
+ DBUG_RETURN(res);
}
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index f470d3691c1..45d6df7a63a 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -3651,28 +3651,35 @@ err:
int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
{
- my_bool error, reenable_logging,
+ my_bool error= 0, reenable_logging,
zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
MARIA_SHARE *share= info->s;
DBUG_ENTER("maria_zerofill");
if ((reenable_logging= share->now_transactional))
_ma_tmp_disable_logging_for_table(info, 0);
- if (!(error= (maria_zerofill_index(param, info, name) ||
- maria_zerofill_data(param, info, name) ||
- _ma_set_uuid(info->s, 0))))
+
+ if (share->state.changed & (STATE_NOT_ZEROFILLED | (zero_lsn ? STATE_HAS_LSN : 0)))
+ error= (maria_zerofill_index(param, info, name) ||
+ maria_zerofill_data(param, info, name));
+ if (!error)
+ error= _ma_set_uuid(info->s, 0);
+
+ if (!error)
{
/*
- Mark that we have done zerofill of data and index. If we zeroed pages'
- LSN, table is movable.
+ Mark that we have done zerofill of data and index. If we zeroed the LSN
+ on the pages, table is movable.
*/
share->state.changed&= ~STATE_NOT_ZEROFILLED;
if (zero_lsn)
{
- share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
+ share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED | STATE_HAS_LSN);
/* Table should get new LSNs */
share->state.create_rename_lsn= share->state.is_of_horizon=
share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
}
+ else
+ share->state.changed|= STATE_HAS_LSN;
/* Ensure state is later flushed to disk, if within maria_chk */
info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
index 21befb70bd9..237b75b99b7 100644
--- a/storage/maria/ma_control_file.c
+++ b/storage/maria/ma_control_file.c
@@ -104,7 +104,7 @@ one should increment the control file version number.
This LSN serves for the two-checkpoint rule, and also to find the
checkpoint record when doing a recovery.
*/
-LSN last_checkpoint_lsn= LSN_IMPOSSIBLE;
+volatile LSN last_checkpoint_lsn= LSN_IMPOSSIBLE;
uint32 last_logno= FILENO_IMPOSSIBLE;
/**
The maximum transaction id given to a transaction. It is only updated at
diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h
index 40428f665f4..c74957b8322 100644
--- a/storage/maria/ma_control_file.h
+++ b/storage/maria/ma_control_file.h
@@ -37,7 +37,7 @@ C_MODE_START
LSN of the last checkoint
(if last_checkpoint_lsn == LSN_IMPOSSIBLE then there was never a checkpoint)
*/
-extern LSN last_checkpoint_lsn;
+extern volatile LSN last_checkpoint_lsn;
/*
Last log number (if last_logno == FILENO_IMPOSSIBLE then there is no log
file yet)
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index 425cb421e22..087100e3d8c 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -510,8 +510,17 @@ void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
{
info->index_cond_func= func;
info->index_cond_func_arg= func_arg;
+ info->has_cond_pushdown= (info->index_cond_func || info->rowid_filter_func);
}
+void ma_set_rowid_filter_func(MARIA_HA *info,
+ rowid_filter_func_t check_func,
+ void *func_arg)
+{
+ info->rowid_filter_func= check_func;
+ info->rowid_filter_func_arg= func_arg;
+ info->has_cond_pushdown= (info->index_cond_func || info->rowid_filter_func);
+}
/*
Start/Stop Inserting Duplicates Into a Table, WL#1648.
diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c
index ddf92654be0..3de6b8b74c5 100644
--- a/storage/maria/ma_info.c
+++ b/storage/maria/ma_info.c
@@ -20,14 +20,6 @@
#include <sys/stat.h>
#endif
- /* Get position to last record */
-
-MARIA_RECORD_POS maria_position(MARIA_HA *info)
-{
- return info->cur_row.lastpos;
-}
-
-
uint maria_max_key_length()
{
uint tmp= (_ma_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
index d47e8cf715a..1b58c1c12c8 100644
--- a/storage/maria/ma_key.c
+++ b/storage/maria/ma_key.c
@@ -678,22 +678,44 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
CHECK_OUT_OF_RANGE to indicate that we don't have any active row.
*/
-check_result_t ma_check_index_cond(register MARIA_HA *info, uint keynr,
- uchar *record)
+check_result_t ma_check_index_cond_real(register MARIA_HA *info, uint keynr,
+ uchar *record)
{
check_result_t res= CHECK_POS;
+ DBUG_ASSERT(info->index_cond_func || info->rowid_filter_func);
+
+ if (_ma_put_key_in_record(info, keynr, FALSE, record))
+ {
+ /* Impossible case; Can only happen if bug in code */
+ _ma_print_error(info, HA_ERR_CRASHED, 0);
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_CRASHED;
+ return CHECK_ERROR;
+ }
+
if (info->index_cond_func)
{
- if (_ma_put_key_in_record(info, keynr, FALSE, record))
+ if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ CHECK_OUT_OF_RANGE)
{
- /* Impossible case; Can only happen if bug in code */
- _ma_print_error(info, HA_ERR_CRASHED, 0);
+ /* We got beyond the end of scanned range */
info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
- my_errno= HA_ERR_CRASHED;
- res= CHECK_ERROR;
+ my_errno= HA_ERR_END_OF_FILE;
+ return res;
}
- else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
- CHECK_OUT_OF_RANGE)
+ /*
+ If we got an error, out-of-range condition, or ICP condition computed to
+ FALSE - we don't need to check the Rowid Filter.
+ */
+ if (res != CHECK_POS)
+ return res;
+ }
+
+ /* Check the Rowid Filter, if present */
+ if (info->rowid_filter_func)
+ {
+ if ((res= info->rowid_filter_func(info->rowid_filter_func_arg)) ==
+ CHECK_OUT_OF_RANGE)
{
/* We got beyond the end of scanned range */
info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index 8e6426e3aa4..c3615e5271c 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -478,7 +478,7 @@ static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args);
static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
-
+static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link);
/*
Initialize log_record_type_descriptors
@@ -3116,7 +3116,10 @@ restart:
PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED,
NULL)))
+ {
+ translog_unlock();
DBUG_RETURN(NULL);
+ }
}
else
skipped_data= 0; /* Read after skipped in buffer data */
@@ -3217,6 +3220,11 @@ restart:
PAGECACHE_LOCK_READ :
PAGECACHE_LOCK_LEFT_UNLOCKED),
direct_link);
+ if (!buffer && direct_link)
+ {
+ translog_free_link(*direct_link);
+ *direct_link= 0;
+ }
DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
direct_link,
(direct_link ? *direct_link : NULL)));
@@ -3786,16 +3794,26 @@ my_bool translog_init_with_table(const char *directory,
}
else if (LSN_OFFSET(last_page) == 0)
{
- if (LSN_FILE_NO(last_page) == 1)
+ if (LSN_FILE_NO(last_page) == 1 ||
+ !translog_is_file(LSN_FILE_NO(last_page-1)))
{
logs_found= 0; /* file #1 has no pages */
DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
}
else
{
- last_page-= LSN_ONE_FILE;
- if (translog_get_last_page_addr(&last_page, &pageok, 0))
- goto err;
+ do
+ {
+ last_page-= LSN_ONE_FILE;
+ if (translog_get_last_page_addr(&last_page, &pageok, 0))
+ goto err;
+ }
+ while (LSN_OFFSET(last_page) == 0 && LSN_FILE_NO(last_page) >= 1);
+ if (LSN_OFFSET(last_page) == 0)
+ {
+ /* All files have a size less than TRANSLOG_PAGE_SIZE */
+ logs_found= 0;
+ }
}
}
if (logs_found)
@@ -3893,36 +3911,38 @@ my_bool translog_init_with_table(const char *directory,
old_log_was_recovered= 1;
/* This file is not written till the end so it should be last */
last_page= current_file_last_page;
- /* TODO: issue warning */
}
- do
+ if (LSN_OFFSET(current_file_last_page) >= TRANSLOG_PAGE_SIZE)
{
- TRANSLOG_VALIDATOR_DATA data;
- TRANSLOG_PAGE_SIZE_BUFF psize_buff;
- uchar *page;
- data.addr= &current_page;
- if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
- goto err;
- if (data.was_recovered)
+ do
{
- DBUG_PRINT("error", ("file no: %lu (%d) "
- "rec_offset: 0x%lx (%lu) (%d)",
- (ulong) LSN_FILE_NO(current_page),
- (uint3korr(page + 3) !=
- LSN_FILE_NO(current_page)),
- (ulong) LSN_OFFSET(current_page),
- (ulong) (LSN_OFFSET(current_page) /
- TRANSLOG_PAGE_SIZE),
- (uint3korr(page) !=
- LSN_OFFSET(current_page) /
- TRANSLOG_PAGE_SIZE)));
- old_log_was_recovered= 1;
- break;
- }
- old_flags= page[TRANSLOG_PAGE_FLAGS];
- last_valid_page= current_page;
- current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
- } while (current_page <= current_file_last_page);
+ TRANSLOG_VALIDATOR_DATA data;
+ TRANSLOG_PAGE_SIZE_BUFF psize_buff;
+ uchar *page;
+ data.addr= &current_page;
+ if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
+ goto err;
+ if (data.was_recovered)
+ {
+ DBUG_PRINT("error", ("file no: %lu (%d) "
+ "rec_offset: 0x%lx (%lu) (%d)",
+ (ulong) LSN_FILE_NO(current_page),
+ (uint3korr(page + 3) !=
+ LSN_FILE_NO(current_page)),
+ (ulong) LSN_OFFSET(current_page),
+ (ulong) (LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE),
+ (uint3korr(page) !=
+ LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE)));
+ old_log_was_recovered= 1;
+ break;
+ }
+ old_flags= page[TRANSLOG_PAGE_FLAGS];
+ last_valid_page= current_page;
+ current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
+ } while (current_page <= current_file_last_page);
+ }
current_page+= LSN_ONE_FILE;
current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
} while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
@@ -4014,7 +4034,7 @@ my_bool translog_init_with_table(const char *directory,
}
DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
logs_found, old_log_was_recovered));
- if (!logs_found)
+ if (!logs_found && !readonly)
{
TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME,
sizeof(TRANSLOG_FILE), MYF(MY_WME));
@@ -4064,6 +4084,10 @@ my_bool translog_init_with_table(const char *directory,
translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
}
+ else if (readonly && !logs_found)
+ {
+ log_descriptor.horizon= LSN_IMPOSSIBLE;
+ }
/* all LSNs that are on disk are flushed */
log_descriptor.log_start= log_descriptor.sent_to_disk=
@@ -4145,21 +4169,24 @@ my_bool translog_init_with_table(const char *directory,
uint32 file_no= LSN_FILE_NO(page_addr);
my_bool last_page_ok;
/* it is beginning of the current file */
- if (unlikely(file_no == 1))
+ do
{
- /*
- It is beginning of the log => there is no LSNs in the log =>
- There is no harm in leaving it "as-is".
+ if (unlikely(file_no == 1))
+ {
+ /*
+ It is beginning of the log => there is no LSNs in the log =>
+ There is no harm in leaving it "as-is".
*/
- log_descriptor.previous_flush_horizon= log_descriptor.horizon;
- DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
- LSN_IN_PARTS(log_descriptor.
+ log_descriptor.previous_flush_horizon= log_descriptor.horizon;
+ DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
+ LSN_IN_PARTS(log_descriptor.
previous_flush_horizon)));
- DBUG_RETURN(0);
- }
- file_no--;
- page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
- translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
+ DBUG_RETURN(0);
+ }
+ file_no--;
+ page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
+ translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
+ } while (LSN_OFFSET(page_addr) == 0);
/* page should be OK as it is not the last file */
DBUG_ASSERT(last_page_ok);
}
@@ -6905,17 +6932,19 @@ translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
/* if it is log end it have to be caught before */
DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
LSN_FILE_NO(scanner->page_addr));
- scanner->page_addr+= LSN_ONE_FILE;
- scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
- TRANSLOG_PAGE_SIZE);
- if (translog_scanner_set_last_page(scanner))
- DBUG_RETURN(1);
+ do
+ {
+ scanner->page_addr+= LSN_ONE_FILE;
+ scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
+ TRANSLOG_PAGE_SIZE);
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+ } while (!LSN_OFFSET(scanner->last_file_page));
}
else
{
scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
}
-
if (translog_scanner_get_page(scanner))
DBUG_RETURN(1);
@@ -6926,7 +6955,9 @@ translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
scanner->page_offset= 0;
DBUG_RETURN(0);
}
+#ifdef CHECK_EMPTY_PAGE
DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
+#endif
}
DBUG_RETURN(0);
}
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
index 3e5c58a8053..abe85a12727 100644
--- a/storage/maria/ma_loghandler.h
+++ b/storage/maria/ma_loghandler.h
@@ -25,7 +25,11 @@
/* minimum possible transaction log size */
#define TRANSLOG_MIN_FILE_SIZE (8*MB)
/* transaction log default flags (TODO: make it global variable) */
+#ifdef HAVE_DBUG_TRANSLOG_CRC
+#define TRANSLOG_DEFAULT_FLAGS IF_DBUG(TRANSLOG_PAGE_CRC,0)
+#else
#define TRANSLOG_DEFAULT_FLAGS 0
+#endif
/*
Transaction log flags.
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index 144b10a86da..c4c85d0bdd0 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -3876,7 +3876,7 @@ restart:
{
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_ASSERT(0);
- return (uchar*) 0;
+ DBUG_RETURN((uchar*) 0);
}
}
/*
@@ -5227,7 +5227,7 @@ int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
{
int res;
DBUG_ENTER("flush_pagecache_blocks_with_filter");
- DBUG_PRINT("enter", ("pagecache: %p", pagecache));
+ DBUG_PRINT("enter", ("pagecache: %p fd: %di", pagecache, file->file));
if (pagecache->disk_blocks <= 0)
DBUG_RETURN(0);
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 006c8bef672..90d0ed3c708 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -133,7 +133,7 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn,
static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id);
static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn,
struct st_dirty_page *dirty_page);
-static int close_all_tables(void);
+static int close_all_tables(my_bool force_end_newline);
static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr);
static void print_redo_phase_progress(TRANSLOG_ADDRESS addr);
static void delete_all_transactions();
@@ -467,7 +467,7 @@ int maria_apply_log(LSN from_lsn, LSN end_redo_lsn, LSN end_undo_lsn,
we don't use maria_panic() because it would maria_end(), and Recovery does
not want that (we want to keep some modules initialized for runtime).
*/
- if (close_all_tables())
+ if (close_all_tables(0))
{
ma_message_no_user(0, "closing of tables failed");
goto err;
@@ -495,6 +495,8 @@ int maria_apply_log(LSN from_lsn, LSN end_redo_lsn, LSN end_undo_lsn,
/* No dirty pages, all tables are closed, no active transactions, save: */
if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE))
goto err;
+ tprint(tracef, "checkpoint done at " LSN_FMT "\n",
+ LSN_IN_PARTS(last_checkpoint_lsn));
}
goto end;
@@ -505,7 +507,7 @@ err2:
delete_all_transactions();
if (!abort_message_printed)
error= 1;
- if (close_all_tables())
+ if (close_all_tables(1))
{
ma_message_no_user(0, "closing of tables failed");
}
@@ -3472,7 +3474,7 @@ static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn,
}
-static int close_all_tables(void)
+static int close_all_tables(my_bool force_end_newline)
{
int error= 0;
uint count= 0;
@@ -3537,7 +3539,7 @@ static int close_all_tables(void)
}
}
end:
- if (recovery_message_printed == REC_MSG_FLUSH)
+ if (recovery_message_printed == REC_MSG_FLUSH && (force_end_newline || error))
{
fputc('\n', stderr);
fflush(stderr);
diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c
index fe43d812600..b8123c422c1 100644
--- a/storage/maria/ma_recovery_util.c
+++ b/storage/maria/ma_recovery_util.c
@@ -87,7 +87,7 @@ void eprint(FILE *trace_file __attribute__ ((unused)),
if (!trace_file)
trace_file= stderr;
- if (procent_printed)
+ if (procent_printed && trace_file == stderr)
{
procent_printed= 0;
/* In silent mode, print on another line than the 0% 10% 20% line */
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
index 8cd82e1c6fc..7e43ed4befa 100644
--- a/storage/maria/ma_rkey.c
+++ b/storage/maria/ma_rkey.c
@@ -120,6 +120,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
/* The key references a concurrently inserted record. */
if (search_flag == HA_READ_KEY_EXACT &&
+ (keyinfo->flag & HA_NOSAME) &&
last_used_keyseg == keyinfo->seg + keyinfo->keysegs)
{
/* Simply ignore the key if it matches exactly. (Bug #29838) */
diff --git a/storage/maria/ma_scan.c b/storage/maria/ma_scan.c
index 5f2945a3078..3e789489090 100644
--- a/storage/maria/ma_scan.c
+++ b/storage/maria/ma_scan.c
@@ -48,10 +48,12 @@ int maria_scan_init(register MARIA_HA *info)
int maria_scan(MARIA_HA *info, uchar *record)
{
+ int res;
DBUG_ENTER("maria_scan");
/* Init all but update-flag */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
- DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1));
+ res= (*info->s->scan)(info, record, info->cur_row.nextpos, 1);
+ DBUG_RETURN(res);
}
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 1dbee5d744a..88f3d22c205 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -428,14 +428,15 @@ err2:
my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key)
{
+ my_bool tmp;
DBUG_ENTER("_ma_ck_write");
if (info->bulk_insert &&
is_tree_inited(&info->bulk_insert[key->keyinfo->key_nr]))
- {
- DBUG_RETURN(_ma_ck_write_tree(info, key));
- }
- DBUG_RETURN(_ma_ck_write_btree(info, key));
+ tmp= _ma_ck_write_tree(info, key);
+ else
+ tmp= _ma_ck_write_btree(info, key);
+ DBUG_RETURN(tmp);
} /* _ma_ck_write */
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index c7aef97072b..dc164dfce14 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -43,6 +43,14 @@
C_MODE_START
+#ifdef _WIN32
+/*
+ We cannot use mmap() on Windows with Aria as mmap() can cause file
+ size to increase in _ma_dynmap_file(). The extra \0 data causes
+ the file to be regarded as corrupted.
+*/
+#undef HAVE_MMAP
+#endif
/*
Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details
*/
@@ -213,7 +221,6 @@ extern int maria_rsame_with_pos(MARIA_HA *file, uchar *record,
extern int maria_update(MARIA_HA *file, const uchar *old,
const uchar *new_record);
extern int maria_write(MARIA_HA *file, const uchar *buff);
-extern MARIA_RECORD_POS maria_position(MARIA_HA *file);
extern int maria_status(MARIA_HA *info, MARIA_INFO *x, uint flag);
extern int maria_lock_database(MARIA_HA *file, int lock_type);
extern int maria_delete_table(const char *name);
@@ -1011,6 +1018,7 @@ struct st_maria_handler
my_bool switched_transactional;
/* If transaction will autocommit */
my_bool autocommit;
+ my_bool has_cond_pushdown;
#ifdef _WIN32
my_bool owned_by_merge; /* This Maria table is part of a merge union */
#endif
@@ -1022,6 +1030,8 @@ struct st_maria_handler
my_bool create_unique_index_by_sort;
index_cond_func_t index_cond_func; /* Index condition function */
void *index_cond_func_arg; /* parameter for the func */
+ rowid_filter_func_t rowid_filter_func; /* rowid filter check function */
+ void *rowid_filter_func_arg; /* parameter for the func */
};
/* Table options for the Aria and S3 storage engine */
@@ -1063,6 +1073,7 @@ struct ha_table_option_struct
#define STATE_IN_REPAIR 1024U /* We are running repair on table */
#define STATE_CRASHED_PRINTED 2048U
#define STATE_DATA_FILE_FULL 4096U
+#define STATE_HAS_LSN 8192U /* Some page still has LSN */
#define STATE_CRASHED_FLAGS (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_CRASHED_PRINTED)
@@ -1346,7 +1357,11 @@ extern int _ma_read_rnd_no_record(MARIA_HA *info, uchar *buf,
MARIA_RECORD_POS filepos,
my_bool skip_deleted_blocks);
my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos);
-
+/* Return the position of the last record, as stored in info->cur_row.lastpos */
+static inline MARIA_RECORD_POS maria_position(MARIA_HA *info)
+{
+ return info->cur_row.lastpos;
+}
extern my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key);
extern my_bool _ma_enlarge_root(MARIA_HA *info, MARIA_KEY *key,
MARIA_RECORD_POS *root);
@@ -1733,7 +1748,25 @@ extern my_bool maria_flush_log_for_page_none(PAGECACHE_IO_HOOK_ARGS *args);
extern PAGECACHE *maria_log_pagecache;
extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
void *func_arg);
-check_result_t ma_check_index_cond(MARIA_HA *info, uint keynr, uchar *record);
+extern void ma_set_rowid_filter_func(MARIA_HA *info,
+ rowid_filter_func_t check_func,
+ void *func_arg);
+static inline void ma_reset_index_filter_functions(MARIA_HA *info)
+{ /* Clear both pushed-down check callbacks; their *_func_arg pointers are left untouched */
+ info->index_cond_func= NULL;
+ info->rowid_filter_func= NULL;
+ info->has_cond_pushdown= 0; /* with the flag clear, ma_check_index_cond() returns CHECK_POS at once */
+}
+check_result_t ma_check_index_cond_real(MARIA_HA *info, uint keynr,
+ uchar *record);
+static inline check_result_t ma_check_index_cond(MARIA_HA *info, uint keynr,
+ uchar *record)
+{ /* Inline fast path in front of ma_check_index_cond_real() */
+ if (!info->has_cond_pushdown)
+ return CHECK_POS; /* flag clear: accept the tuple without the out-of-line call */
+ return ma_check_index_cond_real(info, keynr, record);
+}
+
extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx);
extern my_bool ma_killed_standalone(MARIA_HA *);
diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp
index 7787f8b83b5..85d6473ded3 100644
--- a/storage/mroonga/ha_mroonga.cpp
+++ b/storage/mroonga/ha_mroonga.cpp
@@ -4308,6 +4308,7 @@ int ha_mroonga::wrapper_open(const char *name, int mode, uint open_options)
wrap_handler->set_ha_share_ref(&table->s->ha_share);
#endif
error = wrap_handler->ha_open(table, name, mode, open_options);
+ wrap_handler->set_optimizer_costs(ha_thd());
} else {
if (!(wrap_handler = parent_for_clone->wrap_handler->clone(name,
mem_root_for_clone)))
@@ -12313,6 +12314,7 @@ ha_rows ha_mroonga::wrapper_multi_range_read_info_const(uint keyno,
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost)
{
MRN_DBUG_ENTER_METHOD();
@@ -12320,7 +12322,8 @@ ha_rows ha_mroonga::wrapper_multi_range_read_info_const(uint keyno,
KEY *key_info = &(table->key_info[keyno]);
if (mrn_is_geo_key(key_info)) {
rows = handler::multi_range_read_info_const(keyno, seq, seq_init_param,
- n_ranges, bufsz, flags, cost);
+ n_ranges, bufsz, flags, limit,
+ cost);
DBUG_RETURN(rows);
}
MRN_SET_WRAP_SHARE_KEY(share, table->s);
@@ -12329,7 +12332,7 @@ ha_rows ha_mroonga::wrapper_multi_range_read_info_const(uint keyno,
set_pk_bitmap();
rows = wrap_handler->multi_range_read_info_const(keyno, seq, seq_init_param,
n_ranges, bufsz, flags,
- cost);
+ limit, cost);
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
DBUG_RETURN(rows);
@@ -12341,20 +12344,21 @@ ha_rows ha_mroonga::storage_multi_range_read_info_const(uint keyno,
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost)
{
MRN_DBUG_ENTER_METHOD();
ha_rows rows = handler::multi_range_read_info_const(keyno, seq,
seq_init_param,
n_ranges, bufsz, flags,
- cost);
+ limit, cost);
DBUG_RETURN(rows);
}
ha_rows ha_mroonga::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags,
+ uint *flags, ha_rows limit,
Cost_estimate *cost)
{
MRN_DBUG_ENTER_METHOD();
@@ -12363,11 +12367,11 @@ ha_rows ha_mroonga::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
{
rows = wrapper_multi_range_read_info_const(keyno, seq, seq_init_param,
n_ranges, bufsz,
- flags, cost);
+ flags, limit, cost);
} else {
rows = storage_multi_range_read_info_const(keyno, seq, seq_init_param,
n_ranges, bufsz,
- flags, cost);
+ flags, limit, cost);
}
DBUG_RETURN(rows);
}
@@ -13008,9 +13012,9 @@ int ha_mroonga::truncate()
DBUG_RETURN(error);
}
-double ha_mroonga::wrapper_scan_time()
+IO_AND_CPU_COST ha_mroonga::wrapper_scan_time()
{
- double res;
+ IO_AND_CPU_COST res;
MRN_DBUG_ENTER_METHOD();
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
@@ -13020,17 +13024,16 @@ double ha_mroonga::wrapper_scan_time()
DBUG_RETURN(res);
}
-double ha_mroonga::storage_scan_time()
+IO_AND_CPU_COST ha_mroonga::storage_scan_time()
{
MRN_DBUG_ENTER_METHOD();
- double time = handler::scan_time();
- DBUG_RETURN(time);
+ DBUG_RETURN(handler::scan_time()); // storage-side scan cost is the base handler's estimate, returned as-is
}
-double ha_mroonga::scan_time()
+IO_AND_CPU_COST ha_mroonga::scan_time()
{
MRN_DBUG_ENTER_METHOD();
- double time;
+ IO_AND_CPU_COST time;
if (share->wrapper_mode)
{
time = wrapper_scan_time();
@@ -13040,51 +13043,87 @@ double ha_mroonga::scan_time()
DBUG_RETURN(time);
}
-double ha_mroonga::wrapper_read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::wrapper_rnd_pos_time(ha_rows rows)
+{ // Forwards the rnd_pos_time(rows) cost query to wrap_handler and returns its answer.
+ IO_AND_CPU_COST res;
+ MRN_DBUG_ENTER_METHOD();
+ MRN_SET_WRAP_SHARE_KEY(share, table->s); // NOTE(review): presumably switches key metadata to the wrapped engine's view - confirm in the macro definition
+ MRN_SET_WRAP_TABLE_KEY(this, table);
+ res = wrap_handler->rnd_pos_time(rows);
+ MRN_SET_BASE_SHARE_KEY(share, table->s); // counterpart of the SET_WRAP calls above, issued before returning
+ MRN_SET_BASE_TABLE_KEY(this, table);
+ DBUG_RETURN(res);
+}
+
+IO_AND_CPU_COST ha_mroonga::storage_rnd_pos_time(ha_rows rows)
{
- double res;
+ MRN_DBUG_ENTER_METHOD();
+ IO_AND_CPU_COST time = handler::rnd_pos_time(rows); // storage side: base handler estimate, returned unchanged
+ DBUG_RETURN(time);
+}
+
+
+IO_AND_CPU_COST ha_mroonga::rnd_pos_time(ha_rows rows)
+{ // Handler entry point: dispatches on share->wrapper_mode.
+ MRN_DBUG_ENTER_METHOD();
+ IO_AND_CPU_COST time;
+ if (share->wrapper_mode)
+ {
+ time = wrapper_rnd_pos_time(rows); // ask the wrapped handler
+ } else {
+ time = storage_rnd_pos_time(rows); // use the base handler estimate
+ }
+ DBUG_RETURN(time);
+}
+
+
+IO_AND_CPU_COST ha_mroonga::wrapper_keyread_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks)
+{ // Key read cost in wrapper mode; three paths depending on `index`.
+ IO_AND_CPU_COST res;
MRN_DBUG_ENTER_METHOD();
if (index < MAX_KEY) {
KEY *key_info = &(table->key_info[index]);
if (mrn_is_geo_key(key_info)) {
- res = handler::read_time(index, ranges, rows);
+ res = handler::keyread_time(index, ranges, rows, blocks); // geo key: base handler estimate, wrapped handler is not asked
DBUG_RETURN(res);
}
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
- res = wrap_handler->read_time(share->wrap_key_nr[index], ranges, rows);
+ res = wrap_handler->keyread_time(share->wrap_key_nr[index], ranges, rows, blocks); // index number translated through share->wrap_key_nr
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
} else {
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
- res = wrap_handler->read_time(index, ranges, rows);
+ res = wrap_handler->keyread_time(index, ranges, rows, blocks); // index >= MAX_KEY: passed to the wrapped handler untranslated
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
}
DBUG_RETURN(res);
}
-double ha_mroonga::storage_read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks)
{
MRN_DBUG_ENTER_METHOD();
- double time = handler::read_time(index, ranges, rows);
+ IO_AND_CPU_COST time = handler::keyread_time(index, ranges, rows, blocks); // storage side: base handler estimate, returned unchanged
DBUG_RETURN(time);
}
-double ha_mroonga::read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) // handler entry point: dispatches on share->wrapper_mode
{
MRN_DBUG_ENTER_METHOD();
- double time;
+ IO_AND_CPU_COST time;
if (share->wrapper_mode)
{
- time = wrapper_read_time(index, ranges, rows);
+ time = wrapper_keyread_time(index, ranges, rows, blocks); // ask the wrapped handler
} else {
- time = storage_read_time(index, ranges, rows);
+ time = storage_keyread_time(index, ranges, rows, blocks); // use the base handler estimate
}
DBUG_RETURN(time);
}
+
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *ha_mroonga::wrapper_keys_to_use_for_scanning()
{
diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp
index 66767899e21..27219ffd158 100644
--- a/storage/mroonga/ha_mroonga.hpp
+++ b/storage/mroonga/ha_mroonga.hpp
@@ -505,7 +505,8 @@ public:
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost) mrn_override;
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost) mrn_override;
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
#ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ_INFO_KEY_PARTS
uint key_parts,
@@ -531,8 +532,9 @@ public:
int end_bulk_insert() mrn_override;
int delete_all_rows() mrn_override;
int truncate() mrn_override;
- double scan_time() mrn_override;
- double read_time(uint index, uint ranges, ha_rows rows) mrn_override;
+ IO_AND_CPU_COST scan_time() mrn_override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) mrn_override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) mrn_override;
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *keys_to_use_for_scanning() mrn_override;
#endif
@@ -1056,6 +1058,7 @@ private:
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost);
ha_rows storage_multi_range_read_info_const(uint keyno,
RANGE_SEQ_IF *seq,
@@ -1063,6 +1066,7 @@ private:
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost);
ha_rows wrapper_multi_range_read_info(uint keyno, uint n_ranges, uint keys,
#ifdef MRN_HANDLER_HAVE_MULTI_RANGE_READ_INFO_KEY_PARTS
@@ -1106,10 +1110,12 @@ private:
int wrapper_truncate_index();
int storage_truncate();
int storage_truncate_index();
- double wrapper_scan_time();
- double storage_scan_time();
- double wrapper_read_time(uint index, uint ranges, ha_rows rows);
- double storage_read_time(uint index, uint ranges, ha_rows rows);
+ IO_AND_CPU_COST wrapper_scan_time();
+ IO_AND_CPU_COST storage_scan_time();
+ IO_AND_CPU_COST wrapper_rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST storage_rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST wrapper_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks);
+ IO_AND_CPU_COST storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks);
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *wrapper_keys_to_use_for_scanning();
const key_map *storage_keys_to_use_for_scanning();
diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/optimization_count_skip_index_not_equal.result b/storage/mroonga/mysql-test/mroonga/storage/r/optimization_count_skip_index_not_equal.result
index a1a123e7d5f..837ca2b6381 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/r/optimization_count_skip_index_not_equal.result
+++ b/storage/mroonga/mysql-test/mroonga/storage/r/optimization_count_skip_index_not_equal.result
@@ -9,6 +9,9 @@ INSERT INTO users (age) VALUES (28);
INSERT INTO users (age) VALUES (29);
INSERT INTO users (age) VALUES (29);
INSERT INTO users (age) VALUES (29);
+explain SELECT COUNT(*) FROM users WHERE age <> 29;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE users range age age 5 NULL 4 Using where; Using index
SELECT COUNT(*) FROM users WHERE age <> 29;
COUNT(*)
2
diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/optimization_count_skip_index_not_equal.test b/storage/mroonga/mysql-test/mroonga/storage/t/optimization_count_skip_index_not_equal.test
index 3948d218a69..7e6cf5f510b 100644
--- a/storage/mroonga/mysql-test/mroonga/storage/t/optimization_count_skip_index_not_equal.test
+++ b/storage/mroonga/mysql-test/mroonga/storage/t/optimization_count_skip_index_not_equal.test
@@ -33,6 +33,7 @@ INSERT INTO users (age) VALUES (29);
INSERT INTO users (age) VALUES (29);
INSERT INTO users (age) VALUES (29);
+explain SELECT COUNT(*) FROM users WHERE age <> 29;
SELECT COUNT(*) FROM users WHERE age <> 29;
SHOW STATUS LIKE 'mroonga_count_skip';
diff --git a/storage/mroonga/mysql-test/mroonga/wrapper/r/geometry_contains.result b/storage/mroonga/mysql-test/mroonga/wrapper/r/geometry_contains.result
index 550554eac8c..6dd6dd25f3f 100644
--- a/storage/mroonga/mysql-test/mroonga/wrapper/r/geometry_contains.result
+++ b/storage/mroonga/mysql-test/mroonga/wrapper/r/geometry_contains.result
@@ -154,7 +154,7 @@ id name location_text
select id, name, ST_AsText(location) as location_text from shops
where MBRContains(ST_GeomFromText('LineString(139.7727 35.6684, 139.7038 35.7121)'), location);
id name location_text
+26 kazuya POINT(139.760895 35.673508)
14 tetsuji POINT(139.76857 35.680912)
19 daruma POINT(139.770599 35.681461)
-26 kazuya POINT(139.760895 35.673508)
drop table shops;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index c0419da7e71..b1b1e8fd57e 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -804,6 +804,17 @@ ulong ha_myisam::index_flags(uint inx, uint part, bool all_parts) const
return flags;
}
+IO_AND_CPU_COST ha_myisam::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the system (probably cached by the OS).
+ Only the io part of the base estimate is cleared here; the rest of
+ the cost is returned unchanged.
+ */
+ cost.io= 0;
+ return cost;
+}
+
/* Name is here without an extension */
int ha_myisam::open(const char *name, int mode, uint test_if_locked)
@@ -1960,9 +1971,8 @@ int ha_myisam::index_init(uint idx, bool sorted)
active_index=idx;
if (pushed_idx_cond_keyno == idx)
mi_set_index_cond_func(file, handler_index_cond_check, this);
- if (pushed_rowid_filter)
- mi_set_rowid_filter_func(file, handler_rowid_filter_check,
- handler_rowid_filter_is_active, this);
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ mi_set_rowid_filter_func(file, handler_rowid_filter_check, this);
return 0;
}
@@ -1970,11 +1980,10 @@ int ha_myisam::index_init(uint idx, bool sorted)
int ha_myisam::index_end()
{
DBUG_ENTER("ha_myisam::index_end");
- active_index=MAX_KEY;
- //pushed_idx_cond_keyno= MAX_KEY;
+ active_index= MAX_KEY;
mi_set_index_cond_func(file, NULL, 0);
in_range_check_pushed_down= FALSE;
- mi_set_rowid_filter_func(file, NULL, NULL, 0);
+ mi_set_rowid_filter_func(file, NULL, 0);
ds_mrr.dsmrr_close();
#if !defined(DBUG_OFF) && defined(SQL_SELECT_FIXED_FOR_UPDATE)
file->update&= ~HA_STATE_AKTIV; // Forget active row
@@ -2010,9 +2019,8 @@ int ha_myisam::index_read_idx_map(uchar *buf, uint index, const uchar *key,
end_range= NULL;
if (index == pushed_idx_cond_keyno)
mi_set_index_cond_func(file, handler_index_cond_check, this);
- if (pushed_rowid_filter)
- mi_set_rowid_filter_func(file, handler_rowid_filter_check,
- handler_rowid_filter_is_active, this);
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ mi_set_rowid_filter_func(file, handler_rowid_filter_check, this);
res= mi_rkey(file, buf, index, key, keypart_map, find_flag);
mi_set_index_cond_func(file, NULL, 0);
return res;
@@ -2585,6 +2593,22 @@ static int myisam_drop_table(handlerton *hton, const char *path)
return mi_delete_table(path);
}
+
+void myisam_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ MyISAM row lookups are slow as the row data is not cached.
+ The following numbers were found by check_costs.pl when using 1M rows
+ and all rows are cached. See optimizer_costs.txt
+
+ Only the five fields assigned below are overridden; every other
+ field of *costs is left as passed in.
+ */
+ costs->row_next_find_cost= 0.000063539;
+ costs->row_lookup_cost= 0.001014818;
+ costs->key_next_find_cost= 0.000090585;
+ costs->key_lookup_cost= 0.000550142;
+ costs->key_copy_cost= 0.000015685;
+}
+
+
static int myisam_init(void *p)
{
handlerton *hton;
@@ -2604,6 +2628,7 @@ static int myisam_init(void *p)
hton->create= myisam_create_handler;
hton->drop_table= myisam_drop_table;
hton->panic= myisam_panic;
+ hton->update_optimizer_costs= myisam_update_optimizer_costs;
hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
hton->tablefile_extensions= ha_myisam_exts;
mi_killed= mi_killed_in_mariadb;
@@ -2643,7 +2668,8 @@ int ha_myisam::multi_range_read_next(range_id_t *range_info)
ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost)
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost)
{
/*
This call is here because there is no location where this->table would
@@ -2652,7 +2678,7 @@ ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
*/
ds_mrr.init(this, table);
return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
- flags, cost);
+ flags, limit, cost);
}
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
@@ -2707,12 +2733,23 @@ Item *ha_myisam::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
bool ha_myisam::rowid_filter_push(Rowid_filter* rowid_filter)
{
+ /* Only remember the filter here; it will be used in index_init() */
pushed_rowid_filter= rowid_filter;
- mi_set_rowid_filter_func(file, handler_rowid_filter_check,
- handler_rowid_filter_is_active, this);
return false;
}
+
+/* Enable or disable the rowid filter check, depending on whether a filter is pushed and active */
+
+void ha_myisam::rowid_filter_changed()
+{
+ if (pushed_rowid_filter && handler_rowid_filter_is_active(this))
+ mi_set_rowid_filter_func(file, handler_rowid_filter_check, this);
+ else
+ mi_set_rowid_filter_func(file, NULL, this); /* NULL check function: no rowid filter is applied */
+}
+
+
struct st_mysql_storage_engine myisam_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index c4c46a63afa..0914d531788 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -54,125 +54,132 @@ class ha_myisam final : public handler
public:
ha_myisam(handlerton *hton, TABLE_SHARE *table_arg);
~ha_myisam() = default;
- handler *clone(const char *name, MEM_ROOT *mem_root);
- const char *index_type(uint key_number);
- ulonglong table_flags() const { return int_table_flags; }
- int index_init(uint idx, bool sorted);
- int index_end();
- int rnd_end();
-
- ulong index_flags(uint inx, uint part, bool all_parts) const;
- uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_parts() const { return HA_MAX_KEY_SEG; }
- uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
- void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
- int open(const char *name, int mode, uint test_if_locked);
- int close(void);
- int write_row(const uchar * buf);
- int update_row(const uchar * old_data, const uchar * new_data);
- int delete_row(const uchar * buf);
+ handler *clone(const char *name, MEM_ROOT *mem_root) override;
+ const char *index_type(uint key_number) override;
+ ulonglong table_flags() const override { return int_table_flags; }
+ int index_init(uint idx, bool sorted) override;
+ int index_end() override;
+ int rnd_end() override;
+
+ ulong index_flags(uint inx, uint part, bool all_parts) const override;
+ uint max_supported_keys() const override { return MI_MAX_KEY; }
+ uint max_supported_key_parts() const override { return HA_MAX_KEY_SEG; }
+ uint max_supported_key_length() const override { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const override
+ { return HA_MAX_KEY_LENGTH; }
+ void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override;
+ int open(const char *name, int mode, uint test_if_locked) override;
+ int close(void) override;
+ int write_row(const uchar * buf) override;
+ int update_row(const uchar * old_data, const uchar * new_data) override;
+ int delete_row(const uchar * buf) override;
int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map,
- enum ha_rkey_function find_flag);
+ enum ha_rkey_function find_flag) override;
int index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
- enum ha_rkey_function find_flag);
- int index_next(uchar * buf);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
- int index_next_same(uchar *buf, const uchar *key, uint keylen);
- int ft_init()
+ enum ha_rkey_function find_flag) override;
+ int index_next(uchar * buf) override;
+ int index_prev(uchar * buf) override;
+ int index_first(uchar * buf) override;
+ int index_last(uchar * buf) override;
+ int index_next_same(uchar *buf, const uchar *key, uint keylen) override;
+ int ft_init() override
{
if (!ft_handler)
return 1;
ft_handler->please->reinit_search(ft_handler);
return 0;
}
- FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
+ FT_INFO *ft_init_ext(uint flags, uint inx,String *key) override
{
return ft_init_search(flags,file,inx,
(uchar *)key->ptr(), key->length(), key->charset(),
table->record[0]);
}
- int ft_read(uchar *buf);
- int rnd_init(bool scan);
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
- int remember_rnd_pos();
- int restart_rnd_next(uchar *buf);
- void position(const uchar *record);
- int info(uint);
- int extra(enum ha_extra_function operation);
- int extra_opt(enum ha_extra_function operation, ulong cache_size);
- int reset(void);
- int external_lock(THD *thd, int lock_type);
- int delete_all_rows(void);
- int reset_auto_increment(ulonglong value);
- int disable_indexes(uint mode);
- int enable_indexes(uint mode);
- int indexes_are_disabled(void);
- void start_bulk_insert(ha_rows rows, uint flags);
- int end_bulk_insert();
+ int ft_read(uchar *buf) override;
+ int rnd_init(bool scan) override;
+ int rnd_next(uchar *buf) override;
+ int rnd_pos(uchar * buf, uchar *pos) override;
+ int remember_rnd_pos() override;
+ int restart_rnd_next(uchar *buf) override;
+ void position(const uchar *record) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
+ int info(uint) override;
+ int extra(enum ha_extra_function operation) override;
+ int extra_opt(enum ha_extra_function operation, ulong cache_size) override;
+ int reset(void) override;
+ int external_lock(THD *thd, int lock_type) override;
+ int delete_all_rows(void) override;
+ int reset_auto_increment(ulonglong value) override;
+ int disable_indexes(uint mode) override;
+ int enable_indexes(uint mode) override;
+ int indexes_are_disabled(void) override;
+ void start_bulk_insert(ha_rows rows, uint flags) override;
+ int end_bulk_insert() override;
ha_rows records_in_range(uint inx, const key_range *min_key,
- const key_range *max_key, page_range *pages);
- void update_create_info(HA_CREATE_INFO *create_info);
- int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info);
+ const key_range *max_key, page_range *pages) override;
+ void update_create_info(HA_CREATE_INFO *create_info) override;
+ int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) override;
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
- int rename_table(const char * from, const char * to);
- int delete_table(const char *name);
- int check_for_upgrade(HA_CHECK_OPT *check_opt);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- int analyze(THD* thd,HA_CHECK_OPT* check_opt);
- int repair(THD* thd, HA_CHECK_OPT* check_opt);
- bool check_and_repair(THD *thd);
- bool is_crashed() const;
- bool auto_repair(int error) const
+ enum thr_lock_type lock_type) override;
+ void get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values) override;
+ int rename_table(const char * from, const char * to) override;
+ int delete_table(const char *name) override;
+ int check_for_upgrade(HA_CHECK_OPT *check_opt) override;
+ int check(THD* thd, HA_CHECK_OPT* check_opt) override;
+ int analyze(THD* thd,HA_CHECK_OPT* check_opt) override;
+ int repair(THD* thd, HA_CHECK_OPT* check_opt) override;
+ bool check_and_repair(THD *thd) override;
+ bool is_crashed() const override;
+ bool auto_repair(int error) const override
{
return (myisam_recover_options != HA_RECOVER_OFF &&
error == HA_ERR_CRASHED_ON_USAGE);
}
- int optimize(THD* thd, HA_CHECK_OPT* check_opt);
- int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt);
- int preload_keys(THD* thd, HA_CHECK_OPT* check_opt);
+ int optimize(THD* thd, HA_CHECK_OPT* check_opt) override;
+ int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt) override;
+ int preload_keys(THD* thd, HA_CHECK_OPT* check_opt) override;
enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *new_table,
- Alter_inplace_info *alter_info);
- bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
+ Alter_inplace_info *alter_info)
+ override;
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes)
+ override;
#ifdef HAVE_QUERY_CACHE
my_bool register_query_cache_table(THD *thd, const char *table_key,
uint key_length,
qc_engine_callback
*engine_callback,
- ulonglong *engine_data);
+ ulonglong *engine_data) override;
#endif
- MI_INFO *file_ptr(void)
- {
- return file;
- }
-public:
/**
* Multi Range Read interface
*/
int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- int multi_range_read_next(range_id_t *range_info);
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf) override;
+ int multi_range_read_next(range_id_t *range_info) override;
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
- uint *flags, Cost_estimate *cost);
+ uint *flags, ha_rows limit,
+ Cost_estimate *cost) override;
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
uint key_parts, uint *bufsz,
- uint *flags, Cost_estimate *cost);
- int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+ uint *flags, Cost_estimate *cost) override;
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size) override;
/* Index condition pushdown implementation */
- Item *idx_cond_push(uint keyno, Item* idx_cond);
- bool rowid_filter_push(Rowid_filter* rowid_filter);
+ Item *idx_cond_push(uint keyno, Item* idx_cond) override;
+ bool rowid_filter_push(Rowid_filter* rowid_filter) override;
+ void rowid_filter_changed() override;
+
+ /* Used by myisammrg */
+ MI_INFO *file_ptr(void)
+ {
+ return file;
+ }
private:
DsMrr_impl ds_mrr;
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index 66238745a04..e7e64edd926 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -376,16 +376,16 @@ void mi_set_index_cond_func(MI_INFO *info, index_cond_func_t func,
{
info->index_cond_func= func;
info->index_cond_func_arg= func_arg;
+ info->has_cond_pushdown= (info->index_cond_func || info->rowid_filter_func);
}
void mi_set_rowid_filter_func(MI_INFO *info,
rowid_filter_func_t check_func,
- rowid_filter_is_active_func_t is_active_func,
void *func_arg)
{
info->rowid_filter_func= check_func;
- info->rowid_filter_is_active_func= is_active_func;
info->rowid_filter_func_arg= func_arg;
+ info->has_cond_pushdown= (info->index_cond_func || info->rowid_filter_func); /* true while either callback is installed */
}
/*
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index 087eb59c7c0..bde3ee19e2c 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -510,14 +510,6 @@ int mi_unpack_index_tuple(MI_INFO *info, uint keynr, uchar *record)
}
-static int mi_check_rowid_filter_is_active(MI_INFO *info)
-{
- if (info->rowid_filter_is_active_func == NULL)
- return 0;
- return info->rowid_filter_is_active_func(info->rowid_filter_func_arg);
-}
-
-
/*
Check the current index tuple: Check ICP condition and/or Rowid Filter
@@ -532,21 +524,23 @@ static int mi_check_rowid_filter_is_active(MI_INFO *info)
Check result according to check_result_t definition
*/
-check_result_t mi_check_index_tuple(MI_INFO *info, uint keynr, uchar *record)
+check_result_t mi_check_index_tuple_real(MI_INFO *info, uint keynr, uchar *record)
{
- int need_unpack= TRUE;
check_result_t res= CHECK_POS;
+ DBUG_ASSERT(info->index_cond_func || info->rowid_filter_func);
+
+ if (mi_unpack_index_tuple(info, keynr, record))
+ return CHECK_ERROR;
if (info->index_cond_func)
{
- if (mi_unpack_index_tuple(info, keynr, record))
- res= CHECK_ERROR;
- else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
- CHECK_OUT_OF_RANGE)
+ if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ CHECK_OUT_OF_RANGE)
{
/* We got beyond the end of scanned range */
info->lastpos= HA_OFFSET_ERROR; /* No active record */
my_errno= HA_ERR_END_OF_FILE;
+ return res;
}
/*
@@ -555,25 +549,17 @@ check_result_t mi_check_index_tuple(MI_INFO *info, uint keynr, uchar *record)
*/
if (res != CHECK_POS)
return res;
-
- need_unpack= FALSE;
}
/* Check the Rowid Filter, if present */
- if (mi_check_rowid_filter_is_active(info))
+ if (info->rowid_filter_func)
{
- /* Unpack the index tuple if we haven't done it already */
- if (need_unpack && mi_unpack_index_tuple(info, keynr, record))
- res= CHECK_ERROR;
- else
+ if ((res= info->rowid_filter_func(info->rowid_filter_func_arg)) ==
+ CHECK_OUT_OF_RANGE)
{
- if ((res= info->rowid_filter_func(info->rowid_filter_func_arg)) ==
- CHECK_OUT_OF_RANGE)
- {
- /* We got beyond the end of scanned range */
- info->lastpos= HA_OFFSET_ERROR; /* No active record */
- my_errno= HA_ERR_END_OF_FILE;
- }
+ /* We got beyond the end of scanned range */
+ info->lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_END_OF_FILE;
}
}
return res;
diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c
index bf6f3ef852c..590981fb790 100644
--- a/storage/myisam/mi_rkey.c
+++ b/storage/myisam/mi_rkey.c
@@ -119,7 +119,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
while ((info->lastpos >= info->state->data_file_length &&
(search_flag != HA_READ_KEY_EXACT ||
last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) ||
- (res= mi_check_index_tuple(info, inx, buf)) == CHECK_NEG)
+ (res= mi_check_index_tuple(info, inx, buf)) == CHECK_NEG)
{
uint not_used[2];
/*
diff --git a/storage/myisam/mi_scan.c b/storage/myisam/mi_scan.c
index 8d436c4eada..24aca8e8751 100644
--- a/storage/myisam/mi_scan.c
+++ b/storage/myisam/mi_scan.c
@@ -39,8 +39,10 @@ int mi_scan_init(register MI_INFO *info)
int mi_scan(MI_INFO *info, uchar *buf)
{
+ int tmp;
DBUG_ENTER("mi_scan");
/* Init all but update-flag */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
- DBUG_RETURN ((*info->s->read_rnd)(info,buf,info->nextpos,1));
+ tmp= (*info->s->read_rnd)(info,buf,info->nextpos,1); /* read the row at info->nextpos via the share's read_rnd function */
+ DBUG_RETURN(tmp);
}
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index a5777527e54..17d2eef898a 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -15,12 +15,14 @@
/* Describe, check and repair of MyISAM tables */
+#define VER "2.7"
#include "fulltext.h"
#include "my_default.h"
#include <m_ctype.h>
#include <stdarg.h>
#include <my_getopt.h>
#include <my_bit.h>
+#include <welcome_copyright_notice.h>
static uint decode_bits;
static char **default_argv;
@@ -53,7 +55,6 @@ static const char *field_pack[]=
static const char *myisam_stats_method_str="nulls_unequal";
static void get_options(int *argc,char * * *argv);
-static void print_version(void);
static void usage(void);
static int myisamchk(HA_CHECK *param, char *filename);
static void descript(HA_CHECK *param, register MI_INFO *info, char * name);
@@ -331,13 +332,6 @@ static struct my_option my_long_options[] =
};
-static void print_version(void)
-{
- printf("%s Ver 2.7 for %s at %s\n", my_progname, SYSTEM_TYPE,
- MACHINE_TYPE);
-}
-
-
static void usage(void)
{
print_version();
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index c90d989c975..5ede6a6159c 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -304,10 +304,10 @@ struct st_myisam_info
/* If info->buff has to be reread for rnext */
my_bool buff_used;
my_bool create_unique_index_by_sort;
+ my_bool has_cond_pushdown;
index_cond_func_t index_cond_func; /* Index condition function */
void *index_cond_func_arg; /* parameter for the func */
rowid_filter_func_t rowid_filter_func; /* rowid filter check function */
- rowid_filter_is_active_func_t rowid_filter_is_active_func; /* is activefunction */
void *rowid_filter_func_arg; /* parameter for the func */
THR_LOCK_DATA lock;
uchar *rtree_recursion_state; /* For RTREE */
@@ -742,7 +742,15 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
int mi_munmap_file(MI_INFO *info);
void mi_remap_file(MI_INFO *info, my_off_t size);
-check_result_t mi_check_index_tuple(MI_INFO *info, uint keynr, uchar *record);
+check_result_t mi_check_index_tuple_real(MI_INFO *info, uint keynr,
+ uchar *record);
+static inline check_result_t mi_check_index_tuple(MI_INFO *info, uint keynr,
+ uchar *record)
+{
+ if (!info->has_cond_pushdown && ! info->rowid_filter_func)
+ return CHECK_POS;
+ return mi_check_index_tuple_real(info, keynr, record);
+}
/* Functions needed by mi_check */
int killed_ptr(HA_CHECK *param);
@@ -754,7 +762,6 @@ extern void mi_set_index_cond_func(MI_INFO *info, index_cond_func_t check_func,
void *func_arg);
extern void mi_set_rowid_filter_func(MI_INFO *info,
rowid_filter_func_t check_func,
- rowid_filter_is_active_func_t is_active_func,
void *func_arg);
int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file,
ulonglong *dirty_part_map);
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 40d473dc532..4e51af4ac3d 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -20,12 +20,14 @@
#define USE_MY_FUNC
#endif
+#define VER "1.4"
#include "myisamdef.h"
#include <my_tree.h>
#include <stdarg.h>
#ifdef HAVE_GETRUSAGE
#include <sys/resource.h>
#endif
+#include <welcome_copyright_notice.h>
#define FILENAME(A) (A ? A->show_name : "Unknown")
@@ -249,8 +251,7 @@ static void get_options(register int *argc, register char ***argv)
/* Fall through */
case 'I':
case '?':
- printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
- MACHINE_TYPE);
+ print_version();
puts("By Monty, for your professional use\n");
if (version)
break;
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index d6cd9334a55..709530d915c 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -20,6 +20,7 @@
#define USE_MY_FUNC /* We need at least my_malloc */
#endif
+#define VER "1.23"
#include "myisamdef.h"
#include "my_default.h"
#include <queues.h>
@@ -30,6 +31,7 @@
#endif
#include <my_getopt.h>
#include <assert.h>
+#include <welcome_copyright_notice.h>
#if SIZEOF_LONG_LONG > 4
#define BITS_SAVED 64
@@ -289,13 +291,6 @@ static struct my_option my_long_options[] =
};
-static void print_version(void)
-{
- printf("%s Ver 1.23 for %s on %s\n",
- my_progname, SYSTEM_TYPE, MACHINE_TYPE);
-}
-
-
static void usage(void)
{
print_version();
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index d37636abab7..ee5d44b5d26 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -339,6 +339,33 @@ static void myrg_set_external_ref(MYRG_INFO *m_info, void *ext_ref_arg)
}
}
+IO_AND_CPU_COST ha_myisammrg::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the system (probably cached by the OS).
+ */
+ cost.io= 0;
+ return cost;
+}
+
+IO_AND_CPU_COST ha_myisammrg::keyread_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
+{
+ IO_AND_CPU_COST cost= handler::keyread_time(index, ranges, rows, blocks);
+ if (!blocks)
+ {
+ cost.io*= file->tables;
+ cost.cpu*= file->tables;
+ }
+ /* Add the cost of having to do a key lookup in all trees */
+ if (file->tables)
+ cost.cpu+= (file->tables-1) * (ranges * KEY_LOOKUP_COST);
+ return cost;
+}
+
/**
Open a MERGE parent table, but not its children.
@@ -1744,6 +1771,12 @@ int myisammrg_panic(handlerton *hton, ha_panic_function flag)
return myrg_panic(flag);
}
+static void myisammrg_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ myisam_update_optimizer_costs(costs);
+}
+
+
static int myisammrg_init(void *p)
{
handlerton *myisammrg_hton;
@@ -1759,7 +1792,7 @@ static int myisammrg_init(void *p)
myisammrg_hton->panic= myisammrg_panic;
myisammrg_hton->flags= HTON_NO_PARTITION;
myisammrg_hton->tablefile_extensions= ha_myisammrg_exts;
-
+ myisammrg_hton->update_optimizer_costs= myisammrg_update_optimizer_costs;
return 0;
}
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 6da327ec84b..0435f7d6bd6 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -82,8 +82,8 @@ public:
ha_myisammrg(handlerton *hton, TABLE_SHARE *table_arg);
~ha_myisammrg();
- const char *index_type(uint key_number);
- ulonglong table_flags() const
+ const char *index_type(uint key_number) override;
+ ulonglong table_flags() const override
{
return (HA_REC_NOT_IN_SEQ | HA_AUTO_PART_KEY | HA_NO_TRANSACTIONS |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
@@ -93,70 +93,81 @@ public:
HA_NO_COPY_ON_ALTER |
HA_DUPLICATE_POS | HA_CAN_MULTISTEP_MERGE);
}
- ulong index_flags(uint inx, uint part, bool all_parts) const
+ ulong index_flags(uint inx, uint part, bool all_parts) const override
{
return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
HA_READ_ORDER | HA_KEYREAD_ONLY);
}
- uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
- double scan_time()
- { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; }
-
- int open(const char *name, int mode, uint test_if_locked);
- int add_children_list(void);
- int attach_children(void);
- int detach_children(void);
- virtual handler *clone(const char *name, MEM_ROOT *mem_root);
- int close(void);
- int write_row(const uchar * buf);
- int update_row(const uchar * old_data, const uchar * new_data);
- int delete_row(const uchar * buf);
+ uint max_supported_keys() const override { return MI_MAX_KEY; }
+ uint max_supported_key_length() const override { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const override
+ { return HA_MAX_KEY_LENGTH; }
+ IO_AND_CPU_COST scan_time() override
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE +
+ file->tables),
+ cost.cpu= records() * ROW_NEXT_FIND_COST;
+ return cost;
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ int open(const char *name, int mode, uint test_if_locked) override;
+ handler *clone(const char *name, MEM_ROOT *mem_root) override;
+ int close(void) override;
+ int write_row(const uchar * buf) override;
+ int update_row(const uchar * old_data, const uchar * new_data) override;
+ int delete_row(const uchar * buf) override;
int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map,
- enum ha_rkey_function find_flag);
+ enum ha_rkey_function find_flag) override;
int index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
- enum ha_rkey_function find_flag);
- int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map);
- int index_next(uchar * buf);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
- int index_next_same(uchar *buf, const uchar *key, uint keylen);
- int rnd_init(bool scan);
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
- void position(const uchar *record);
+ enum ha_rkey_function find_flag) override;
+ int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) override;
+ int index_next(uchar * buf) override;
+ int index_prev(uchar * buf) override;
+ int index_first(uchar * buf) override;
+ int index_last(uchar * buf) override;
+ int index_next_same(uchar *buf, const uchar *key, uint keylen) override;
+ int rnd_init(bool scan) override;
+ int rnd_next(uchar *buf) override;
+ int rnd_pos(uchar * buf, uchar *pos) override;
+ void position(const uchar *record) override;
ha_rows records_in_range(uint inx, const key_range *start_key,
- const key_range *end_key, page_range *pages);
- int delete_all_rows();
- int info(uint);
- int reset(void);
- int extra(enum ha_extra_function operation);
- int extra_opt(enum ha_extra_function operation, ulong cache_size);
- int external_lock(THD *thd, int lock_type);
- uint lock_count(void) const;
- int create_mrg(const char *name, HA_CREATE_INFO *create_info);
- int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info);
+ const key_range *end_key, page_range *pages) override;
+ int delete_all_rows() override;
+ int info(uint) override;
+ int reset(void) override;
+ int extra(enum ha_extra_function operation) override;
+ int extra_opt(enum ha_extra_function operation, ulong cache_size) override;
+ int external_lock(THD *thd, int lock_type) override;
+ uint lock_count(void) const override;
+ int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) override;
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- void update_create_info(HA_CREATE_INFO *create_info);
- void append_create_info(String *packet);
- MYRG_INFO *myrg_info() { return file; }
- TABLE *table_ptr() { return table; }
+ enum thr_lock_type lock_type) override;
+ void update_create_info(HA_CREATE_INFO *create_info) override;
+ void append_create_info(String *packet) override;
enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *,
- Alter_inplace_info *);
+ Alter_inplace_info *) override;
bool inplace_alter_table(TABLE *altered_table,
- Alter_inplace_info *ha_alter_info);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- ha_rows records();
- virtual uint count_query_cache_dependant_tables(uint8 *tables_type);
+ Alter_inplace_info *ha_alter_info) override;
+ int check(THD* thd, HA_CHECK_OPT* check_opt) override;
+ ha_rows records() override;
+ virtual uint count_query_cache_dependant_tables(uint8 *tables_type) override;
virtual my_bool
register_query_cache_dependant_tables(THD *thd,
Query_cache *cache,
Query_cache_block_table **block,
- uint *n);
- virtual void set_lock_type(enum thr_lock_type lock);
+ uint *n) override;
+ virtual void set_lock_type(enum thr_lock_type lock) override;
+
+ /* Internal interface functions, not part of the normal handler interface */
+ int add_children_list(void);
+ int attach_children(void);
+ int detach_children(void);
+ int create_mrg(const char *name, HA_CREATE_INFO *create_info);
+ MYRG_INFO *myrg_info() { return file; }
+ TABLE *table_ptr() { return table; }
};
diff --git a/storage/oqgraph/ha_oqgraph.h b/storage/oqgraph/ha_oqgraph.h
index c8e175df616..d1f5a898ad7 100644
--- a/storage/oqgraph/ha_oqgraph.h
+++ b/storage/oqgraph/ha_oqgraph.h
@@ -74,9 +74,10 @@ public:
const char **bas_ext() const;
uint max_supported_keys() const { return MAX_KEY; }
uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; }
- double scan_time() { return (double) 1000000000; }
- double read_time(uint index, uint ranges, ha_rows rows)
- { return 1; }
+ IO_AND_CPU_COST scan_time()
+ { return { (double) 1000000000, (double) 1000000000 }; }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ { return { (double) rows, (double) rows }; }
// Doesn't make sense to change the engine on a virtual table.
virtual bool can_switch_engines() { return false; }
diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h
index eab97434265..20ed7448a1e 100644
--- a/storage/perfschema/ha_perfschema.h
+++ b/storage/perfschema/ha_perfschema.h
@@ -104,8 +104,10 @@ public:
ha_rows estimate_rows_upper_bound(void)
{ return HA_POS_ERROR; }
- double scan_time(void)
- { return 1.0; }
+ IO_AND_CPU_COST scan_time(void)
+ {
+ return {0.0, 1.0};
+ }
/**
Open a performance schema table.
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index d3f7ca90889..544ae62e6e2 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -155,6 +155,8 @@ IF(NOT TARGET rocksdb)
RETURN()
ENDIF()
+INSTALL_MANPAGES(rocksdb-engine mariadb-ldb.1 myrocks_hotbackup.1)
+
CHECK_CXX_SOURCE_COMPILES("
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 8067d6f6b93..86300f1cf71 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -5235,6 +5235,24 @@ static int rocksdb_check_version(handlerton *hton,
return (create_id == ver);
}
+
+/*
+ Set up cost factors for RocksDB to be able to approximate how many
+ ms different operations take. See the cost functions in handler.h for
+ how the different variables are used.
+*/
+
+static void rocksdb_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /* See optimizer_costs.txt for how these are calculated */
+ costs->row_next_find_cost= 0.00015161;
+ costs->row_lookup_cost= 0.00150453;
+ costs->key_next_find_cost= 0.00025108;
+ costs->key_lookup_cost= 0.00079369;
+ costs->row_copy_cost= 0.00006087;
+}
+
+
/*
Storage Engine initialization function, invoked when plugin is loaded.
*/
@@ -5343,6 +5361,7 @@ static int rocksdb_init_func(void *const p) {
rocksdb_hton->savepoint_rollback = rocksdb_rollback_to_savepoint;
rocksdb_hton->savepoint_rollback_can_release_mdl =
rocksdb_rollback_to_savepoint_can_release_mdl;
+ rocksdb_hton->update_optimizer_costs= rocksdb_update_optimizer_costs;
#ifdef MARIAROCKS_NOT_YET
rocksdb_hton->update_table_stats = rocksdb_update_table_stats;
#endif // MARIAROCKS_NOT_YET
@@ -14631,17 +14650,25 @@ bool ha_rocksdb::use_read_free_rpl() const {
}
#endif // MARIAROCKS_NOT_YET
-double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
+IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks) {
DBUG_ENTER_FUNC();
+ IO_AND_CPU_COST cost;
+ cost= handler::keyread_time(index, ranges, rows, blocks);
+ cost.io/= 4; // Assume 75% compression (75% less IO)
+ DBUG_RETURN(cost);
+}
- if (index != table->s->primary_key) {
- /* Non covering index range scan */
- DBUG_RETURN(handler::read_time(index, ranges, rows));
- }
- DBUG_RETURN((rows / 20.0) + 1);
+ulonglong ha_rocksdb::index_blocks(uint index, uint ranges, ha_rows rows)
+{
+ size_t len= table->key_storage_length(index);
+ ulonglong blocks= (rows * len / 4) / stats.block_size + ranges; // 75 % compression
+ return blocks * stats.block_size / IO_SIZE;
}
+
void ha_rocksdb::print_error(int error, myf errflag) {
if (error == HA_ERR_ROCKSDB_STATUS_BUSY) {
error = HA_ERR_LOCK_DEADLOCK;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index f847ee25cb8..d03c183873e 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -623,15 +623,19 @@ public:
bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
- virtual double scan_time() override {
+ IO_AND_CPU_COST scan_time() override
+ {
+ IO_AND_CPU_COST cost;
DBUG_ENTER_FUNC();
-
- DBUG_RETURN(
- static_cast<double>((stats.records + stats.deleted) / 20.0 + 10));
+ cost= handler::scan_time();
+ cost.cpu+= stats.deleted * ROW_NEXT_FIND_COST; // We have to skip over deleted rows
+ DBUG_RETURN(cost);
}
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks) override;
- virtual double read_time(uint, uint, ha_rows rows) override;
- virtual void print_error(int error, myf errflag) override;
+ ulonglong index_blocks(uint index, uint ranges, ha_rows rows) override;
+ void print_error(int error, myf errflag) override;
int open(const char *const name, int mode, uint test_if_locked) override
MY_ATTRIBUTE((__warn_unused_result__));
diff --git a/storage/rocksdb/mariadb-ldb.1 b/storage/rocksdb/mariadb-ldb.1
new file mode 100644
index 00000000000..e1c08bba995
--- /dev/null
+++ b/storage/rocksdb/mariadb-ldb.1
@@ -0,0 +1,16 @@
+'\" t
+.\"
+.TH "\fBMARIADB-LDB\fR" "1" "15 May 2020" "MariaDB 10\&.10" "MariaDB Database System"
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.SH NAME
+mariadb-ldb \- RocksDB tool (mysql_ldb is now a symlink to mariadb-ldb)
+.SH DESCRIPTION
+Use \fBmariadb\-ldb \-\-help\fR for details on usage\.
+.PP
+For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/
diff --git a/storage/rocksdb/myrocks_hotbackup.1 b/storage/rocksdb/myrocks_hotbackup.1
new file mode 100644
index 00000000000..4237c452f76
--- /dev/null
+++ b/storage/rocksdb/myrocks_hotbackup.1
@@ -0,0 +1,82 @@
+.TH MYROCKS_HOTBACKUP "1" "15 May 2020" "MariaDB 10\&.10" "MariaDB Database System"
+.SH NAME
+myrocks_hotbackup \- streaming backup for MariaDB MyRocks
+.SH DESCRIPTION
+Usage:
+.PP
+Backup: set \fB\-o\fR pipefail; myrocks_hotbackup \fB\-\-user\fR=\fI\,root\/\fR \fB\-\-password\fR=\fI\,pw\/\fR \fB\-\-port\fR=\fI\,3306\/\fR \fB\-\-checkpoint_dir=\fR<directory where temporary backup hard links are created> | ssh \fB\-o\fR NoneEnabled=yes remote_server 'tar \fB\-xi\fR \fB\-C\fR <directory on remote server where backups will be sent>' . You need to execute backup command on a server where you take backups.
+.PP
+Backup using WDT: myrocks_hotbackup \fB\-\-user\fR=\fI\,root\/\fR \fB\-\-password\fR=\fI\,pw\/\fR \fB\-\-stream\fR=\fI\,wdt\/\fR \fB\-\-checkpoint_dir=\fR<directory where temporary backup hard links are created> \fB\-\-destination=\fR<remote host name> \fB\-\-backup_dir=\fR<remote directory name>. This has to be executed at the src host.
+.PP
+Move\-Back: myrocks_hotbackup \fB\-\-move_back\fR \fB\-\-datadir=\fR<dest mysql datadir> \fB\-\-rocksdb_datadir=\fR<dest rocksdb datadir> \fB\-\-rocksdb_waldir=\fR<dest rocksdb wal dir> \fB\-\-backup_dir=\fR<where backup files are stored> . You need to execute move\-back command on a server where backup files are sent.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-i\fR CHECKPOINT_INTERVAL, \fB\-\-interval\fR=\fI\,CHECKPOINT_INTERVAL\/\fR
+Number of seconds to renew checkpoint
+.TP
+\fB\-c\fR CHECKPOINT_DIRECTORY, \fB\-\-checkpoint_dir\fR=\fI\,CHECKPOINT_DIRECTORY\/\fR
+Local directory name where checkpoints will be
+created.
+.TP
+\fB\-d\fR DATADIR, \fB\-\-datadir\fR=\fI\,DATADIR\/\fR
+backup mode: src MySQL datadir. move_back mode: dest
+MySQL datadir
+.TP
+\fB\-s\fR OUTPUT_STREAM, \fB\-\-stream\fR=\fI\,OUTPUT_STREAM\/\fR
+Setting streaming backup options. Currently tar, WDT
+and xbstream are supported. Default is tar
+.TP
+\fB\-\-destination\fR=\fI\,DESTINATION\/\fR
+Remote server name. Only used for WDT mode so far.
+.TP
+\fB\-\-avg_mbytes_per_sec\fR=\fI\,AVG_MBYTES_PER_SEC\/\fR
+Average backup rate in MBytes/sec. WDT only.
+.TP
+\fB\-\-extra_wdt_sender_options\fR=\fI\,EXTRA_WDT_SENDER_OPTIONS\/\fR
+Extra options for WDT sender
+.TP
+\fB\-\-extra_wdt_receiver_options\fR=\fI\,EXTRA_WDT_RECEIVER_OPTIONS\/\fR
+Extra options for WDT receiver
+.TP
+\fB\-u\fR MYSQL_USER, \fB\-\-user\fR=\fI\,MYSQL_USER\/\fR
+MySQL user name
+.TP
+\fB\-p\fR MYSQL_PASSWORD, \fB\-\-password\fR=\fI\,MYSQL_PASSWORD\/\fR
+MySQL password
+.TP
+\fB\-P\fR MYSQL_PORT, \fB\-\-port\fR=\fI\,MYSQL_PORT\/\fR
+MySQL port number
+.TP
+\fB\-S\fR MYSQL_SOCKET, \fB\-\-socket\fR=\fI\,MYSQL_SOCKET\/\fR
+MySQL socket path. Takes precedence over \fB\-\-port\fR.
+.TP
+\fB\-m\fR, \fB\-\-move_back\fR
+Move MyRocks backup files to their proper locations.
+.TP
+\fB\-r\fR ROCKSDB_DATADIR, \fB\-\-rocksdb_datadir\fR=\fI\,ROCKSDB_DATADIR\/\fR
+RocksDB target data directory where backup data files
+will be moved. Must be empty.
+.TP
+\fB\-w\fR ROCKSDB_WALDIR, \fB\-\-rocksdb_waldir\fR=\fI\,ROCKSDB_WALDIR\/\fR
+RocksDB target data directory where backup wal files
+will be moved. Must be empty.
+.TP
+\fB\-b\fR BACKUPDIR, \fB\-\-backup_dir\fR=\fI\,BACKUPDIR\/\fR
+backup mode for WDT: Remote directory to store backup.
+move_back mode: Locations where backup files are
+stored.
+.TP
+\fB\-f\fR, \fB\-\-skip_check_frm_timestamp\fR
+Skip checking whether frm files were updated after
+the backup started.
+.TP
+\fB\-D\fR DEBUG_SIGNAL_FILE, \fB\-\-debug_signal_file\fR=\fI\,DEBUG_SIGNAL_FILE\/\fR
+For debugging purposes: wait until the specified file is
+created
+.SH "SEE ALSO"
+For more information, please refer to the MariaDB Knowledge Base, available online at https://mariadb.com/kb/
+.SH AUTHOR
+MariaDB Foundation (http://www.mariadb.org/).
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc
index c76b52d4cc1..bf593ec9b0c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc
@@ -49,7 +49,7 @@ insert into t3 select a,a/10,a,a from t1;
explain
select * from t3 where kp1=3 and kp2 like '%foo%';
---replace_column 9 #
+--source include/explain-no-costs.inc
explain format=json
select * from t3 where kp1 between 2 and 4 and mod(kp1,3)=0 and kp2 like '%foo%';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result
index 6bd6cea97de..a14ffdec2e3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result
@@ -92,7 +92,5 @@ disconnect con2;
disconnect con1;
disconnect con0;
SELECT * FROM t1 ORDER BY pk INTO OUTFILE <output_file>;
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
All pk values matched their expected values
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result
index c4a1c5f4668..1f4d1a641a2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result
@@ -20,8 +20,6 @@ END IF;
SET id1_cond = id1_cond + 1;
END WHILE;
END//
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
"Skipping bloom filter"
SET session rocksdb_skip_bloom_filter_on_read=1;
CALL select_test();
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
index df4c8ee424c..d2974438ecb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
@@ -98,12 +98,8 @@ buffer_LRU_unzip_search_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL N
buffer_LRU_unzip_search_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Page scanned per single LRU unzip search
buffer_page_read_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Leaf Pages read
buffer_page_read_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Non-leaf Pages read
-buffer_page_read_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Leaf Pages read
-buffer_page_read_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Non-Leaf Pages read
buffer_page_read_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Undo Log Pages read
buffer_page_read_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Inode Pages read
-buffer_page_read_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Free List Pages read
-buffer_page_read_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Bitmap Pages read
buffer_page_read_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of System Pages read
buffer_page_read_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Transaction System Pages read
buffer_page_read_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of File Space Header Pages read
@@ -114,12 +110,8 @@ buffer_page_read_zblob2 buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NU
buffer_page_read_other buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of other/unknown (old version of InnoDB) Pages read
buffer_page_written_index_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Leaf Pages written
buffer_page_written_index_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Non-leaf Pages written
-buffer_page_written_index_ibuf_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Leaf Pages written
-buffer_page_written_index_ibuf_non_leaf buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Index Non-Leaf Pages written
buffer_page_written_undo_log buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Undo Log Pages written
buffer_page_written_index_inode buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Index Inode Pages written
-buffer_page_written_ibuf_free_list buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Free List Pages written
-buffer_page_written_ibuf_bitmap buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Insert Buffer Bitmap Pages written
buffer_page_written_system_page buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of System Pages written
buffer_page_written_trx_system buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Transaction System Pages written
buffer_page_written_fsp_hdr buffer_page_io 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of File Space Header Pages written
@@ -187,14 +179,6 @@ adaptive_hash_rows_removed adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL
adaptive_hash_rows_deleted_no_hash_entry adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of rows deleted that did not have corresponding Adaptive Hash Index entries
adaptive_hash_rows_updated adaptive_hash_index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of Adaptive Hash Index rows updated
file_num_open_files file_system 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Number of files currently open (innodb_num_open_files)
-ibuf_merges_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of inserted records merged by change buffering
-ibuf_merges_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of deleted records merged by change buffering
-ibuf_merges_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of purge records merged by change buffering
-ibuf_merges_discard_insert change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of insert merged operations discarded
-ibuf_merges_discard_delete_mark change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of deleted merged operations discarded
-ibuf_merges_discard_delete change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of purge merged operations discarded
-ibuf_merges change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of change buffer merges
-ibuf_size change_buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Change buffer size in pages
innodb_master_thread_sleeps server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times (seconds) master thread sleeps
innodb_activity_count server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Current server activity count
innodb_master_active_loops server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of times master thread performs its tasks when server is active
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
index 730e12d02f6..6645a33e356 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
@@ -39,8 +39,8 @@ a varchar(10) NOT NULL,
e int(11) DEFAULT 0,
KEY (a)
) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
-insert into t1 values (1,1,1),(2,2,2);
-explain select a from t1 where a <'zzz';
+insert into t1 values (1,"a",1),(2,"b",2),(3,"c",2);
+explain select a from t1 where a <'b';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 32 NULL # Using where
CREATE TABLE t2(
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result
index 6ea13872033..3a631d2925b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result
@@ -1,123 +1,63 @@
Warnings:
Note 1051 Unknown table 'test.ti_nk'
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
skip_merge_sort
true
DROP TABLE ti_nk;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index 989d28e773d..0c9d29efa28 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -278,12 +278,12 @@ j
1
4
EXPLAIN
-SELECT * FROM t10, t11 WHERE i=j;
+SELECT * FROM t11 straight_join t10 WHERE i=j;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10 index PRIMARY PRIMARY 4 NULL # Using index
-1 SIMPLE t11 eq_ref PRIMARY PRIMARY 4 test.t10.i # Using index
-SELECT * FROM t10, t11 WHERE i=j;
-i j
+1 SIMPLE t11 index PRIMARY PRIMARY 4 NULL # Using index
+1 SIMPLE t10 eq_ref PRIMARY PRIMARY 4 test.t11.j # Using index
+SELECT * FROM t11 straight_join t10 WHERE i=j;
+j i
1 1
DROP TABLE t10,t11;
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result
index f9e3129c73f..a4717570450 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result
@@ -47,6 +47,7 @@ EXPLAIN
{
"query_block": {
"select_id": 1,
+ "cost": "COST_REPLACED",
"nested_loop": [
{
"table": {
@@ -56,7 +57,9 @@ EXPLAIN
"key": "kp1",
"key_length": "5",
"used_key_parts": ["kp1"],
+ "loops": 1,
"rows": 1000,
+ "cost": "COST_REPLACED",
"filtered": 100,
"index_condition": "t3.kp1 between 2 and 4 and t3.kp1 MOD 3 = 0",
"attached_condition": "t3.kp2 like '%foo%'"
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result
index 3634f8c023e..07bce244792 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result
@@ -47,6 +47,7 @@ EXPLAIN
{
"query_block": {
"select_id": 1,
+ "cost": "COST_REPLACED",
"nested_loop": [
{
"table": {
@@ -56,7 +57,9 @@ EXPLAIN
"key": "kp1",
"key_length": "5",
"used_key_parts": ["kp1"],
+ "loops": 1,
"rows": 1000,
+ "cost": "COST_REPLACED",
"filtered": 100,
"index_condition": "t3.kp1 between 2 and 4 and t3.kp1 MOD 3 = 0",
"attached_condition": "t3.kp2 like '%foo%'"
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/select.result b/storage/rocksdb/mysql-test/rocksdb/r/select.result
index 7ea43adc9ea..fc3825d5377 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/select.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/select.result
@@ -115,8 +115,6 @@ SELECT t1.a, t2.b FROM t2, t1 WHERE t1.a = t2.a ORDER BY t2.b, t1.a
INTO OUTFILE '<DATADIR>/select.out'
CHARACTER SET utf8
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '''';
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
200,'bar'
200,'bar'
100,'foobar'
@@ -128,12 +126,8 @@ INTO DUMPFILE '<DATADIR>/select.dump';
ERROR 42000: Result consisted of more than one row
SELECT t1.*, t2.* FROM t1, t2 ORDER BY t2.b, t1.a, t2.a, t1.b, t1.pk, t2.pk LIMIT 1
INTO DUMPFILE '<DATADIR>/select.dump';
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
1z2200bar3
SELECT MIN(a), MAX(a) FROM t1 INTO @min, @max;
-Warnings:
-Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
SELECT @min, @max;
@min @max
1 200
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result
index 3c9c30bb617..39413ea5987 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result
@@ -45,7 +45,7 @@ t1 1 v16 1 v16 A 500 NULL NULL YES LSMTREE NO
INSERT INTO t1 (c,c20,v16,v128,pk) VALUES ('a','char1','varchar1a','varchar1b','1'),('a','char2','varchar2a','varchar2b','2'),('b','char3','varchar1a','varchar1b','3'),('c','char4','varchar3a','varchar3b','4');
EXPLAIN SELECT SUBSTRING(v16,0,3) FROM t1 WHERE v16 LIKE 'varchar%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range v16 v16 19 NULL # Using where; Using index
+1 SIMPLE t1 index v16 v16 19 NULL # Using where; Using index
SELECT SUBSTRING(v16,7,3) FROM t1 WHERE v16 LIKE 'varchar%';
SUBSTRING(v16,7,3)
r1a
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result
index bd40e32f94d..5e89648648d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result
@@ -62,7 +62,7 @@ INSERT INTO t1 (d,dt,ts,t,y,pk) VALUES
(DATE(@tm),@tm,TIMESTAMP(@tm),TIME(@tm),YEAR(@tm),'12:05:00');
EXPLAIN SELECT ts FROM t1 WHERE ts > NOW();
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range ts ts 5 NULL # Using where; Using index
+1 SIMPLE t1 index ts ts 5 NULL # Using where; Using index
SELECT ts FROM t1 WHERE ts > NOW();
ts
EXPLAIN SELECT ts FROM t1 USE INDEX () WHERE ts > NOW();
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result
index b0bcfd7075c..011fa0894ec 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result
@@ -49,7 +49,7 @@ t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE NO
t1 1 b 1 b A 500 NULL NULL YES LSMTREE NO
EXPLAIN SELECT DISTINCT b FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index NULL b 2 NULL #
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using temporary
SELECT DISTINCT b FROM t1;
b
test1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result
index 89dc65e56f8..a98f90f28da 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result
@@ -114,7 +114,7 @@ INSERT INTO t1 (f,r,d,dp,pk) VALUES
(4644,1422.22,466664.999,0.5,5);
EXPLAIN SELECT DISTINCT d FROM t1 ORDER BY d;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range NULL d 9 NULL # Using index for group-by
+1 SIMPLE t1 index NULL d 9 NULL # Using index
SELECT DISTINCT d FROM t1 ORDER BY d;
d
-1
@@ -177,7 +177,7 @@ INSERT INTO t1 (f,r,d,dp,pk) VALUES
(1.2345,0,0,0,6);
EXPLAIN SELECT DISTINCT f FROM t1 ORDER BY f;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range NULL f 5 NULL # Using index for group-by
+1 SIMPLE t1 index NULL f 5 NULL # Using index
SELECT DISTINCT f FROM t1 ORDER BY f;
f
-1
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
index 99d4e2d117c..da4ac350654 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
@@ -37,9 +37,9 @@ CREATE TABLE t1(
e int(11) DEFAULT 0,
KEY (a)
) ENGINE=ROCKSDB DEFAULT CHARSET=utf8;
-insert into t1 values (1,1,1),(2,2,2);
+insert into t1 values (1,"a",1),(2,"b",2),(3,"c",2);
--replace_column 9 #
-explain select a from t1 where a <'zzz';
+explain select a from t1 where a <'b';
CREATE TABLE t2(
pk int,
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index f7de167bd96..9b24ad952d7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -266,8 +266,8 @@ select * from t10;
select * from t11;
--replace_column 9 #
EXPLAIN
-SELECT * FROM t10, t11 WHERE i=j;
-SELECT * FROM t10, t11 WHERE i=j;
+SELECT * FROM t11 straight_join t10 WHERE i=j;
+SELECT * FROM t11 straight_join t10 WHERE i=j;
DROP TABLE t10,t11;
diff --git a/storage/rocksdb/tools/mysql_ldb.cc b/storage/rocksdb/tools/mysql_ldb.cc
index b1eec03f214..454b7a63c73 100644
--- a/storage/rocksdb/tools/mysql_ldb.cc
+++ b/storage/rocksdb/tools/mysql_ldb.cc
@@ -8,6 +8,7 @@
#include "rocksdb/ldb_tool.h"
int main(int argc, char **argv) {
+ MY_INIT(argv[0]);
rocksdb::Options db_options;
myrocks::Rdb_pk_comparator pk_comparator;
db_options.comparator = &pk_comparator;
diff --git a/storage/sequence/mysql-test/sequence/group_by.result b/storage/sequence/mysql-test/sequence/group_by.result
index bcda2ba5c76..7c098de9afd 100644
--- a/storage/sequence/mysql-test/sequence/group_by.result
+++ b/storage/sequence/mysql-test/sequence/group_by.result
@@ -86,7 +86,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index NULL PRIMARY 8 NULL 8 Using index; Using join buffer (flat, BNL join)
explain select count(*) from seq_1_to_15_step_2 where seq > 0;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE seq_1_to_15_step_2 index PRIMARY PRIMARY 8 NULL 8 Using where; Using index
+1 SIMPLE seq_1_to_15_step_2 range PRIMARY PRIMARY 8 NULL 8 Using where; Using index
explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index; Using temporary; Using filesort
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index b2bce9325ac..6f66e122ed9 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -64,45 +64,53 @@ public:
Sequence_share *seqs;
ha_seq(handlerton *hton, TABLE_SHARE *table_arg)
: handler(hton, table_arg), seqs(0) { }
- ulonglong table_flags() const
+ ulonglong table_flags() const override
{ return HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE; }
/* open/close/locking */
int create(const char *name, TABLE *table_arg,
- HA_CREATE_INFO *create_info)
+ HA_CREATE_INFO *create_info) override
{ return HA_ERR_WRONG_COMMAND; }
- int open(const char *name, int mode, uint test_if_locked);
- int close(void);
- int delete_table(const char *name)
+ int open(const char *name, int mode, uint test_if_locked) override;
+ int close(void) override;
+ int delete_table(const char *name) override
{
return 0;
}
- THR_LOCK_DATA **store_lock(THD *, THR_LOCK_DATA **, enum thr_lock_type);
+ THR_LOCK_DATA **store_lock(THD *, THR_LOCK_DATA **, enum thr_lock_type)
+ override;
/* table scan */
- int rnd_init(bool scan);
- int rnd_next(unsigned char *buf);
- void position(const uchar *record);
- int rnd_pos(uchar *buf, uchar *pos);
- int info(uint flag);
-
+ int rnd_init(bool scan) override;
+ int rnd_next(unsigned char *buf) override;
+ void position(const uchar *record) override;
+ int rnd_pos(uchar *buf, uchar *pos) override;
+ int info(uint flag) override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override
+ {
+ /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+ return {0,0};
+ }
+ IO_AND_CPU_COST scan_time() override
+ {
+ /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+ return {0, 0};
+ }
/* indexes */
- ulong index_flags(uint inx, uint part, bool all_parts) const
+ ulong index_flags(uint inx, uint part, bool all_parts) const override
{ return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
HA_READ_RANGE | HA_KEYREAD_ONLY; }
- uint max_supported_keys() const { return 1; }
+ uint max_supported_keys() const override { return 1; }
int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map,
- enum ha_rkey_function find_flag);
- int index_next(uchar *buf);
- int index_prev(uchar *buf);
- int index_first(uchar *buf);
- int index_last(uchar *buf);
+ enum ha_rkey_function find_flag) override;
+ int index_next(uchar *buf) override;
+ int index_prev(uchar *buf) override;
+ int index_first(uchar *buf) override;
+ int index_last(uchar *buf) override;
ha_rows records_in_range(uint inx, const key_range *start_key,
- const key_range *end_key, page_range *pages);
- double scan_time() { return (double)nvalues(); }
- double read_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
- double keyread_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
+ const key_range *end_key, page_range *pages) override;
private:
void set(uchar *buf);
@@ -492,6 +500,17 @@ int ha_seq_group_by_handler::next_row()
DBUG_RETURN(0);
}
+static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ costs->disk_read_cost= 0;
+ costs->disk_read_ratio= 0.0; // No disk
+ costs->key_next_find_cost=
+ costs->key_lookup_cost=
+ costs->key_copy_cost=
+ costs->row_next_find_cost=
+ costs->row_lookup_cost=
+ costs->row_copy_cost= 0.0000062391530550;
+}
/*****************************************************************************
Initialize the interface between the sequence engine and MariaDB
@@ -518,6 +537,7 @@ static int init(void *p)
hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release=
dummy_savepoint;
hton->create_group_by= create_group_by_handler;
+ hton->update_optimizer_costs= sequence_update_optimizer_costs;
return 0;
}
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h
index f03e9d8c797..f5651fc6eb5 100644
--- a/storage/sphinx/ha_sphinx.h
+++ b/storage/sphinx/ha_sphinx.h
@@ -72,14 +72,28 @@ public:
uint max_supported_key_length () const { return MAX_KEY_LENGTH; }
uint max_supported_key_part_length () const { return MAX_KEY_LENGTH; }
- #if MYSQL_VERSION_ID>50100
- virtual double scan_time () { return (double)( stats.records+stats.deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used
- #else
- virtual double scan_time () { return (double)( records+deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used
- #endif
-
- virtual double read_time(uint index, uint ranges, ha_rows rows)
- { return ranges + (double)rows/20.0 + 1; } ///< index read time estimate
+ IO_AND_CPU_COST scan_time ()
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= 0;
+ cost.cpu= (double) (stats.records+stats.deleted) * DISK_READ_COST;
+ return cost;
+ }
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= ranges;
+ cost.cpu= 0;
+ return cost;
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= 0;
+ cost.cpu= 0;
+ return cost;
+ }
public:
int open ( const char * name, int mode, uint test_if_locked );
diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc
index eb691e52b0a..5cf67a091db 100644
--- a/storage/spider/ha_spider.cc
+++ b/storage/spider/ha_spider.cc
@@ -238,7 +238,6 @@ int ha_spider::open(
DBUG_PRINT("info",("spider this=%p", this));
dup_key_idx = (uint) -1;
- conn_kinds = SPIDER_CONN_KIND_MYSQL;
table->file->get_no_parts("", &part_num);
if (part_num)
{
@@ -590,22 +589,7 @@ int ha_spider::check_access_kind_for_connection(
int error_num, roop_count;
DBUG_ENTER("ha_spider::check_access_kind_for_connection");
DBUG_PRINT("info",("spider this=%p", this));
- conn_kinds = 0;
- switch (wide_handler->sql_command)
- {
- case SQLCOM_UPDATE:
- case SQLCOM_UPDATE_MULTI:
- case SQLCOM_DELETE:
- case SQLCOM_DELETE_MULTI:
- default:
- conn_kinds |= SPIDER_CONN_KIND_MYSQL;
- for (roop_count = 0; roop_count < (int) share->link_count; roop_count++)
- {
- conn_kind[roop_count] = SPIDER_CONN_KIND_MYSQL;
- }
- break;
- }
- if ((error_num = spider_check_trx_and_get_conn(thd, this, TRUE)))
+ if ((error_num= spider_check_trx_and_get_conn(thd, this)))
{
DBUG_RETURN(error_num);
}
@@ -1035,8 +1019,6 @@ int ha_spider::reset()
for (roop_count = share->link_count - 1; roop_count >= 0; roop_count--)
{
result_list.update_sqls[roop_count].length(0);
-
- conn_kind[roop_count] = SPIDER_CONN_KIND_MYSQL;
}
result_list.bulk_update_mode = 0;
result_list.bulk_update_size = 0;
@@ -1062,7 +1044,6 @@ int ha_spider::reset()
result_list.use_union = FALSE;
result_list.use_both_key = FALSE;
pt_clone_last_searcher = NULL;
- conn_kinds = SPIDER_CONN_KIND_MYSQL;
use_index_merge = FALSE;
init_rnd_handler = FALSE;
if (multi_range_keys)
@@ -3170,6 +3151,7 @@ ha_rows ha_spider::multi_range_read_info_const(
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost
)
{
@@ -3209,6 +3191,7 @@ ha_rows ha_spider::multi_range_read_info_const(
n_ranges,
bufsz,
flags,
+ limit,
cost
);
*flags &= ~HA_MRR_USE_DEFAULT_IMPL;
@@ -6680,8 +6663,7 @@ int ha_spider::info(
pthread_mutex_lock(&share->sts_mutex);
if (difftime(tmp_time, share->sts_get_time) >= sts_interval)
{
- if ((error_num = spider_check_trx_and_get_conn(ha_thd(), this,
- FALSE)))
+ if ((error_num= spider_check_trx_and_get_conn(ha_thd(), this)))
{
pthread_mutex_unlock(&share->sts_mutex);
if (!share->sts_init)
@@ -7266,7 +7248,7 @@ int ha_spider::check_crd()
}
if (crd_mode == 3)
crd_mode = 1;
- if ((error_num = spider_check_trx_and_get_conn(ha_thd(), this, FALSE)))
+ if ((error_num= spider_check_trx_and_get_conn(ha_thd(), this)))
{
DBUG_RETURN(check_error_mode(error_num));
}
@@ -8480,7 +8462,7 @@ int ha_spider::truncate()
DBUG_RETURN(ER_SPIDER_READ_ONLY_NUM);
}
wide_handler->sql_command = SQLCOM_TRUNCATE;
- if ((error_num = spider_check_trx_and_get_conn(thd, this, FALSE)))
+ if ((error_num= spider_check_trx_and_get_conn(thd, this)))
{
DBUG_RETURN(error_num);
}
@@ -8504,38 +8486,47 @@ int ha_spider::truncate()
DBUG_RETURN(0);
}
-
-double ha_spider::scan_time()
+IO_AND_CPU_COST ha_spider::scan_time()
{
+ IO_AND_CPU_COST cost;
DBUG_ENTER("ha_spider::scan_time");
DBUG_PRINT("info",("spider this=%p", this));
- DBUG_PRINT("info",("spider scan_time = %.6f",
- share->scan_rate * share->stat.records * share->stat.mean_rec_length + 2));
- DBUG_RETURN(share->scan_rate * share->stat.records *
- share->stat.mean_rec_length + 2);
+ cost.io=0;
+ cost.cpu= (DISK_READ_COST * share->stat.records * share->stat.mean_rec_length);
+ DBUG_PRINT("info",("spider scan_time = %.6f", cost.cpu));
+ DBUG_RETURN(cost);
}
-double ha_spider::read_time(
- uint index,
- uint ranges,
- ha_rows rows
-) {
- DBUG_ENTER("ha_spider::read_time");
+IO_AND_CPU_COST ha_spider::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= { 0.0, 0.0}; // Row is in memory
+ return cost;
+}
+
+IO_AND_CPU_COST ha_spider::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ IO_AND_CPU_COST cost;
+ DBUG_ENTER("ha_spider::keyread_time");
DBUG_PRINT("info",("spider this=%p", this));
+
+ /*
+ Here we only calculate transfer costs. The normal handler cost functions
+ will add costs for accessing a row/key.
+ */
if (wide_handler->keyread)
{
- DBUG_PRINT("info",("spider read_time(keyread) = %.6f",
- share->read_rate * table->key_info[index].key_length *
- rows / 2 + 2));
- DBUG_RETURN(share->read_rate * table->key_info[index].key_length *
- rows / 2 + 2);
+ cost.io= 0;
+ cost.cpu= DISK_READ_COST * rows * table->key_info[index].key_length;
} else {
- DBUG_PRINT("info",("spider read_time = %.6f",
- share->read_rate * share->stat.mean_rec_length * rows + 2));
- DBUG_RETURN(share->read_rate * share->stat.mean_rec_length * rows + 2);
+ cost.io= 0;
+ cost.cpu= DISK_READ_COST * rows * share->stat.mean_rec_length;
}
+ DBUG_PRINT("info",("spider scan_time(keyread) = %.6f", cost.cpu));
+ DBUG_RETURN(cost);
}
+
const key_map *ha_spider::keys_to_use_for_scanning()
{
DBUG_ENTER("ha_spider::keys_to_use_for_scanning");
@@ -12066,8 +12057,7 @@ int ha_spider::append_lock_tables_list()
DBUG_PRINT("info",("spider lock_table_type=%u",
wide_handler->lock_table_type));
- if ((error_num = spider_check_trx_and_get_conn(wide_handler->trx->thd, this,
- FALSE)))
+ if ((error_num= spider_check_trx_and_get_conn(wide_handler->trx->thd, this)))
{
DBUG_RETURN(error_num);
}
diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h
index 4dffdf78553..ac865e78f2c 100644
--- a/storage/spider/ha_spider.h
+++ b/storage/spider/ha_spider.h
@@ -60,8 +60,6 @@ public:
const char *mem_calc_file_name;
ulong mem_calc_line_no;
ulonglong *connection_ids;
- uint conn_kinds;
- uint *conn_kind;
char *conn_keys_first_ptr;
char **conn_keys;
SPIDER_CONN **conns;
@@ -252,6 +250,7 @@ public:
uint n_ranges,
uint *bufsz,
uint *flags,
+ ha_rows limit,
Cost_estimate *cost
);
ha_rows multi_range_read_info(
@@ -445,12 +444,10 @@ public:
);
int delete_all_rows();
int truncate();
- double scan_time();
- double read_time(
- uint index,
- uint ranges,
- ha_rows rows
- );
+ IO_AND_CPU_COST scan_time();
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks);
const key_map *keys_to_use_for_scanning();
ha_rows estimate_rows_upper_bound();
void print_error(
diff --git a/storage/spider/mysql-test/spider/bg/r/spider_fixes.result b/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
index a6a7588b014..2f54ef93a13 100644
--- a/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
+++ b/storage/spider/mysql-test/spider/bg/r/spider_fixes.result
@@ -481,7 +481,6 @@ DELETE FROM t1;
Warnings:
Error 12702 Remote table 'auto_test_remote.ter1_1' is not found
Error 12702 Remote table 'auto_test_remote.ter1_1' is not found
-Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
TRUNCATE t1;
Warnings:
Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
diff --git a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc
index 09ab2934aea..337979a4f3d 100644
--- a/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc
+++ b/storage/spider/mysql-test/spider/bugfix/include/sql_mode_init.inc
@@ -5,21 +5,7 @@
--enable_result_log
--enable_query_log
--enable_warnings
---let $SQL_MODES= real_as_float,pipes_as_concat,ansi_quotes,ignore_space,ignore_bad_table_options,only_full_group_by,no_unsigned_subtraction,no_dir_in_create,postgresql,oracle,mssql,db2,maxdb,no_key_options,no_table_options,no_field_options,mysql323,mysql40,ansi,no_auto_value_on_zero,no_backslash_escapes,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,traditional,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length
-if (`SELECT IF(STRCMP('$SERVER_NAME', 'MariaDB') = 0, 1, 0)`)
-{
- if (`SELECT IF($SERVER_MAJOR_VERSION = 10, 1, 0)`)
- {
- if (`SELECT IF($SERVER_MINOR_VERSION >= 3, 1, 0)`)
- {
- --let $SQL_MODES= $SQL_MODES,empty_string_is_null,simultaneous_assignment
- }
- if (`SELECT IF($SERVER_MINOR_VERSION >= 4, 1, 0)`)
- {
- --let $SQL_MODES= $SQL_MODES,time_round_fractional
- }
- }
-}
+--let $SQL_MODES= real_as_float,pipes_as_concat,ansi_quotes,ignore_space,ignore_bad_table_options,only_full_group_by,no_unsigned_subtraction,no_dir_in_create,postgresql,oracle,mssql,db2,maxdb,no_key_options,no_table_options,no_field_options,mysql323,mysql40,ansi,no_auto_value_on_zero,no_backslash_escapes,strict_trans_tables,strict_all_tables,no_zero_in_date,no_zero_date,allow_invalid_dates,error_for_division_by_zero,traditional,no_auto_create_user,high_not_precedence,no_engine_substitution,pad_char_to_full_length,empty_string_is_null,simultaneous_assignment,time_round_fractional
--connection master_1
set @old_sql_mode= @@sql_mode;
eval set session sql_mode= '$SQL_MODES';
diff --git a/storage/spider/mysql-test/spider/bugfix/r/quick_mode_1.result b/storage/spider/mysql-test/spider/bugfix/r/quick_mode_1.result
index 89a07bf64e6..62e1b2e64b2 100644
--- a/storage/spider/mysql-test/spider/bugfix/r/quick_mode_1.result
+++ b/storage/spider/mysql-test/spider/bugfix/r/quick_mode_1.result
@@ -57,6 +57,10 @@ TRUNCATE TABLE mysql.general_log;
connection child2_2;
TRUNCATE TABLE mysql.general_log;
connection master_1;
+explain SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE a index PRIMARY PRIMARY 4 NULL 2 Using index
+1 SIMPLE b eq_ref PRIMARY PRIMARY 4 auto_test_local.a.pkey 1 Using index
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
pkey
0
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_22246.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_22246.test
index 9e58bc1a836..be993647bb9 100644
--- a/storage/spider/mysql-test/spider/bugfix/t/mdev_22246.test
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_22246.test
@@ -64,6 +64,7 @@ TRUNCATE TABLE mysql.general_log;
--connection master_1
SELECT * FROM tbl_a;
+--sorted_result
SELECT * FROM tbl_a WHERE id <0 || id >0;
--connection child2_1
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
index 60c0ad42921..02a4b803a89 100644
--- a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
@@ -2,6 +2,10 @@
--echo # MDEV-27172 Prefix indices on Spider tables may lead to wrong query results
--echo #
+# Disable test for ps-protocol as the general log has different number of
+# commands for --ps
+--source include/no_protocol.inc
+
--disable_query_log
--disable_result_log
--source ../../t/test_init.inc
diff --git a/storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test b/storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test
index 01fa0cb5128..c878a738c53 100644
--- a/storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test
+++ b/storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test
@@ -74,6 +74,7 @@ TRUNCATE TABLE mysql.general_log;
TRUNCATE TABLE mysql.general_log;
--connection master_1
+explain SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
--connection child2_1
diff --git a/storage/spider/mysql-test/spider/r/direct_left_join_nullable.result b/storage/spider/mysql-test/spider/r/direct_left_join_nullable.result
index 4adfb1bd76a..83ec42044a5 100644
--- a/storage/spider/mysql-test/spider/r/direct_left_join_nullable.result
+++ b/storage/spider/mysql-test/spider/r/direct_left_join_nullable.result
@@ -87,7 +87,7 @@ a b c a
connection child2_1;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
-select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t0 left join `auto_test_remote`.`ta_r_auto_inc` t1 on (t1.`a` = t0.`a`) left join `auto_test_remote`.`ta_r_3` t2 on (t2.`c` = t1.`c`) left join `auto_test_remote`.`ta_r` t3 on (t3.`b` = t2.`b`) where 1 order by t0.`a` desc
+select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t0 left join `auto_test_remote`.`ta_r_auto_inc` t1 on ((t1.`a` = t0.`a`) and (t0.`a` is not null)) left join `auto_test_remote`.`ta_r_3` t2 on (t2.`c` = t1.`c`) left join `auto_test_remote`.`ta_r` t3 on ((t3.`b` = t2.`b`) and (t2.`b` is not null)) where 1 order by t0.`a` desc
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_r ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
diff --git a/storage/spider/mysql-test/spider/r/direct_left_right_join_nullable.result b/storage/spider/mysql-test/spider/r/direct_left_right_join_nullable.result
index a6bd3a7c1a1..ff4f211faf5 100644
--- a/storage/spider/mysql-test/spider/r/direct_left_right_join_nullable.result
+++ b/storage/spider/mysql-test/spider/r/direct_left_right_join_nullable.result
@@ -87,7 +87,7 @@ NULL NULL NULL 3
connection child2_1;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
-select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join (`auto_test_remote`.`ta_r_auto_inc` t2 join `auto_test_remote`.`ta_r_3` t1 join `auto_test_remote`.`ta_r` t0) on ((t2.`b` = t3.`b`) and (t2.`c` = t1.`c`) and (t0.`a` = t1.`a`) and (t1.`a` is not null)) where 1 order by t3.`a` desc
+select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join (`auto_test_remote`.`ta_r_auto_inc` t2 join `auto_test_remote`.`ta_r_3` t1 join `auto_test_remote`.`ta_r` t0) on ((t2.`b` = t3.`b`) and (t1.`c` = t2.`c`) and (t0.`a` = t1.`a`) and (t3.`b` is not null) and (t1.`a` is not null)) where 1 order by t3.`a` desc
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_r ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
diff --git a/storage/spider/mysql-test/spider/r/direct_right_join_nullable.result b/storage/spider/mysql-test/spider/r/direct_right_join_nullable.result
index 5101ea5036a..02f985279f8 100644
--- a/storage/spider/mysql-test/spider/r/direct_right_join_nullable.result
+++ b/storage/spider/mysql-test/spider/r/direct_right_join_nullable.result
@@ -87,7 +87,7 @@ NULL c 2000-01-03 00:00:00 3
connection child2_1;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
-select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join `auto_test_remote`.`ta_r_auto_inc` t2 on (t2.`b` = t3.`b`) left join `auto_test_remote`.`ta_r_3` t1 on (t1.`c` = t2.`c`) left join `auto_test_remote`.`ta_r` t0 on ((t0.`a` = t1.`a`) and (t1.`a` is not null)) where 1 order by t3.`a` desc
+select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join `auto_test_remote`.`ta_r_auto_inc` t2 on ((t2.`b` = t3.`b`) and (t3.`b` is not null)) left join `auto_test_remote`.`ta_r_3` t1 on (t1.`c` = t2.`c`) left join `auto_test_remote`.`ta_r` t0 on ((t0.`a` = t1.`a`) and (t1.`a` is not null)) where 1 order by t3.`a` desc
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_r ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
diff --git a/storage/spider/mysql-test/spider/r/direct_right_left_right_join_nullable.result b/storage/spider/mysql-test/spider/r/direct_right_left_right_join_nullable.result
index f6c808be973..840328508fa 100644
--- a/storage/spider/mysql-test/spider/r/direct_right_left_right_join_nullable.result
+++ b/storage/spider/mysql-test/spider/r/direct_right_left_right_join_nullable.result
@@ -87,7 +87,7 @@ NULL c 2000-01-03 00:00:00 3
connection child2_1;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
-select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join (`auto_test_remote`.`ta_r_auto_inc` t2 join `auto_test_remote`.`ta_r_3` t1 left join `auto_test_remote`.`ta_r` t0 on ((t0.`a` = t1.`a`) and (t1.`a` is not null))) on ((t2.`b` = t3.`b`) and (t2.`c` = t1.`c`)) where 1 order by t3.`a` desc
+select t0.`a` `a`,t2.`b` `b`,t2.`c` `c`,t3.`a` `a` from `auto_test_remote`.`ta_r_no_idx` t3 left join (`auto_test_remote`.`ta_r_auto_inc` t2 join `auto_test_remote`.`ta_r_3` t1 left join `auto_test_remote`.`ta_r` t0 on ((t0.`a` = t1.`a`) and (t1.`a` is not null))) on ((t2.`b` = t3.`b`) and (t1.`c` = t2.`c`) and (t3.`b` is not null)) where 1 order by t3.`a` desc
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
SELECT a, b, date_format(c, '%Y-%m-%d %H:%i:%s') FROM ta_r ORDER BY a;
a b date_format(c, '%Y-%m-%d %H:%i:%s')
diff --git a/storage/spider/mysql-test/spider/r/partition_mrr.result b/storage/spider/mysql-test/spider/r/partition_mrr.result
index c1b7d6e6a4a..61878a15698 100644
--- a/storage/spider/mysql-test/spider/r/partition_mrr.result
+++ b/storage/spider/mysql-test/spider/r/partition_mrr.result
@@ -74,36 +74,36 @@ TRUNCATE TABLE mysql.general_log;
connection master_1;
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
pkey
-4
-5
+0
+1
10
11
+12
+13
+14
+15
16
17
+18
+19
+2
+20
+21
22
23
+24
+25
+26
+27
28
29
-0
-1
+3
+4
+5
6
7
-12
-13
-18
-19
-24
-25
-2
-3
8
9
-14
-15
-20
-21
-26
-27
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey+0 = b.pkey+0 ORDER BY a.pkey;
pkey
0
@@ -140,7 +140,9 @@ connection child2_1;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
select `pkey` from `auto_test_remote`.`tbl_a` order by `pkey`
-select a.id,b.`pkey` from auto_test_remote.tmp_spider_bka_xxxx a,`auto_test_remote`.`tbl_b` b where a.c0 <=> b.`pkey`
+select `pkey` from `auto_test_remote`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote`.`tbl_a` order by `pkey`
select `pkey` from `auto_test_remote`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote`.`tbl_b` order by `pkey`
@@ -174,7 +176,9 @@ connection child2_2;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
select `pkey` from `auto_test_remote2`.`tbl_a` order by `pkey`
-select a.id,b.`pkey` from auto_test_remote2.tmp_spider_bka_xxxx a,`auto_test_remote2`.`tbl_b` b where a.c0 <=> b.`pkey`
+select `pkey` from `auto_test_remote2`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote2`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote2`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote2`.`tbl_a` order by `pkey`
select `pkey` from `auto_test_remote2`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote2`.`tbl_b` order by `pkey`
@@ -208,7 +212,9 @@ connection child2_3;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
select `pkey` from `auto_test_remote3`.`tbl_a` order by `pkey`
-select a.id,b.`pkey` from auto_test_remote3.tmp_spider_bka_xxxx a,`auto_test_remote3`.`tbl_b` b where a.c0 <=> b.`pkey`
+select `pkey` from `auto_test_remote3`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote3`.`tbl_b` order by `pkey`
+select `pkey` from `auto_test_remote3`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote3`.`tbl_a` order by `pkey`
select `pkey` from `auto_test_remote3`.`tbl_b` order by `pkey`
select `pkey` from `auto_test_remote3`.`tbl_b` order by `pkey`
diff --git a/storage/spider/mysql-test/spider/r/spider_fixes.result b/storage/spider/mysql-test/spider/r/spider_fixes.result
index 3b9d939393a..5e17e83618e 100644
--- a/storage/spider/mysql-test/spider/r/spider_fixes.result
+++ b/storage/spider/mysql-test/spider/r/spider_fixes.result
@@ -481,7 +481,6 @@ DELETE FROM t1;
Warnings:
Error 12702 Remote table 'auto_test_remote.ter1_1' is not found
Error 12702 Remote table 'auto_test_remote.ter1_1' is not found
-Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
TRUNCATE t1;
Warnings:
Error 1146 Table 'auto_test_remote.ter1_1' doesn't exist
diff --git a/storage/spider/mysql-test/spider/t/partition_mrr.test b/storage/spider/mysql-test/spider/t/partition_mrr.test
index 23f4fdb6e27..6c431401e18 100644
--- a/storage/spider/mysql-test/spider/t/partition_mrr.test
+++ b/storage/spider/mysql-test/spider/t/partition_mrr.test
@@ -168,6 +168,7 @@ if ($USE_CHILD_GROUP2)
}
}
--connection master_1
+--sorted_result
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey = b.pkey;
SELECT a.pkey FROM tbl_a a, tbl_b b WHERE a.pkey+0 = b.pkey+0 ORDER BY a.pkey; # MDEV-29947
if ($USE_CHILD_GROUP2)
diff --git a/storage/spider/spd_conn.cc b/storage/spider/spd_conn.cc
index 0f252012bd9..ca556702c65 100644
--- a/storage/spider/spd_conn.cc
+++ b/storage/spider/spd_conn.cc
@@ -108,7 +108,6 @@ uchar *spider_conn_get_key(
) {
DBUG_ENTER("spider_conn_get_key");
*length = conn->conn_key_length;
- DBUG_PRINT("info",("spider conn_kind=%u", conn->conn_kind));
#ifdef DBUG_TRACE
spider_print_keys(conn->conn_key, conn->conn_key_length);
#endif
@@ -382,7 +381,6 @@ SPIDER_CONN *spider_create_conn(
ha_spider *spider,
int link_idx,
int base_link_idx,
- uint conn_kind,
int *error_num
) {
int *need_mon;
@@ -602,7 +600,6 @@ SPIDER_CONN *spider_create_conn(
conn->semi_trx_isolation_chk = FALSE;
conn->semi_trx_chk = FALSE;
conn->link_idx = base_link_idx;
- conn->conn_kind = conn_kind;
conn->conn_need_mon = need_mon;
if (spider)
conn->need_mon = &spider->need_mons[base_link_idx];
@@ -689,13 +686,11 @@ SPIDER_CONN *spider_get_conn(
ha_spider *spider,
bool another,
bool thd_chg,
- uint conn_kind,
int *error_num
) {
SPIDER_CONN *conn = NULL;
int base_link_idx = link_idx;
DBUG_ENTER("spider_get_conn");
- DBUG_PRINT("info",("spider conn_kind=%u", conn_kind));
if (spider)
link_idx = spider->conn_link_idx[base_link_idx];
@@ -734,7 +729,8 @@ SPIDER_CONN *spider_get_conn(
pthread_mutex_unlock(&spider_conn_mutex);
if (spider_param_max_connections())
{ /* enable connection pool */
- conn = spider_get_conn_from_idle_connection(share, link_idx, conn_key, spider, conn_kind, base_link_idx, error_num);
+ conn= spider_get_conn_from_idle_connection(
+ share, link_idx, conn_key, spider, base_link_idx, error_num);
/* failed get conn, goto error */
if (!conn)
goto error;
@@ -743,8 +739,8 @@ SPIDER_CONN *spider_get_conn(
else
{ /* did not enable conncetion pool , create_conn */
DBUG_PRINT("info",("spider create new conn"));
- if (!(conn = spider_create_conn(share, spider, link_idx,
- base_link_idx, conn_kind, error_num)))
+ if (!(conn= spider_create_conn(share, spider, link_idx,
+ base_link_idx, error_num)))
goto error;
*conn->conn_key = *conn_key;
if (spider)
@@ -768,8 +764,8 @@ SPIDER_CONN *spider_get_conn(
} else {
DBUG_PRINT("info",("spider create new conn"));
/* conn_recycle_strict = 0 and conn_recycle_mode = 0 or 2 */
- if (!(conn = spider_create_conn(share, spider, link_idx, base_link_idx,
- conn_kind, error_num)))
+ if (!(conn= spider_create_conn(share, spider, link_idx, base_link_idx,
+ error_num)))
goto error;
*conn->conn_key = *conn_key;
if (spider)
@@ -892,13 +888,10 @@ int spider_check_and_get_casual_read_conn(
char first_byte_bak = *spider->conn_keys[link_idx];
*spider->conn_keys[link_idx] =
'0' + spider->result_list.casual_read[link_idx];
- if (
- !(spider->conns[link_idx] =
- spider_get_conn(spider->share, link_idx,
- spider->conn_keys[link_idx], spider->wide_handler->trx,
- spider, FALSE, TRUE, SPIDER_CONN_KIND_MYSQL,
- &error_num))
- ) {
+ if (!(spider->conns[link_idx]= spider_get_conn(
+ spider->share, link_idx, spider->conn_keys[link_idx],
+ spider->wide_handler->trx, spider, FALSE, TRUE, &error_num)))
+ {
*spider->conn_keys[link_idx] = first_byte_bak;
DBUG_RETURN(error_num);
}
@@ -3017,9 +3010,8 @@ void *spider_bg_sts_action(
if (!conns[spider.search_link_idx])
{
spider_get_conn(share, spider.search_link_idx,
- share->conn_keys[spider.search_link_idx],
- trx, &spider, FALSE, FALSE, SPIDER_CONN_KIND_MYSQL,
- &error_num);
+ share->conn_keys[spider.search_link_idx], trx,
+ &spider, FALSE, FALSE, &error_num);
conns[spider.search_link_idx]->error_mode = 0;
/*
if (
@@ -3342,9 +3334,8 @@ void *spider_bg_crd_action(
if (!conns[spider.search_link_idx])
{
spider_get_conn(share, spider.search_link_idx,
- share->conn_keys[spider.search_link_idx],
- trx, &spider, FALSE, FALSE, SPIDER_CONN_KIND_MYSQL,
- &error_num);
+ share->conn_keys[spider.search_link_idx], trx,
+ &spider, FALSE, FALSE, &error_num);
conns[spider.search_link_idx]->error_mode = 0;
/*
if (
@@ -3911,7 +3902,6 @@ SPIDER_CONN* spider_get_conn_from_idle_connection(
int link_idx,
char *conn_key,
ha_spider *spider,
- uint conn_kind,
int base_link_idx,
int *error_num
)
@@ -3999,7 +3989,8 @@ SPIDER_CONN* spider_get_conn_from_idle_connection(
if (ip_port_conn)
pthread_mutex_unlock(&ip_port_conn->mutex);
DBUG_PRINT("info",("spider create new conn"));
- if (!(conn = spider_create_conn(share, spider, link_idx, base_link_idx, conn_kind, error_num)))
+ if (!(conn= spider_create_conn(share, spider, link_idx, base_link_idx,
+ error_num)))
DBUG_RETURN(conn);
*conn->conn_key = *conn_key;
if (spider)
diff --git a/storage/spider/spd_conn.h b/storage/spider/spd_conn.h
index 807e1474ed2..1759f06baa6 100644
--- a/storage/spider/spd_conn.h
+++ b/storage/spider/spd_conn.h
@@ -84,7 +84,6 @@ SPIDER_CONN *spider_create_conn(
ha_spider *spider,
int link_id,
int base_link_id,
- uint conn_kind,
int *error_num
);
@@ -96,7 +95,6 @@ SPIDER_CONN *spider_get_conn(
ha_spider *spider,
bool another,
bool thd_chg,
- uint conn_kind,
int *error_num
);
@@ -397,7 +395,6 @@ SPIDER_CONN* spider_get_conn_from_idle_connection
int link_idx,
char *conn_key,
ha_spider *spider,
- uint conn_kind,
int base_link_idx,
int *error_num
);
diff --git a/storage/spider/spd_copy_tables.cc b/storage/spider/spd_copy_tables.cc
index eb2454b2f29..5e28b590309 100644
--- a/storage/spider/spd_copy_tables.cc
+++ b/storage/spider/spd_copy_tables.cc
@@ -593,11 +593,10 @@ int spider_udf_get_copy_tgt_conns(
while (table_conn)
{
share = table_conn->share;
- if (
- !(table_conn->conn = spider_get_conn(
- share, 0, share->conn_keys[0], trx, NULL, FALSE, FALSE,
- SPIDER_CONN_KIND_MYSQL, &error_num))
- ) {
+ if (!(table_conn->conn=
+ spider_get_conn(share, 0, share->conn_keys[0], trx, NULL,
+ FALSE, FALSE, &error_num)))
+ {
my_error(ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), share->server_names[0]);
DBUG_RETURN(ER_CONNECT_TO_FOREIGN_DATA_SOURCE);
}
diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc
index b64aaab4d58..9c91d666c0a 100644
--- a/storage/spider/spd_db_conn.cc
+++ b/storage/spider/spd_db_conn.cc
@@ -80,7 +80,7 @@ int spider_db_connect(
THD* thd = current_thd;
longlong connect_retry_interval;
DBUG_ENTER("spider_db_connect");
- DBUG_ASSERT(conn->conn_kind != SPIDER_CONN_KIND_MYSQL || conn->need_mon);
+ DBUG_ASSERT(conn->need_mon);
DBUG_PRINT("info",("spider link_idx=%d", link_idx));
DBUG_PRINT("info",("spider conn=%p", conn));
@@ -240,7 +240,6 @@ void spider_db_disconnect(
) {
DBUG_ENTER("spider_db_disconnect");
DBUG_PRINT("info",("spider conn=%p", conn));
- DBUG_PRINT("info",("spider conn->conn_kind=%u", conn->conn_kind));
if (conn->db_conn->is_connected())
{
conn->db_conn->disconnect();
diff --git a/storage/spider/spd_db_include.h b/storage/spider/spd_db_include.h
index bbe27271e37..8b2ebb821df 100644
--- a/storage/spider/spd_db_include.h
+++ b/storage/spider/spd_db_include.h
@@ -168,8 +168,6 @@ typedef st_spider_result SPIDER_RESULT;
#define SPIDER_SQL_LOP_CHK_PRM_PRF_STR "spider_lc_"
#define SPIDER_SQL_LOP_CHK_PRM_PRF_LEN (sizeof(SPIDER_SQL_LOP_CHK_PRM_PRF_STR) - 1)
-#define SPIDER_CONN_KIND_MYSQL (1 << 0)
-
#define SPIDER_SQL_TYPE_SELECT_SQL (1 << 0)
#define SPIDER_SQL_TYPE_INSERT_SQL (1 << 1)
#define SPIDER_SQL_TYPE_UPDATE_SQL (1 << 2)
diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc
index 78236243bf2..21c2e6bb434 100644
--- a/storage/spider/spd_db_mysql.cc
+++ b/storage/spider/spd_db_mysql.cc
@@ -5839,88 +5839,7 @@ int spider_db_mbase_util::open_item_func(
alias, alias_length, dbton_id, use_fields, fields));
} else if (!strncasecmp("timestampdiff", func_name, func_name_length))
{
-#ifdef ITEM_FUNC_TIMESTAMPDIFF_ARE_PUBLIC
- Item_func_timestamp_diff *item_func_timestamp_diff =
- (Item_func_timestamp_diff *) item_func;
- if (str)
- {
- const char *interval_str;
- uint interval_len;
- switch (item_func_timestamp_diff->int_type)
- {
- case INTERVAL_YEAR:
- interval_str = SPIDER_SQL_YEAR_STR;
- interval_len = SPIDER_SQL_YEAR_LEN;
- break;
- case INTERVAL_QUARTER:
- interval_str = SPIDER_SQL_QUARTER_STR;
- interval_len = SPIDER_SQL_QUARTER_LEN;
- break;
- case INTERVAL_MONTH:
- interval_str = SPIDER_SQL_MONTH_STR;
- interval_len = SPIDER_SQL_MONTH_LEN;
- break;
- case INTERVAL_WEEK:
- interval_str = SPIDER_SQL_WEEK_STR;
- interval_len = SPIDER_SQL_WEEK_LEN;
- break;
- case INTERVAL_DAY:
- interval_str = SPIDER_SQL_DAY_STR;
- interval_len = SPIDER_SQL_DAY_LEN;
- break;
- case INTERVAL_HOUR:
- interval_str = SPIDER_SQL_HOUR_STR;
- interval_len = SPIDER_SQL_HOUR_LEN;
- break;
- case INTERVAL_MINUTE:
- interval_str = SPIDER_SQL_MINUTE_STR;
- interval_len = SPIDER_SQL_MINUTE_LEN;
- break;
- case INTERVAL_SECOND:
- interval_str = SPIDER_SQL_SECOND_STR;
- interval_len = SPIDER_SQL_SECOND_LEN;
- break;
- case INTERVAL_MICROSECOND:
- interval_str = SPIDER_SQL_MICROSECOND_STR;
- interval_len = SPIDER_SQL_MICROSECOND_LEN;
- break;
- default:
- interval_str = "";
- interval_len = 0;
- break;
- }
- str->length(str->length() - SPIDER_SQL_OPEN_PAREN_LEN);
- if (str->reserve(func_name_length + SPIDER_SQL_OPEN_PAREN_LEN +
- interval_len + SPIDER_SQL_COMMA_LEN))
- DBUG_RETURN(HA_ERR_OUT_OF_MEM);
- str->q_append(func_name, func_name_length);
- str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
- str->q_append(interval_str, interval_len);
- str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN);
- }
- if ((error_num = spider_db_print_item_type(item_list[0], NULL, spider,
- str, alias, alias_length, dbton_id, use_fields, fields)))
- DBUG_RETURN(error_num);
- if (str)
- {
- if (str->reserve(SPIDER_SQL_COMMA_LEN))
- DBUG_RETURN(HA_ERR_OUT_OF_MEM);
- str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN);
- }
- if ((error_num = spider_db_print_item_type(item_list[1], NULL, spider,
- str, alias, alias_length, dbton_id, use_fields, fields)))
- DBUG_RETURN(error_num);
- if (str)
- {
- if (str->reserve(SPIDER_SQL_CLOSE_PAREN_LEN))
- DBUG_RETURN(HA_ERR_OUT_OF_MEM);
- str->q_append(SPIDER_SQL_CLOSE_PAREN_STR,
- SPIDER_SQL_CLOSE_PAREN_LEN);
- }
- DBUG_RETURN(0);
-#else
DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
-#endif
}
} else if (func_name_length == 14)
{
@@ -8298,10 +8217,10 @@ int spider_mbase_share::discover_table_structure(
SPIDER_CONN *conn;
int need_mon;
- if (!(conn = spider_get_conn(
- spider_share, 0, spider_share->conn_keys[roop_count], trx, NULL, FALSE,
- FALSE, SPIDER_CONN_KIND_MYSQL, &error_num))
- ) {
+ if (!(conn= spider_get_conn(spider_share, 0,
+ spider_share->conn_keys[roop_count], trx, NULL,
+ FALSE, FALSE, &error_num)))
+ {
DBUG_RETURN(error_num);
}
pthread_mutex_assert_not_owner(&conn->mta_conn_mutex);
diff --git a/storage/spider/spd_direct_sql.cc b/storage/spider/spd_direct_sql.cc
index 429c8fa9ae7..40486073730 100644
--- a/storage/spider/spd_direct_sql.cc
+++ b/storage/spider/spd_direct_sql.cc
@@ -551,7 +551,6 @@ SPIDER_CONN *spider_udf_direct_sql_create_conn(
conn->semi_trx_isolation = -2;
conn->semi_trx_isolation_chk = FALSE;
conn->semi_trx_chk = FALSE;
- conn->conn_kind = SPIDER_CONN_KIND_MYSQL;
if (mysql_mutex_init(spd_key_mutex_mta_conn, &conn->mta_conn_mutex,
MY_MUTEX_INIT_FAST))
@@ -697,7 +696,6 @@ SPIDER_CONN *spider_udf_direct_sql_get_conn(
conn->queued_ping = FALSE;
DBUG_PRINT("info",("spider conn=%p", conn));
- DBUG_PRINT("info",("spider conn->conn_kind=%u", conn->conn_kind));
DBUG_RETURN(conn);
error:
diff --git a/storage/spider/spd_include.h b/storage/spider/spd_include.h
index 26acaaa86ec..e6d4c2dca87 100644
--- a/storage/spider/spd_include.h
+++ b/storage/spider/spd_include.h
@@ -85,7 +85,6 @@
#define SPIDER_TEST(A) MY_TEST(A)
-#define SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
#define SPIDER_ENGINE_CONDITION_PUSHDOWN_IS_ALWAYS_ON
#define SPIDER_Item_args_arg_count_IS_PROTECTED
@@ -328,7 +327,6 @@ typedef struct st_spider_conn_loop_check SPIDER_CONN_LOOP_CHECK;
/* database connection */
typedef struct st_spider_conn
{
- uint conn_kind;
char *conn_key;
uint conn_key_length;
my_hash_value_type conn_key_hash_value;
diff --git a/storage/spider/spd_init_query.h b/storage/spider/spd_init_query.h
index e66e94d8373..c3bea1c166b 100644
--- a/storage/spider/spd_init_query.h
+++ b/storage/spider/spd_init_query.h
@@ -538,16 +538,15 @@ static LEX_STRING spider_init_queries[] = {
" engine=Aria transactional=1;"
" end if;"
" end if;"
+/*
+ tables for ddl pushdown
+*/
+/*
" if @server_name = 'MariaDB' and"
" ("
- " @server_major_version > 10 or"
- " ("
- " @server_major_version = 10 and"
- " @server_minor_version >= 999"
- " )"
+ " @server_major_version > 11"
" )"
" then"
- " /* table for ddl pushdown */"
" create table if not exists mysql.spider_rewrite_tables("
" table_id bigint unsigned not null auto_increment,"
" db_name char(64) not null default '',"
@@ -602,6 +601,7 @@ static LEX_STRING spider_init_queries[] = {
" primary key (db_name, table_name, table_id, partition_id)"
" ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;"
" end if;"
+*/
/*
Fix for version 3.4
*/
@@ -798,18 +798,15 @@ static LEX_STRING spider_init_queries[] = {
" soname 'ha_spider.dll';"
" end if;"
" end if;"
- " if @server_name = 'MariaDB' and"
- " ("
- " @server_major_version > 10 or"
- " ("
- " @server_major_version = 10 and"
- " @server_minor_version >= 999"
- " )"
- " )"
- " then"
/*
Install spider_rewrite plugin
*/
+/*
+ " if @server_name = 'MariaDB' and "
+ " ("
+ " @server_major_version > 11"
+ " )"
+ " then"
" set @have_spider_i_s_rewrite_plugin := 0;"
" select @have_spider_i_s_rewrite_plugin := 1"
" from INFORMATION_SCHEMA.plugins"
@@ -819,11 +816,6 @@ static LEX_STRING spider_init_queries[] = {
" where name = 'spider_rewrite';"
" if @have_spider_i_s_rewrite_plugin = 0 then"
" if @have_spider_rewrite_plugin = 1 then"
- " /*"
- " spider_rewrite plugin is present in mysql.plugin but not in"
- " information_schema.plugins. Remove spider_rewrite plugin entry"
- " in mysql.plugin first."
- " */"
" delete from mysql.plugin where name = 'spider_rewrite';"
" end if;"
" if @win_plugin = 0 then "
@@ -845,6 +837,7 @@ static LEX_STRING spider_init_queries[] = {
" end if;"
" end if;"
" end if;"
+*/
"end;"
)},
{C_STRING_WITH_LEN(
diff --git a/storage/spider/spd_ping_table.cc b/storage/spider/spd_ping_table.cc
index b331a9fec0d..e82a5925265 100644
--- a/storage/spider/spd_ping_table.cc
+++ b/storage/spider/spd_ping_table.cc
@@ -594,11 +594,9 @@ SPIDER_CONN *spider_get_ping_table_tgt_conn(
) {
SPIDER_CONN *conn;
DBUG_ENTER("spider_get_ping_table_tgt_conn");
- if (
- !(conn = spider_get_conn(
- share, 0, share->conn_keys[0], trx, NULL, FALSE, FALSE,
- SPIDER_CONN_KIND_MYSQL, error_num))
- ) {
+ if (!(conn= spider_get_conn(share, 0, share->conn_keys[0], trx, NULL, FALSE,
+ FALSE, error_num)))
+ {
my_error(ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0),
share->server_names[0]);
*error_num = ER_CONNECT_TO_FOREIGN_DATA_SOURCE;
diff --git a/storage/spider/spd_sys_table.cc b/storage/spider/spd_sys_table.cc
index a0cf104d46e..df95336cc19 100644
--- a/storage/spider/spd_sys_table.cc
+++ b/storage/spider/spd_sys_table.cc
@@ -3572,24 +3572,13 @@ TABLE *spider_mk_sys_tmp_table(
TABLE *tmp_table;
DBUG_ENTER("spider_mk_sys_tmp_table");
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(field = new (thd->mem_root) Field_blob(
4294967295U, FALSE, field_name, cs, TRUE)))
goto error_alloc_field;
-#else
- if (!(field = new Field_blob(
- 4294967295U, FALSE, field_name, cs, TRUE)))
- goto error_alloc_field;
-#endif
field->init(table);
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(i_field = new (thd->mem_root) Item_field(thd, (Field *) field)))
goto error_alloc_item_field;
-#else
- if (!(i_field = new Item_field((Field *) field)))
- goto error_alloc_item_field;
-#endif
if (i_list.push_back(i_field))
goto error_push_item;
@@ -3650,68 +3639,35 @@ TABLE *spider_mk_sys_tmp_table_for_result(
TABLE *tmp_table;
DBUG_ENTER("spider_mk_sys_tmp_table_for_result");
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(field1 = new (thd->mem_root) Field_blob(
4294967295U, FALSE, field_name1, cs, TRUE)))
goto error_alloc_field1;
-#else
- if (!(field1 = new Field_blob(
- 4294967295U, FALSE, field_name1, cs, TRUE)))
- goto error_alloc_field1;
-#endif
field1->init(table);
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(i_field1 = new (thd->mem_root) Item_field(thd, (Field *) field1)))
goto error_alloc_item_field1;
-#else
- if (!(i_field1 = new Item_field((Field *) field1)))
- goto error_alloc_item_field1;
-#endif
if (i_list.push_back(i_field1))
goto error_push_item1;
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(field2 = new (thd->mem_root) Field_blob(
4294967295U, FALSE, field_name2, cs, TRUE)))
goto error_alloc_field2;
-#else
- if (!(field2 = new Field_blob(
- 4294967295U, FALSE, field_name2, cs, TRUE)))
- goto error_alloc_field2;
-#endif
field2->init(table);
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(i_field2 = new (thd->mem_root) Item_field(thd, (Field *) field2)))
goto error_alloc_item_field2;
-#else
- if (!(i_field2 = new Item_field((Field *) field2)))
- goto error_alloc_item_field2;
-#endif
if (i_list.push_back(i_field2))
goto error_push_item2;
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(field3 = new (thd->mem_root) Field_blob(
4294967295U, FALSE, field_name3, cs, TRUE)))
goto error_alloc_field3;
-#else
- if (!(field3 = new Field_blob(
- 4294967295U, FALSE, field_name3, cs, TRUE)))
- goto error_alloc_field3;
-#endif
field3->init(table);
-#ifdef SPIDER_FIELD_FIELDPTR_REQUIRES_THDPTR
if (!(i_field3 = new (thd->mem_root) Item_field(thd, (Field *) field3)))
goto error_alloc_item_field3;
-#else
- if (!(i_field3 = new Item_field((Field *) field3)))
- goto error_alloc_item_field3;
-#endif
if (i_list.push_back(i_field3))
goto error_push_item3;
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc
index 46dba6abdc9..13afb5077d3 100644
--- a/storage/spider/spd_table.cc
+++ b/storage/spider/spd_table.cc
@@ -4842,7 +4842,6 @@ SPIDER_SHARE *spider_get_share(
&spider->conn_link_idx, sizeof(uint) * share->link_count,
&spider->conn_can_fo, sizeof(uchar) * share->link_bitmap_size,
&spider->connection_ids, sizeof(ulonglong) * share->link_count,
- &spider->conn_kind, sizeof(uint) * share->link_count,
&spider->db_request_id, sizeof(ulonglong) * share->link_count,
&spider->db_request_phase, sizeof(uchar) * share->link_bitmap_size,
&spider->need_mons, sizeof(int) * share->link_count,
@@ -4875,7 +4874,6 @@ SPIDER_SHARE *spider_get_share(
tmp_name += share->conn_keys_lengths[roop_count] + 1;
result_list->upd_tmp_tbl_prms[roop_count].init();
result_list->upd_tmp_tbl_prms[roop_count].field_count = 1;
- spider->conn_kind[roop_count] = SPIDER_CONN_KIND_MYSQL;
}
spider_trx_set_link_idx_for_all(spider);
@@ -4930,7 +4928,6 @@ SPIDER_SHARE *spider_get_share(
!(spider->conns[roop_count] =
spider_get_conn(share, roop_count, spider->conn_keys[roop_count],
spider->wide_handler->trx, spider, FALSE, TRUE,
- SPIDER_CONN_KIND_MYSQL,
error_num))
) {
if (
@@ -5297,7 +5294,6 @@ SPIDER_SHARE *spider_get_share(
&spider->conn_link_idx, sizeof(uint) * share->link_count,
&spider->conn_can_fo, sizeof(uchar) * share->link_bitmap_size,
&spider->connection_ids, sizeof(ulonglong) * share->link_count,
- &spider->conn_kind, sizeof(uint) * share->link_count,
&spider->db_request_id, sizeof(ulonglong) * share->link_count,
&spider->db_request_phase, sizeof(uchar) * share->link_bitmap_size,
&spider->need_mons, sizeof(int) * share->link_count,
@@ -5327,7 +5323,6 @@ SPIDER_SHARE *spider_get_share(
tmp_name += share->conn_keys_lengths[roop_count] + 1;
result_list->upd_tmp_tbl_prms[roop_count].init();
result_list->upd_tmp_tbl_prms[roop_count].field_count = 1;
- spider->conn_kind[roop_count] = SPIDER_CONN_KIND_MYSQL;
}
spider_trx_set_link_idx_for_all(spider);
@@ -5379,7 +5374,6 @@ SPIDER_SHARE *spider_get_share(
!(spider->conns[roop_count] =
spider_get_conn(share, roop_count, spider->conn_keys[roop_count],
spider->wide_handler->trx, spider, FALSE, TRUE,
- SPIDER_CONN_KIND_MYSQL,
error_num))
) {
if (
@@ -6034,11 +6028,9 @@ int spider_open_all_tables(
}
/* create conn */
- if (
- !(conn = spider_get_conn(
- &tmp_share, 0, tmp_share.conn_keys[0], trx, NULL, FALSE, FALSE,
- SPIDER_CONN_KIND_MYSQL, &error_num))
- ) {
+ if (!(conn= spider_get_conn(&tmp_share, 0, tmp_share.conn_keys[0], trx,
+ NULL, FALSE, FALSE, &error_num)))
+ {
spider_sys_index_end(table_tables);
spider_close_sys_table(thd, table_tables,
&open_tables_backup, TRUE);
@@ -6149,11 +6141,9 @@ int spider_open_all_tables(
}
/* create another conn */
- if (
- (!(conn = spider_get_conn(
- &tmp_share, 0, tmp_share.conn_keys[0], trx, spider, TRUE, FALSE,
- SPIDER_CONN_KIND_MYSQL, &error_num)))
- ) {
+ if ((!(conn= spider_get_conn(&tmp_share, 0, tmp_share.conn_keys[0], trx,
+ spider, TRUE, FALSE, &error_num))))
+ {
spider_free_tmp_dbton_handler(spider);
spider_free(trx, share, MYF(0));
delete spider;
@@ -6520,6 +6510,25 @@ int spider_panic(
DBUG_RETURN(0);
}
+/* Fill *costs with the optimizer cost constants Spider uses; spider_db_init()
+ installs this function as spider_hton->update_optimizer_costs. */
+static void spider_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /* Assume 1 Gigabit network (1000000000/8 bytes per second). The 8.0 forces
+ floating-point division; with integer operands the quotient is truncated. */
+ costs->disk_read_cost= IO_SIZE/(1000000000/8.0)*1000.00000;
+ costs->index_block_copy_cost= 0; // Not used
+
+ /* The following costs are copied from ha_innodb.cc. The assumption is that
+ the default storage engine used with Spider is InnoDB. */
+ costs->row_next_find_cost= 0.00007013;
+ costs->row_lookup_cost= 0.00076597;
+ costs->key_next_find_cost= 0.00009900;
+ costs->key_lookup_cost= 0.00079112;
+ costs->row_copy_cost= 0.00006087;
+}
+
+
int spider_db_init(
void *p
) {
@@ -6563,6 +6572,7 @@ int spider_db_init(
spider_hton->show_status = spider_show_status;
spider_hton->create_group_by = spider_create_group_by_handler;
spider_hton->table_options= spider_table_option_list;
+ spider_hton->update_optimizer_costs= spider_update_optimizer_costs;
if (my_gethwaddr((uchar *) addr))
{
@@ -9180,9 +9190,8 @@ void *spider_table_bg_sts_action(
if (!conns[spider->search_link_idx])
{
spider_get_conn(share, spider->search_link_idx,
- share->conn_keys[spider->search_link_idx],
- trx, spider, FALSE, FALSE, SPIDER_CONN_KIND_MYSQL,
- &error_num);
+ share->conn_keys[spider->search_link_idx], trx,
+ spider, FALSE, FALSE, &error_num);
if (conns[spider->search_link_idx])
{
conns[spider->search_link_idx]->error_mode = 0;
@@ -9326,9 +9335,8 @@ void *spider_table_bg_crd_action(
if (!conns[spider->search_link_idx])
{
spider_get_conn(share, spider->search_link_idx,
- share->conn_keys[spider->search_link_idx],
- trx, spider, FALSE, FALSE, SPIDER_CONN_KIND_MYSQL,
- &error_num);
+ share->conn_keys[spider->search_link_idx], trx,
+ spider, FALSE, FALSE, &error_num);
if (conns[spider->search_link_idx])
{
conns[spider->search_link_idx]->error_mode = 0;
diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc
index f266b27c871..8e1257bad21 100644
--- a/storage/spider/spd_trx.cc
+++ b/storage/spider/spd_trx.cc
@@ -2748,13 +2748,11 @@ int spider_internal_xa_commit_by_xid(
goto error;
}
- if (
- !(conn = spider_get_conn(
- &tmp_share, 0, tmp_share.conn_keys[0], trx, NULL, FALSE, FALSE,
- SPIDER_CONN_KIND_MYSQL, &error_num)) &&
- (force_commit == 0 ||
- (force_commit == 1 && error_num != ER_XAER_NOTA))
- ) {
+ if (!(conn= spider_get_conn(&tmp_share, 0, tmp_share.conn_keys[0], trx,
+ NULL, FALSE, FALSE, &error_num)) &&
+ (force_commit == 0 ||
+ (force_commit == 1 && error_num != ER_XAER_NOTA)))
+ {
spider_sys_index_end(table_xa_member);
spider_free_tmp_share_alloc(&tmp_share);
free_root(&mem_root, MYF(0));
@@ -2977,13 +2975,11 @@ int spider_internal_xa_rollback_by_xid(
goto error;
}
- if (
- !(conn = spider_get_conn(
- &tmp_share, 0, tmp_share.conn_keys[0], trx, NULL, FALSE, FALSE,
- SPIDER_CONN_KIND_MYSQL, &error_num)) &&
- (force_commit == 0 ||
- (force_commit == 1 && error_num != ER_XAER_NOTA))
- ) {
+ if (!(conn= spider_get_conn(&tmp_share, 0, tmp_share.conn_keys[0], trx,
+ NULL, FALSE, FALSE, &error_num)) &&
+ (force_commit == 0 ||
+ (force_commit == 1 && error_num != ER_XAER_NOTA)))
+ {
spider_sys_index_end(table_xa_member);
spider_free_tmp_share_alloc(&tmp_share);
free_root(&mem_root, MYF(0));
@@ -3473,11 +3469,8 @@ int spider_end_trx(
DBUG_RETURN(error_num);
}
-int spider_check_trx_and_get_conn(
- THD *thd,
- ha_spider *spider,
- bool use_conn_kind
-) {
+int spider_check_trx_and_get_conn(THD *thd, ha_spider *spider)
+{
int error_num, roop_count, search_link_idx;
SPIDER_TRX *trx;
SPIDER_SHARE *share = spider->share;
@@ -3577,22 +3570,16 @@ int spider_check_trx_and_get_conn(
spider->conn_link_idx, roop_count, share->link_count,
SPIDER_LINK_STATUS_RECOVERY)
) {
- uint tgt_conn_kind = (use_conn_kind ? spider->conn_kind[roop_count] :
- SPIDER_CONN_KIND_MYSQL);
if (roop_count == spider->search_link_idx)
search_link_idx_is_checked = TRUE;
- if (
- tgt_conn_kind == SPIDER_CONN_KIND_MYSQL &&
- !spider->conns[roop_count]
- ) {
+ if (!spider->conns[roop_count])
+ {
*spider->conn_keys[roop_count] = first_byte;
if (
!(conn =
spider_get_conn(share, roop_count,
spider->conn_keys[roop_count], trx,
spider, FALSE, TRUE,
- use_conn_kind ? spider->conn_kind[roop_count] :
- SPIDER_CONN_KIND_MYSQL,
&error_num))
) {
if (
@@ -3672,8 +3659,6 @@ int spider_check_trx_and_get_conn(
spider_get_conn(share, roop_count,
spider->conn_keys[roop_count], trx,
spider, FALSE, TRUE,
- use_conn_kind ? spider->conn_kind[roop_count] :
- SPIDER_CONN_KIND_MYSQL,
&error_num))
) {
if (
diff --git a/storage/spider/spd_trx.h b/storage/spider/spd_trx.h
index 2055a49717e..93b03fcec21 100644
--- a/storage/spider/spd_trx.h
+++ b/storage/spider/spd_trx.h
@@ -227,11 +227,7 @@ int spider_end_trx(
SPIDER_CONN *conn
);
-int spider_check_trx_and_get_conn(
- THD *thd,
- ha_spider *spider,
- bool use_conn_kind
-);
+int spider_check_trx_and_get_conn(THD *thd, ha_spider *spider);
THD *spider_create_tmp_thd();