summary refs log tree commit diff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2022-12-14 14:44:28 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2023-01-11 17:59:55 +0200
commit 944beb9e7acd53488dbfc7edc09bf29e10ce68ab (patch)
tree 13ba5cee83cfa68fb9af0224055eec1f858ebeb7
parent f27e9c894779a4c7ebe6446ba9aa408f1771c114 (diff)
download mariadb-git-944beb9e7acd53488dbfc7edc09bf29e10ce68ab.tar.gz
MDEV-19506 Remove the global sequence DICT_HDR_ROW_ID for DB_ROW_ID
InnoDB tables that lack a primary key (and any UNIQUE INDEX all of whose columns are NOT NULL) will use an internally generated index, called GEN_CLUST_INDEX(DB_ROW_ID) in the InnoDB data dictionary, and hidden from the SQL layer.

The 48-bit (6-byte) DB_ROW_ID is being assigned from a global sequence that is persisted in the DICT_HDR page.

There is absolutely no reason for the DB_ROW_ID to be globally unique across all InnoDB tables.

A downgrade to earlier versions will be prevented by the file format change related to removing the InnoDB change buffer (MDEV-29694).

DICT_HDR_ROW_ID, dict_sys_t::row_id: Remove.

dict_table_t::row_id: The per-table sequence of DB_ROW_ID.

commit_try_rebuild(): Copy dict_table_t::row_id from the old table.

btr_cur_instant_init(), row_import_cleanup(): If needed, perform the equivalent of SELECT MAX(DB_ROW_ID) to initialize dict_table_t::row_id.

row_ins(): If needed, obtain DB_ROW_ID from dict_table_t::row_id. Should it exceed the maximum 48-bit value, return DB_OUT_OF_FILE_SPACE to prevent further inserts into the table.

dict_load_table_one(): Move a condition to btr_cur_instant_init_low() so that dict_table_t::row_id will be restored also for ROW_FORMAT=COMPRESSED tables.

Tested by: Matthias Leich
-rw-r--r-- storage/innobase/btr/btr0cur.cc 36
-rw-r--r-- storage/innobase/dict/dict0boot.cc 27
-rw-r--r-- storage/innobase/dict/dict0dict.cc 1
-rw-r--r-- storage/innobase/dict/dict0load.cc 4
-rw-r--r-- storage/innobase/handler/handler0alter.cc 1
-rw-r--r-- storage/innobase/include/dict0boot.h 35
-rw-r--r-- storage/innobase/include/dict0dict.h 19
-rw-r--r-- storage/innobase/include/dict0dict.inl 2
-rw-r--r-- storage/innobase/include/dict0mem.h 2
-rw-r--r-- storage/innobase/row/row0import.cc 79
-rw-r--r-- storage/innobase/row/row0ins.cc 24
11 files changed, 62 insertions, 168 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index a4260c7f111..9a25892ab52 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -348,10 +348,14 @@ when loading a table definition.
static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
{
ut_ad(index->is_primary());
- ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
- ut_ad(index->table->supports_instant());
ut_ad(index->table->is_readable());
+ if (!index->table->supports_instant()) {
+ return DB_SUCCESS;
+ }
+
+ ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
+
dberr_t err;
const fil_space_t* space = index->table->space;
if (!space) {
@@ -618,17 +622,25 @@ when loading a table definition.
@param[in,out] table table definition from the data dictionary
@return error code
@retval DB_SUCCESS if no error occurred */
-dberr_t
-btr_cur_instant_init(dict_table_t* table)
+dberr_t btr_cur_instant_init(dict_table_t *table)
{
- mtr_t mtr;
- dict_index_t* index = dict_table_get_first_index(table);
- mtr.start();
- dberr_t err = index
- ? btr_cur_instant_init_low(index, &mtr)
- : DB_CORRUPTION;
- mtr.commit();
- return(err);
+ mtr_t mtr;
+ dict_index_t *index= dict_table_get_first_index(table);
+ mtr.start();
+ dberr_t err = index ? btr_cur_instant_init_low(index, &mtr) : DB_CORRUPTION;
+ mtr.commit();
+ if (err == DB_SUCCESS && index->is_gen_clust())
+ {
+ btr_cur_t cur;
+ mtr.start();
+ err= cur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr);
+ if (err != DB_SUCCESS);
+ else if (const rec_t *rec= page_rec_get_prev(btr_cur_get_rec(&cur)))
+ if (page_rec_is_user_rec(rec))
+ table->row_id= mach_read_from_6(rec);
+ mtr.commit();
+ }
+ return(err);
}
/** Initialize the n_core_null_bytes on first access to a clustered
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 8d18b37f132..316d0f01322 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -93,18 +93,6 @@ dict_hdr_get_new_id(
mtr.commit();
}
-/** Update dict_sys.row_id in the dictionary header file page. */
-void dict_hdr_flush_row_id(row_id_t id)
-{
- mtr_t mtr;
- mtr.start();
- buf_block_t* d= dict_hdr_get(&mtr);
- byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame;
- if (mach_read_from_8(row_id) < id)
- mtr.write<8>(*d, row_id, id);
- mtr.commit();
-}
-
/** Create the DICT_HDR page on database initialization.
@return error code */
dberr_t dict_create()
@@ -126,10 +114,8 @@ dberr_t dict_create()
}
ut_a(d->page.id() == hdr_page_id);
- /* Start counting row, table, index, and tree ids from
+ /* Start counting table, index, and tree ids from
DICT_HDR_FIRST_ID */
- mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame,
- DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame,
DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame,
@@ -245,17 +231,6 @@ dberr_t dict_boot()
const byte* dict_hdr = &d->page.frame[DICT_HDR];
- /* Because we only write new row ids to disk-based data structure
- (dictionary header) when it is divisible by
- DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
- the latest value of the row id counter. Therefore we advance
- the counter at the database startup to avoid overlapping values.
- Note that when a user after database startup first time asks for
- a new row id, then because the counter is now divisible by
- ..._MARGIN, it will immediately be updated to the disk-based
- header. */
-
- dict_sys.recover_row_id(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID));
if (uint32_t max_space_id
= mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) {
max_space_id--;
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index c1c911480dd..6c003c43b4f 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table)
ulint fold = my_crc32c(0, table->name.m_name,
strlen(table->name.m_name));
+ table->row_id = 0;
table->autoinc_mutex.init();
table->lock_mutex_init();
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index b442e926648..cb81e4008df 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -2471,9 +2471,7 @@ corrupted:
goto corrupted;
}
- if (table->supports_instant()) {
- err = btr_cur_instant_init(table);
- }
+ err = btr_cur_instant_init(table);
}
} else {
ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX);
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index abca00323e0..d0f1aa9c342 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -10219,6 +10219,7 @@ commit_try_rebuild(
/* We must be still holding a table handle. */
DBUG_ASSERT(user_table->get_ref_count() == 1);
+ rebuilt_table->row_id = uint64_t{user_table->row_id};
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
switch (error) {
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index a65287476ef..68400d2095d 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -44,39 +44,6 @@ dict_hdr_get_new_id(
(not assigned if NULL) */
uint32_t* space_id); /*!< out: space id
(not assigned if NULL) */
-/** Update dict_sys.row_id in the dictionary header file page. */
-void dict_hdr_flush_row_id(row_id_t id);
-/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
-inline row_id_t dict_sys_t::get_new_row_id()
-{
- row_id_t id= row_id.fetch_add(1);
- if (!(id % ROW_ID_WRITE_MARGIN))
- dict_hdr_flush_row_id(id);
- return id;
-}
-
-/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
-inline void dict_sys_t::update_row_id(row_id_t id)
-{
- row_id_t sys_id= row_id;
- while (id >= sys_id)
- {
- if (!row_id.compare_exchange_strong(sys_id, id))
- continue;
- if (!(id % ROW_ID_WRITE_MARGIN))
- dict_hdr_flush_row_id(id);
- break;
- }
-}
-
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-inline void dict_sys_write_row_id(byte *field, row_id_t row_id)
-{
- static_assert(DATA_ROW_ID_LEN == 6, "compatibility");
- mach_write_to_6(field, row_id);
-}
-
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
@@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; }
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
-#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
+//#define DICT_HDR_ROW_ID 0 /* Was: latest assigned DB_ROW_ID */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 1002213e61c..628ad8366af 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -648,7 +648,7 @@ dict_table_get_all_fts_indexes(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
-@return number of user-defined (e.g., not ROW_ID) non-virtual
+@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned
@@ -1370,27 +1370,10 @@ private:
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
/** hash table of temporary table IDs */
hash_table_t temp_id_hash;
- /** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
- (FIXME: remove this, and move to dict_table_t) */
- Atomic_relaxed<row_id_t> row_id;
- /** The synchronization interval of row_id */
- static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
public:
/** Diagnostic message for exceeding the lock_wait() timeout */
static const char fatal_msg[];
- /** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
- inline row_id_t get_new_row_id();
-
- /** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
- inline void update_row_id(row_id_t id);
-
- /** Recover the global DB_ROW_ID sequence on database startup */
- void recover_row_id(row_id_t id)
- {
- row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
- }
-
/** @return a new temporary table ID */
table_id_t acquire_temporary_table_id()
{
diff --git a/storage/innobase/include/dict0dict.inl b/storage/innobase/include/dict0dict.inl
index 4cc3eae96ab..ead22a21757 100644
--- a/storage/innobase/include/dict0dict.inl
+++ b/storage/innobase/include/dict0dict.inl
@@ -244,7 +244,7 @@ dict_table_get_next_index(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
-@return number of user-defined (e.g., not ROW_ID) non-virtual
+@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index d9ef949ac13..bbbda57b05d 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -2347,6 +2347,8 @@ private:
Atomic_relaxed<pthread_t> lock_mutex_owner{0};
#endif
public:
+ /** The next DB_ROW_ID value */
+ Atomic_counter<uint64_t> row_id{0};
/** Autoinc counter value to give to the next inserted row. */
uint64_t autoinc;
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index f86b3c8674a..c2c17f718ec 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -2109,8 +2109,9 @@ row_import_cleanup(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
dberr_t err) /*!< in: error code */
{
+ dict_table_t* table = prebuilt->table;
+
if (err != DB_SUCCESS) {
- dict_table_t* table = prebuilt->table;
table->file_unreadable = true;
if (table->space) {
fil_close_tablespace(table->space_id);
@@ -2141,7 +2142,25 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
- return(err);
+ if (err != DB_SUCCESS
+ || !dict_table_get_first_index(table)->is_gen_clust()) {
+ return err;
+ }
+
+ btr_cur_t cur;
+ mtr_t mtr;
+ mtr.start();
+ err = cur.open_leaf(false, dict_table_get_first_index(table),
+ BTR_SEARCH_LEAF, &mtr);
+ if (err != DB_SUCCESS) {
+ } else if (const rec_t *rec =
+ page_rec_get_prev(btr_cur_get_rec(&cur))) {
+ if (page_rec_is_user_rec(rec))
+ table->row_id= mach_read_from_6(rec);
+ }
+ mtr.commit();
+
+ return err;
}
/*****************************************************************//**
@@ -2277,55 +2296,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
}
/*****************************************************************//**
-Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID). */
-MY_ATTRIBUTE((nonnull)) static
-void
-row_import_set_sys_max_row_id(
-/*==========================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
- handler */
- const dict_table_t* table) /*!< in: table to import */
-{
- const rec_t* rec;
- mtr_t mtr;
- btr_pcur_t pcur;
- row_id_t row_id = 0;
- dict_index_t* index;
-
- index = dict_table_get_first_index(table);
- ut_ad(index->is_primary());
- ut_ad(dict_index_is_auto_gen_clust(index));
-
- mtr_start(&mtr);
-
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
- == DB_SUCCESS) {
- rec = btr_pcur_move_to_prev_on_page(&pcur);
-
- if (!rec) {
- /* The table is corrupted. */
- } else if (page_rec_is_infimum(rec)) {
- /* The table is empty. */
- } else if (rec_is_metadata(rec, *index)) {
- /* The clustered index contains the metadata
- record only, that is, the table is empty. */
- } else {
- row_id = mach_read_from_6(rec);
- }
- }
-
- mtr_commit(&mtr);
-
- if (row_id) {
- /* Update the system row id if the imported index row id is
- greater than the max system row id. */
- dict_sys.update_row_id(row_id);
- }
-}
-
-/*****************************************************************//**
Read the a string from the meta data file.
@return DB_SUCCESS or error code. */
static
@@ -4510,13 +4480,6 @@ row_import_for_mysql(
return row_import_error(prebuilt, err);
}
- /* Ensure that the next available DB_ROW_ID is not smaller than
- any DB_ROW_ID stored in the table. */
-
- if (prebuilt->clust_index_was_generated) {
- row_import_set_sys_max_row_id(prebuilt, table);
- }
-
ib::info() << "Phase III - Flush changes to disk";
/* Ensure that all pages dirtied during the IMPORT make it to disk.
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 52ccce4d0af..9cd83e98528 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -3526,19 +3526,6 @@ row_ins_index_entry_step(
}
/***********************************************************//**
-Allocates a row id for row and inits the node->index field. */
-UNIV_INLINE
-void
-row_ins_alloc_row_id_step(
-/*======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
- if (dict_table_get_first_index(node->table)->is_gen_clust())
- dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
-}
-
-/***********************************************************//**
Gets a row to insert from the values list. */
UNIV_INLINE
void
@@ -3618,13 +3605,18 @@ row_ins(
DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
if (node->state == INS_NODE_ALLOC_ROW_ID) {
-
- row_ins_alloc_row_id_step(node);
-
node->index = dict_table_get_first_index(node->table);
ut_ad(node->entry_list.empty() == false);
node->entry = node->entry_list.begin();
+ if (node->index->is_gen_clust()) {
+ const uint64_t db_row_id{++node->table->row_id};
+ if (db_row_id >> 48) {
+ DBUG_RETURN(DB_OUT_OF_FILE_SPACE);
+ }
+ mach_write_to_6(node->sys_buf, db_row_id);
+ }
+
if (node->ins_type == INS_SEARCHED) {
row_ins_get_row_from_select(node);