summaryrefslogtreecommitdiff
path: root/storage/xtradb/row/row0uins.cc
diff options
context:
space:
mode:
authorSergei Golubchik <vuvova@gmail.com>2015-05-04 19:15:28 +0200
committerSergei Golubchik <vuvova@gmail.com>2015-05-04 19:15:28 +0200
commit14a142fca67b9e1fb3f0250fda093f5b967f0138 (patch)
treedd49e0666c863d80b5c50642e36a9c945ea12b8a /storage/xtradb/row/row0uins.cc
parentdfb001edcd4b16bd4370b08b0176df78c4c5523f (diff)
downloadmariadb-git-14a142fca67b9e1fb3f0250fda093f5b967f0138.tar.gz
move to storage/xtradb
Diffstat (limited to 'storage/xtradb/row/row0uins.cc')
-rw-r--r--storage/xtradb/row/row0uins.cc475
1 files changed, 475 insertions, 0 deletions
diff --git a/storage/xtradb/row/row0uins.cc b/storage/xtradb/row/row0uins.cc
new file mode 100644
index 00000000000..849bf096492
--- /dev/null
+++ b/storage/xtradb/row/row0uins.cc
@@ -0,0 +1,475 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0uins.cc
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0uins.h"
+
+#ifdef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "trx0undo.h"
+#include "trx0roll.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "row0undo.h"
+#include "row0vers.h"
+#include "row0log.h"
+#include "trx0trx.h"
+#include "trx0rec.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+
+/*************************************************************************
+IMPORTANT NOTE: Any operation that generates redo MUST check that there
+is enough space in the redo log before starting that operation. This is
+done by calling log_free_check(). The reason for checking the
+availability of the redo log space before the start of the operation is
+that we MUST not hold any synchronization objects when performing the
+check.
+If you make a change in this module make sure that no codepath is
+introduced where a call to log_free_check() is bypassed. */
+
+/***************************************************************//**
+Removes a clustered index record. The pcur in node was positioned on the
+record; on return it is detached and trx_undo_rec_release() has been called.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_clust_rec(
+/*==========================*/
+ undo_node_t* node) /*!< in: undo node */
+{
+ btr_cur_t* btr_cur;
+ ibool success;
+ dberr_t err;
+ ulint n_tries = 0;
+ mtr_t mtr;
+ dict_index_t* index = node->pcur.btr_cur.index;
+ bool online;
+
+ ut_ad(dict_index_is_clust(index));
+
+ mtr_start(&mtr);
+
+ /* This is similar to row_undo_mod_clust(). The DDL thread may
+ already have copied this row from the log to the new table.
+ We must log the removal, so that the row will be correctly
+ purged. However, we can log the removal out of sync with the
+ B-tree modification. */
+
+ online = dict_index_is_online_ddl(index);
+ if (online) {
+ ut_ad(node->trx->dict_operation_lock_mode
+ != RW_X_LATCH);
+ ut_ad(node->table->id != DICT_INDEXES_ID);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ success = btr_pcur_restore_position(
+ online
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF, &node->pcur, &mtr);
+ ut_a(success);
+
+ btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
+ == node->trx->id);
+
+ if (online && dict_index_is_online_ddl(index)) {
+ const rec_t* rec = btr_cur_get_rec(btr_cur);
+ mem_heap_t* heap = NULL;
+ const ulint* offsets = rec_get_offsets(
+ rec, index, NULL, ULINT_UNDEFINED, &heap);
+ row_log_table_delete(rec, index, offsets, NULL);
+ mem_heap_free(heap);
+ }
+
+ if (node->table->id == DICT_INDEXES_ID) {
+ ut_ad(!online);
+ ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
+
+ /* Drop the index tree associated with the row in
+ SYS_INDEXES table: */
+
+ dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
+
+ mtr_commit(&mtr); /* commit the tree drop before deleting the row itself */
+
+ mtr_start(&mtr);
+
+ success = btr_pcur_restore_position(
+ BTR_MODIFY_LEAF, &node->pcur, &mtr);
+ ut_a(success);
+ }
+
+ if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
+ err = DB_SUCCESS;
+ goto func_exit;
+ }
+
+ btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+retry:
+ /* The optimistic delete did not succeed: try a pessimistic descent down the tree */
+ mtr_start(&mtr);
+
+ success = btr_pcur_restore_position(BTR_MODIFY_TREE,
+ &(node->pcur), &mtr);
+ ut_a(success);
+
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
+ trx_is_recv(node->trx)
+ ? RB_RECOVERY
+ : RB_NORMAL, &mtr);
+
+ /* The delete operation may fail if we have little file space
+ left; in that case we sleep and retry up to BTR_CUR_RETRY_DELETE_N_TIMES
+ times. TODO: easiest to crash the database and restart with more file space */
+
+ if (err == DB_OUT_OF_FILE_SPACE
+ && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+ n_tries++;
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+func_exit:
+ btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ return(err);
+}
+
+/***************************************************************//**
+Removes a secondary index entry if found; a missing entry is not an error.
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec_low(
+/*========================*/
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry to remove */
+{
+ btr_pcur_t pcur;
+ btr_cur_t* btr_cur;
+ dberr_t err = DB_SUCCESS;
+ mtr_t mtr;
+ enum row_search_result search_result;
+
+ log_free_check();
+
+ mtr_start(&mtr);
+
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(index, entry, 0)) {
+ goto func_exit_no_pcur; /* pcur was never opened, so it must not be closed */
+ }
+
+ search_result = row_search_index_entry(index, entry, mode,
+ &pcur, &mtr);
+
+ switch (search_result) {
+ case ROW_NOT_FOUND:
+ goto func_exit; /* nothing to remove; err is still DB_SUCCESS */
+ case ROW_FOUND:
+ break;
+ case ROW_BUFFERED:
+ case ROW_NOT_DELETED_REF:
+ /* These are invalid outcomes, because the mode passed
+ to row_search_index_entry() did not include any of the
+ flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
+ ut_error;
+ }
+
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (mode != BTR_MODIFY_TREE) {
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+ ? DB_SUCCESS : DB_FAIL;
+ } else {
+ /* No need to distinguish RB_RECOVERY here, because we
+ are deleting a secondary index record: the distinction
+ between RB_NORMAL and RB_RECOVERY only matters when
+ deleting a record that contains externally stored
+ columns. */
+ ut_ad(!dict_index_is_clust(index));
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
+ RB_NORMAL, &mtr);
+ }
+func_exit:
+ btr_pcur_close(&pcur);
+func_exit_no_pcur:
+ mtr_commit(&mtr);
+
+ return(err);
+}
+
+/***************************************************************//**
+Removes a secondary index entry from the index if found. Tries first
+optimistic, then pessimistic descent down the tree.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec(
+/*====================*/
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry to remove */
+{
+ dberr_t err;
+ ulint n_tries = 0;
+
+ /* Try first optimistic descent to the B-tree */
+
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
+
+ if (err == DB_SUCCESS) {
+
+ return(err);
+ }
+
+ /* Try then pessimistic descent to the B-tree */
+retry:
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
+
+ /* The delete operation may fail if we have little file space
+ left; any failure is retried up to BTR_CUR_RETRY_DELETE_N_TIMES times
+ after a sleep. TODO: easiest to crash the database and restart with more file space */
+
+ if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+ n_tries++;
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+ goto retry;
+ }
+
+ return(err);
+}
+
+/***********************************************************//**
+Parses the row reference and other info in a fresh insert undo record. */
+static
+void
+row_undo_ins_parse_undo_rec(
+/*========================*/
+ undo_node_t* node, /*!< in/out: row undo node */
+ ibool dict_locked) /*!< in: TRUE if the caller owns dict_sys->mutex */
+{
+ dict_index_t* clust_index;
+ byte* ptr;
+ undo_no_t undo_no;
+ table_id_t table_id;
+ ulint type;
+ ulint dummy;
+ bool dummy_extern;
+
+ ut_ad(node);
+
+ ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
+ &dummy_extern, &undo_no, &table_id);
+ ut_ad(type == TRX_UNDO_INSERT_REC);
+ node->rec_type = type;
+
+ node->update = NULL;
+ node->table = dict_table_open_on_id(
+ table_id, dict_locked, DICT_TABLE_OP_NORMAL);
+
+ /* Skip the UNDO, leaving node->table NULL, if the table or .ibd file is missing. */
+ if (UNIV_UNLIKELY(node->table == NULL)) { /* table not found: nothing to close */
+ } else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
+close_table:
+ dict_table_close(node->table, dict_locked, FALSE);
+ node->table = NULL;
+ } else {
+ clust_index = dict_table_get_first_index(node->table);
+
+ if (clust_index != NULL) {
+ trx_undo_rec_get_row_ref(
+ ptr, clust_index, &node->ref, node->heap);
+
+ if (!row_undo_search_clust_to_pcur(node)) {
+ goto close_table;
+ }
+
+ } else {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: table ");
+ ut_print_name(stderr, node->trx, TRUE,
+ node->table->name);
+ fprintf(stderr, " has no indexes, "
+ "ignoring the table\n");
+ goto close_table;
+ }
+ }
+}
+
+/***************************************************************//**
+Removes secondary index records.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec_rec(
+/*========================*/
+ undo_node_t* node) /*!< in/out: row undo node; node->index is updated on return */
+{
+ dberr_t err = DB_SUCCESS;
+ dict_index_t* index = node->index;
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(1024);
+
+ while (index != NULL) {
+ dtuple_t* entry;
+
+ if (index->type & DICT_FTS) { /* indexes flagged DICT_FTS are skipped */
+ dict_table_next_uncorrupted_index(index);
+ continue;
+ }
+
+ /* An insert undo record TRX_UNDO_INSERT_REC will
+ always contain all fields of the index. It does not
+ matter if any indexes were created afterwards; all
+ index entries can be reconstructed from the row. */
+ entry = row_build_index_entry(
+ node->row, node->ext, index, heap);
+ if (UNIV_UNLIKELY(!entry)) {
+ /* The database must have crashed after
+ inserting a clustered index record but before
+ writing all the externally stored columns of
+ that record, or a statement is being rolled
+ back because an error occurred while storing
+ off-page columns.
+
+ Because secondary index entries are inserted
+ after the clustered index record, we may
+ assume that the secondary index record does
+ not exist. */
+ } else {
+ err = row_undo_ins_remove_sec(index, entry);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ goto func_exit;
+ }
+ }
+
+ mem_heap_empty(heap);
+ dict_table_next_uncorrupted_index(index);
+ }
+
+func_exit:
+ node->index = index;
+ mem_heap_free(heap);
+ return(err);
+}
+
+/***********************************************************//**
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert. InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_undo_ins(
+/*=========*/
+ undo_node_t* node) /*!< in/out: row undo node */
+{
+ dberr_t err;
+ ibool dict_locked;
+
+ ut_ad(node->state == UNDO_NODE_INSERT);
+
+ dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
+
+ row_undo_ins_parse_undo_rec(node, dict_locked);
+
+ if (node->table == NULL) { /* the undo is skipped; see row_undo_ins_parse_undo_rec() */
+ trx_undo_rec_release(node->trx, node->undo_no);
+
+ return(DB_SUCCESS);
+ }
+
+ /* Undo the insert in the secondary indexes first, then in the clustered index. */
+
+ node->index = dict_table_get_first_index(node->table);
+ ut_ad(dict_index_is_clust(node->index));
+ /* Skip the clustered index (the first index) */
+ node->index = dict_table_get_next_index(node->index);
+
+ dict_table_skip_corrupt_index(node->index);
+
+ err = row_undo_ins_remove_sec_rec(node);
+
+ if (err == DB_SUCCESS) {
+
+ log_free_check();
+
+ if (node->table->id == DICT_INDEXES_ID) {
+
+ if (!dict_locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
+ }
+
+ // FIXME: We need to update the dict_index_t::space and
+ // page number fields too.
+ err = row_undo_ins_remove_clust_rec(node);
+
+ if (node->table->id == DICT_INDEXES_ID
+ && !dict_locked) {
+
+ mutex_exit(&dict_sys->mutex);
+ }
+ }
+
+ dict_table_close(node->table, dict_locked, FALSE);
+
+ node->table = NULL;
+
+ return(err);
+}