move to storage/xtradb

author: Sergei Golubchik <vuvova@gmail.com> 2015-05-04 19:15:28 +0200
committer: Sergei Golubchik <vuvova@gmail.com> 2015-05-04 19:15:28 +0200
commit: 14a142fca67b9e1fb3f0250fda093f5b967f0138 (patch)
tree: dd49e0666c863d80b5c50642e36a9c945ea12b8a /storage/xtradb/row/row0uins.cc
parent: dfb001edcd4b16bd4370b08b0176df78c4c5523f (diff)
download: mariadb-git-14a142fca67b9e1fb3f0250fda093f5b967f0138.tar.gz
1 files changed, 475 insertions, 0 deletions
diff --git a/storage/xtradb/row/row0uins.cc b/storage/xtradb/row/row0uins.cc
new file mode 100644
index 00000000000..849bf096492
--- /dev/null
+++ b/storage/xtradb/row/row0uins.cc
@@ -0,0 +1,475 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0uins.cc
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0uins.h"
+
+#ifdef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#include "dict0dict.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "trx0undo.h"
+#include "trx0roll.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "row0undo.h"
+#include "row0vers.h"
+#include "row0log.h"
+#include "trx0trx.h"
+#include "trx0rec.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+
+/*************************************************************************
+IMPORTANT NOTE: Any operation that generates redo MUST check that there
+is enough space in the redo log before starting that operation. This is
+done by calling log_free_check(). The reason for checking the
+availability of the redo log space before the start of the operation is
+that we MUST not hold any synchronization objects when performing the
+check.
+If you make a change in this module make sure that no codepath is
+introduced where a call to log_free_check() is bypassed. */
+
+/***************************************************************//**
+Removes the clustered index record on which node->pcur was positioned; the
+cursor is detached on return and trx_undo_rec_release() has been called.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static  __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_clust_rec(
+/*==========================*/
+	undo_node_t*	node)	/*!< in: undo node */
+{
+	btr_cur_t*	btr_cur;
+	ibool		success;
+	dberr_t		err;
+	ulint		n_tries	= 0;
+	mtr_t		mtr;
+	dict_index_t*	index	= node->pcur.btr_cur.index;
+	bool		online;
+
+	ut_ad(dict_index_is_clust(index));
+
+	mtr_start(&mtr);
+
+	/* This is similar to row_undo_mod_clust(). The DDL thread may
+	already have copied this row from the log to the new table.
+	We must log the removal, so that the row will be correctly
+	purged. However, we can log the removal out of sync with the
+	B-tree modification. */
+
+	online = dict_index_is_online_ddl(index);
+	if (online) {
+		ut_ad(node->trx->dict_operation_lock_mode
+		      != RW_X_LATCH);
+		ut_ad(node->table->id != DICT_INDEXES_ID);
+		mtr_s_lock(dict_index_get_lock(index), &mtr);
+	}
+
+	success = btr_pcur_restore_position(
+		online
+		? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+		: BTR_MODIFY_LEAF, &node->pcur, &mtr);
+	ut_a(success);
+
+	btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+
+	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
+	      == node->trx->id);
+
+	if (online && dict_index_is_online_ddl(index)) {
+		const rec_t*	rec	= btr_cur_get_rec(btr_cur);
+		mem_heap_t*	heap	= NULL;
+		const ulint*	offsets	= rec_get_offsets(
+			rec, index, NULL, ULINT_UNDEFINED, &heap);
+		row_log_table_delete(rec, index, offsets, NULL);
+		mem_heap_free(heap);
+	}
+
+	if (node->table->id == DICT_INDEXES_ID) {
+		ut_ad(!online);
+		ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
+
+		/* Drop the index tree associated with the row in the
+		SYS_INDEXES table, then reposition in a new mtr: */
+
+		dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
+
+		mtr_commit(&mtr);
+
+		mtr_start(&mtr);
+
+		success = btr_pcur_restore_position(
+			BTR_MODIFY_LEAF, &node->pcur, &mtr);
+		ut_a(success);
+	}
+
+	if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
+		err = DB_SUCCESS;
+		goto func_exit;
+	}
+
+	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+retry:
+	/* Optimistic delete failed: try a pessimistic descent to the tree */
+	mtr_start(&mtr);
+
+	success = btr_pcur_restore_position(BTR_MODIFY_TREE,
+					    &(node->pcur), &mtr);
+	ut_a(success);
+
+	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
+				   trx_is_recv(node->trx)
+				   ? RB_RECOVERY
+				   : RB_NORMAL, &mtr);
+
+	/* The delete may fail if little file space is left; it is then
+	retried up to BTR_CUR_RETRY_DELETE_N_TIMES times. TODO: easiest
+	to crash the database and restart with more file space */
+
+	if (err == DB_OUT_OF_FILE_SPACE
+	    && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+		btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
+
+		n_tries++;
+
+		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+		goto retry;
+	}
+
+func_exit:
+	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+	trx_undo_rec_release(node->trx, node->undo_no);
+
+	return(err);
+}
+
+/***************************************************************//**
+Removes a secondary index entry if found, within a single mini-transaction.
+@return	DB_SUCCESS, DB_FAIL (optimistic delete failed), DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec_low(
+/*========================*/
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether we wish optimistic or
+				pessimistic descent down the index tree */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry to remove */
+{
+	btr_pcur_t		pcur;
+	btr_cur_t*		btr_cur;
+	dberr_t			err	= DB_SUCCESS;
+	mtr_t			mtr;
+	enum row_search_result	search_result;
+
+	log_free_check();
+
+	mtr_start(&mtr);
+
+	if (mode == BTR_MODIFY_LEAF) {
+		mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+		mtr_s_lock(dict_index_get_lock(index), &mtr);
+	} else {
+		ut_ad(mode == BTR_MODIFY_TREE);
+		mtr_x_lock(dict_index_get_lock(index), &mtr);
+	}
+
+	if (row_log_online_op_try(index, entry, 0)) {
+		goto func_exit_no_pcur;	/* pcur has not been opened yet */
+	}
+
+	search_result = row_search_index_entry(index, entry, mode,
+					       &pcur, &mtr);
+
+	switch (search_result) {
+	case ROW_NOT_FOUND:
+		goto func_exit;
+	case ROW_FOUND:
+		break;
+	case ROW_BUFFERED:
+	case ROW_NOT_DELETED_REF:
+		/* These are invalid outcomes, because the mode passed
+		to row_search_index_entry() did not include any of the
+		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
+		ut_error;
+	}
+
+	btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+	if (mode != BTR_MODIFY_TREE) {
+		err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+			? DB_SUCCESS : DB_FAIL;
+	} else {
+		/* No need to distinguish RB_RECOVERY here, because we
+		are deleting a secondary index record: the distinction
+		between RB_NORMAL and RB_RECOVERY only matters when
+		deleting a record that contains externally stored
+		columns. */
+		ut_ad(!dict_index_is_clust(index));
+		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
+					   RB_NORMAL, &mtr);
+	}
+func_exit:
+	btr_pcur_close(&pcur);
+func_exit_no_pcur:
+	mtr_commit(&mtr);
+
+	return(err);
+}
+
+/***************************************************************//**
+Removes a secondary index entry from the index if found. Tries first
+optimistic, then pessimistic descent down the tree.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec(
+/*====================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry to remove */
+{
+	dberr_t	err;
+	ulint	n_tries	= 0;
+
+	/* Try first optimistic descent to the B-tree */
+
+	err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
+
+	if (err == DB_SUCCESS) {
+
+		return(err);
+	}
+
+	/* Try then pessimistic descent to the B-tree */
+retry:
+	err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
+
+	/* The delete may fail if little file space is left; any
+	failure is retried up to BTR_CUR_RETRY_DELETE_N_TIMES times.
+	TODO: easiest to crash the database and restart with more file space */
+
+	if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
+
+		n_tries++;
+
+		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+
+		goto retry;
+	}
+
+	return(err);
+}
+
+/***********************************************************//**
+Parses the row reference and other info in a fresh insert undo record. */
+static
+void
+row_undo_ins_parse_undo_rec(
+/*========================*/
+	undo_node_t*	node,		/*!< in/out: row undo node */
+	ibool		dict_locked)	/*!< in: TRUE if own dict_sys->mutex */
+{
+	dict_index_t*	clust_index;
+	byte*		ptr;
+	undo_no_t	undo_no;
+	table_id_t	table_id;
+	ulint		type;
+	ulint		dummy;
+	bool		dummy_extern;
+
+	ut_ad(node);
+
+	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
+				    &dummy_extern, &undo_no, &table_id);
+	ut_ad(type == TRX_UNDO_INSERT_REC);
+	node->rec_type = type;
+
+	node->update = NULL;
+	node->table = dict_table_open_on_id(
+		table_id, dict_locked, DICT_TABLE_OP_NORMAL);
+
+	/* No table or no .ibd file: skip the UNDO with node->table == NULL. */
+	if (UNIV_UNLIKELY(node->table == NULL)) {	/* nothing to close */
+	} else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
+close_table:	/* also reached by goto from the else branch below */
+		dict_table_close(node->table, dict_locked, FALSE);
+		node->table = NULL;
+	} else {
+		clust_index = dict_table_get_first_index(node->table);
+
+		if (clust_index != NULL) {
+			trx_undo_rec_get_row_ref(
+				ptr, clust_index, &node->ref, node->heap);
+
+			if (!row_undo_search_clust_to_pcur(node)) {
+				goto close_table;
+			}
+
+		} else {
+			ut_print_timestamp(stderr);
+			fprintf(stderr, "  InnoDB: table ");
+			ut_print_name(stderr, node->trx, TRUE,
+				      node->table->name);
+			fprintf(stderr, " has no indexes, "
+				"ignoring the table\n");
+			goto close_table;
+		}
+	}
+}
+
+/***************************************************************//**
+Removes the secondary index records of node->row, starting at node->index.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_undo_ins_remove_sec_rec(
+/*========================*/
+	undo_node_t*	node)	/*!< in/out: row undo node */
+{
+	dberr_t		err	= DB_SUCCESS;
+	dict_index_t*	index	= node->index;
+	mem_heap_t*	heap;
+
+	heap = mem_heap_create(1024);
+
+	while (index != NULL) {
+		dtuple_t*	entry;
+
+		if (index->type & DICT_FTS) {
+			dict_table_next_uncorrupted_index(index);
+			continue;
+		}
+
+		/* An insert undo record TRX_UNDO_INSERT_REC will
+		always contain all fields of the index. It does not
+		matter if any indexes were created afterwards; all
+		index entries can be reconstructed from the row. */
+		entry = row_build_index_entry(
+			node->row, node->ext, index, heap);
+		if (UNIV_UNLIKELY(!entry)) {
+			/* The database must have crashed after
+			inserting a clustered index record but before
+			writing all the externally stored columns of
+			that record, or a statement is being rolled
+			back because an error occurred while storing
+			off-page columns.
+
+			Because secondary index entries are inserted
+			after the clustered index record, we may
+			assume that the secondary index record does
+			not exist. */
+		} else {
+			err = row_undo_ins_remove_sec(index, entry);
+
+			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+				goto func_exit;
+			}
+		}
+
+		mem_heap_empty(heap);
+		dict_table_next_uncorrupted_index(index);
+	}
+
+func_exit:
+	node->index = index;	/* NULL, or the index whose removal failed */
+	mem_heap_free(heap);
+	return(err);
+}
+
+/***********************************************************//**
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert.  InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_undo_ins(
+/*=========*/
+	undo_node_t*	node)	/*!< in/out: row undo node */
+{
+	dberr_t	err;
+	ibool	dict_locked;
+
+	ut_ad(node->state == UNDO_NODE_INSERT);
+
+	dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
+
+	row_undo_ins_parse_undo_rec(node, dict_locked);
+
+	if (node->table == NULL) {
+		trx_undo_rec_release(node->trx, node->undo_no);
+
+		return(DB_SUCCESS);
+	}
+
+	/* Undo the insert in all secondary indexes, then in the clustered. */
+
+	node->index = dict_table_get_first_index(node->table);
+	ut_ad(dict_index_is_clust(node->index));
+	/* Skip the clustered index (the first index) */
+	node->index = dict_table_get_next_index(node->index);
+
+	dict_table_skip_corrupt_index(node->index);
+
+	err = row_undo_ins_remove_sec_rec(node);
+
+	if (err == DB_SUCCESS) {
+
+		log_free_check();
+
+		if (node->table->id == DICT_INDEXES_ID) {
+
+			if (!dict_locked) {
+				mutex_enter(&dict_sys->mutex);
+			}
+		}
+
+		// FIXME: We need to update the dict_index_t::space and
+		// page number fields too.
+		err = row_undo_ins_remove_clust_rec(node);
+
+		if (node->table->id == DICT_INDEXES_ID
+		    && !dict_locked) {
+
+			mutex_exit(&dict_sys->mutex);
+		}
+	}
+
+	dict_table_close(node->table, dict_locked, FALSE);
+
+	node->table = NULL;
+
+	return(err);
+}
author	Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:15:28 +0200
committer	Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:15:28 +0200
commit	14a142fca67b9e1fb3f0250fda093f5b967f0138 (patch)
tree	dd49e0666c863d80b5c50642e36a9c945ea12b8a /storage/xtradb/row/row0uins.cc
parent	dfb001edcd4b16bd4370b08b0176df78c4c5523f (diff)
download	mariadb-git-14a142fca67b9e1fb3f0250fda093f5b967f0138.tar.gz