summaryrefslogtreecommitdiff
path: root/storage/innobase
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase')
-rw-r--r--storage/innobase/btr/btr0btr.c40
-rw-r--r--storage/innobase/btr/btr0cur.c123
-rw-r--r--storage/innobase/dict/dict0load.c64
-rw-r--r--storage/innobase/handler/ha_innodb.cc111
-rw-r--r--storage/innobase/include/btr0btr.h12
-rw-r--r--storage/innobase/include/btr0cur.h39
-rw-r--r--storage/innobase/include/btr0types.h5
-rw-r--r--storage/innobase/include/buf0buf.h19
-rw-r--r--storage/innobase/include/buf0buf.ic2
-rw-r--r--storage/innobase/include/page0page.h20
-rw-r--r--storage/innobase/include/page0page.ic16
-rw-r--r--storage/innobase/include/rem0rec.h13
-rw-r--r--storage/innobase/include/rem0rec.ic37
-rw-r--r--storage/innobase/include/sync0arr.h11
-rw-r--r--storage/innobase/include/sync0sync.h3
-rw-r--r--storage/innobase/include/trx0roll.h3
-rw-r--r--storage/innobase/include/trx0sys.h10
-rw-r--r--storage/innobase/include/trx0sys.ic21
-rw-r--r--storage/innobase/include/trx0trx.h5
-rw-r--r--storage/innobase/include/univ.i2
-rw-r--r--storage/innobase/log/log0log.c9
-rw-r--r--storage/innobase/page/page0page.c31
-rw-r--r--storage/innobase/row/row0ins.c44
-rw-r--r--storage/innobase/row/row0row.c25
-rw-r--r--storage/innobase/row/row0umod.c2
-rw-r--r--storage/innobase/row/row0upd.c34
-rw-r--r--storage/innobase/row/row0vers.c16
-rw-r--r--storage/innobase/srv/srv0srv.c19
-rw-r--r--storage/innobase/sync/sync0arr.c36
-rw-r--r--storage/innobase/sync/sync0rw.c4
-rw-r--r--storage/innobase/sync/sync0sync.c10
-rw-r--r--storage/innobase/trx/trx0rec.c4
-rw-r--r--storage/innobase/trx/trx0trx.c11
33 files changed, 604 insertions, 197 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index 9438277050d..790582815a3 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -1937,7 +1937,7 @@ btr_node_ptr_delete(
ut_a(err == DB_SUCCESS);
if (!compressed) {
- btr_cur_compress_if_useful(&cursor, mtr);
+ btr_cur_compress_if_useful(&cursor, FALSE, mtr);
}
}
@@ -1945,9 +1945,10 @@ btr_node_ptr_delete(
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height. */
static
-void
+page_t*
btr_lift_page_up(
/*=============*/
+ /* out: father page */
dict_index_t* index, /* in: index tree */
page_t* page, /* in: page which is the only on its level;
must not be empty: use
@@ -2023,6 +2024,8 @@ btr_lift_page_up(
ibuf_reset_free_bits(index, father_page);
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_page, mtr));
+
+ return(father_page);
}
/*****************************************************************
@@ -2039,11 +2042,13 @@ enough free extents so that the compression will always succeed if done! */
void
btr_compress(
/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /* in/out: cursor on the page to merge
+ or lift; the page must not be empty:
+ when deleting records, use btr_discard_page()
+ if the page would become empty */
+ ibool adjust, /* in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /* in/out: mini-transaction */
{
dict_index_t* index;
ulint space;
@@ -2058,6 +2063,7 @@ btr_compress(
rec_t* node_ptr;
ulint data_size;
ulint n_recs;
+ ulint nth_rec = 0; /* remove bogus warning */
ulint max_ins_size;
ulint max_ins_size_reorg;
ulint comp;
@@ -2065,6 +2071,7 @@ btr_compress(
page = btr_cur_get_page(cursor);
index = btr_cur_get_index(cursor);
comp = page_is_comp(page);
+
ut_a((ibool)!!comp == dict_table_is_comp(index->table));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
@@ -2086,6 +2093,10 @@ btr_compress(
father_page = buf_frame_align(node_ptr);
ut_a(comp == page_is_comp(father_page));
+ if (adjust) {
+ nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ }
+
/* Decide the page to which we try to merge and which will inherit
the locks */
@@ -2110,9 +2121,8 @@ btr_compress(
} else {
/* The page is the only one on the level, lift the records
to the father */
- btr_lift_page_up(index, page, mtr);
-
- return;
+ merge_page = btr_lift_page_up(index, page, mtr);
+ goto func_exit;
}
n_recs = page_get_n_recs(page);
@@ -2188,6 +2198,10 @@ btr_compress(
index, mtr);
lock_update_merge_left(merge_page, orig_pred, page);
+
+ if (adjust) {
+ nth_rec += page_rec_get_n_recs_before(orig_pred);
+ }
} else {
orig_succ = page_rec_get_next(
page_get_infimum_rec(merge_page));
@@ -2208,6 +2222,12 @@ btr_compress(
btr_page_free(index, page, mtr);
ut_ad(btr_check_node_ptr(index, merge_page, mtr));
+
+func_exit:
+ if (adjust) {
+ btr_cur_position(index, page_rec_get_nth(merge_page, nth_rec),
+ cursor);
+ }
}
/*****************************************************************
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 6c0497cbd41..9ce09929f9a 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -31,6 +31,7 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0sea.h"
#include "row0upd.h"
#include "trx0rec.h"
+#include "trx0roll.h" /* trx_roll_crash_recv_trx */
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
@@ -73,6 +74,13 @@ this many index pages */
+ not_empty) \
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size))
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/* A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
/***********************************************************************
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
@@ -1572,7 +1580,6 @@ btr_cur_optimistic_update(
ulint old_rec_size;
dtuple_t* new_entry;
dulint roll_ptr;
- trx_t* trx;
mem_heap_t* heap;
ibool reorganized = FALSE;
ulint i;
@@ -1585,6 +1592,10 @@ btr_cur_optimistic_update(
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || thr_get_trx(thr) == trx_roll_crash_recv_trx);
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
@@ -1691,13 +1702,11 @@ btr_cur_optimistic_update(
page_cur_move_to_prev(page_cursor);
- trx = thr_get_trx(thr);
-
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
+ thr_get_trx(thr)->id);
}
rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
@@ -1781,7 +1790,9 @@ btr_cur_pessimistic_update(
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
+ btr_cur_t* cursor, /* in/out: cursor on the record to update;
+ cursor may become invalid if *big_rec == NULL
+ || !(flags & BTR_KEEP_POS_FLAG) */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
@@ -1916,6 +1927,10 @@ btr_cur_pessimistic_update(
err = DB_TOO_BIG_RECORD;
goto return_after_reservations;
}
+
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(btr_page_get_level(page, mtr) == 0);
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
}
page_cursor = btr_cur_get_page_cur(cursor);
@@ -1942,6 +1957,8 @@ btr_cur_pessimistic_update(
ut_a(rec || optim_err != DB_UNDERFLOW);
if (rec) {
+ page_cursor->rec = rec;
+
lock_rec_restore_from_page_infimum(rec, page);
rec_set_field_extern_bits(rec, index,
ext_vect, n_ext_vect, mtr);
@@ -1955,12 +1972,30 @@ btr_cur_pessimistic_update(
btr_cur_unmark_extern_fields(rec, mtr, offsets);
}
- btr_cur_compress_if_useful(cursor, mtr);
+ btr_cur_compress_if_useful(
+ cursor,
+ big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
+ mtr);
err = DB_SUCCESS;
goto return_after_reservations;
}
+ if (big_rec_vec) {
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(btr_page_get_level(page, mtr) == 0);
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
+
+ /* btr_page_split_and_insert() in
+ btr_cur_pessimistic_insert() invokes
+ mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+ We must keep the index->lock when we created a
+ big_rec, so that row_upd_clust_rec() can store the
+ big_rec in the same mini-transaction. */
+
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ }
+
if (page_cur_is_before_first(page_cursor)) {
/* The record to be updated was positioned as the first user
record on its page */
@@ -1981,6 +2016,7 @@ btr_cur_pessimistic_update(
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ page_cursor->rec = rec;
rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
@@ -2015,6 +2051,43 @@ return_after_reservations:
return(err);
}
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr) /* in/out: mini-transaction */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(btr_cur_get_rec(cursor));
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* Keep the locks across the mtr_commit(mtr). */
+ rw_lock_x_lock(dict_index_get_lock(cursor->index));
+ rw_lock_x_lock(&block->lock);
+ mutex_enter(&block->mutex);
+#ifdef UNIV_SYNC_DEBUG
+ buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
+#else
+ buf_block_buf_fix_inc(block);
+#endif
+ mutex_exit(&block->mutex);
+ /* Write out the redo log. */
+ mtr_commit(mtr);
+ mtr_start(mtr);
+ /* Reassociate the locks with the mini-transaction.
+ They will be released on mtr_commit(mtr). */
+ mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK);
+ mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/********************************************************************
@@ -2383,30 +2456,6 @@ btr_cur_del_unmark_for_ibuf(
/*==================== B-TREE RECORD REMOVE =========================*/
/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
-
- btr_compress(cursor, mtr);
-}
-
-/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
@@ -2417,10 +2466,12 @@ ibool
btr_cur_compress_if_useful(
/*=======================*/
/* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /* in/out: cursor on the page to compress;
+ cursor does not stay valid if !adjust and
+ compression occurs */
+ ibool adjust, /* in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /* in/out: mini-transaction */
{
ut_ad(mtr_memo_contains(mtr,
dict_index_get_lock(btr_cur_get_index(cursor)),
@@ -2430,7 +2481,7 @@ btr_cur_compress_if_useful(
if (btr_cur_compress_recommendation(cursor, mtr)) {
- btr_compress(cursor, mtr);
+ btr_compress(cursor, adjust, mtr);
return(TRUE);
}
@@ -2643,7 +2694,7 @@ return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
- ret = btr_cur_compress_if_useful(cursor, mtr);
+ ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
}
if (n_extents > 0) {
diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
index c505bfbd6c4..7e820cfb08d 100644
--- a/storage/innobase/dict/dict0load.c
+++ b/storage/innobase/dict/dict0load.c
@@ -454,9 +454,11 @@ dict_load_report_deleted_index(
/************************************************************************
Loads definitions for index fields. */
static
-void
+ulint
dict_load_fields(
/*=============*/
+ /* out: DB_SUCCESS if ok, DB_CORRUPTION
+ if failed */
dict_table_t* table, /* in: table */
dict_index_t* index, /* in: index whose fields to load */
mem_heap_t* heap) /* in: memory heap for temporary storage */
@@ -474,6 +476,7 @@ dict_load_fields(
byte* buf;
ulint i;
mtr_t mtr;
+ ulint error = DB_SUCCESS;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -535,6 +538,26 @@ dict_load_fields(
field = rec_get_nth_field_old(rec, 4, &len);
+ if (prefix_len >= DICT_MAX_INDEX_COL_LEN) {
+ fprintf(stderr, "InnoDB: Error: load index"
+ " '%s' failed.\n"
+ "InnoDB: index field '%s' has a prefix"
+ " length of %lu bytes,\n"
+ "InnoDB: which exceeds the"
+ " maximum limit of %lu bytes.\n"
+ "InnoDB: Please use server that"
+ " supports long index prefix\n"
+ "InnoDB: or turn on"
+ " innodb_force_recovery to load"
+ " the table\n",
+ index->name, mem_heap_strdupl(
+ heap, (char*) field, len),
+ (ulong) prefix_len,
+ (ulong) (DICT_MAX_INDEX_COL_LEN - 1));
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
dict_mem_index_add_field(index,
mem_heap_strdupl(heap,
(char*) field, len),
@@ -543,8 +566,10 @@ dict_load_fields(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
+func_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
+ return(error);
}
/************************************************************************
@@ -701,10 +726,28 @@ dict_load_indexes(
space, type, n_fields);
index->id = id;
- dict_load_fields(table, index, heap);
+ error = dict_load_fields(table, index, heap);
+
+ if (error != DB_SUCCESS) {
+ fprintf(stderr, "InnoDB: Error: load index '%s'"
+ " for table '%s' failed\n",
+ index->name, table->name);
+
+ /* If the force recovery flag is set, and
+ if the failed index is not the primary index, we
+ will continue and open other indexes */
+ if (srv_force_recovery
+ && !(index->type & DICT_CLUSTERED)) {
+ error = DB_SUCCESS;
+ goto next_rec;
+ } else {
+ goto func_exit;
+ }
+ }
+
dict_index_add_to_cache(table, index, page_no);
}
-
+next_rec:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
@@ -881,9 +924,18 @@ err_exit:
} else {
table->fk_max_recusive_level = 0;
}
- } else if (!srv_force_recovery) {
- dict_table_remove_from_cache(table);
- table = NULL;
+ } else {
+ dict_index_t* index;
+
+ /* Make sure that at least the clustered index was loaded.
+ Otherwise refuse to load the table */
+ index = dict_table_get_first_index(table);
+
+ if (!srv_force_recovery || !index
+ || !(index->type & DICT_CLUSTERED)) {
+ dict_table_remove_from_cache(table);
+ table = NULL;
+ }
}
#if 0
if (err != DB_SUCCESS && table != NULL) {
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 6f58fd70fbd..dfe13ccbbfe 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -189,7 +189,7 @@ innobase_index_name_is_reserved(
/*============================*/
/* out: true if index name matches a
reserved name */
- const trx_t* trx, /* in: InnoDB transaction handle */
+ THD* thd, /* in/out: MySQL connection */
const TABLE* form, /* in: information on table
columns and indexes */
const char* norm_name); /* in: table name */
@@ -5285,10 +5285,6 @@ create_table_def(
DBUG_PRINT("enter", ("table_name: %s", table_name));
ut_a(trx->mysql_thd != NULL);
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
- (THD*) trx->mysql_thd)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
n_cols = form->s->fields;
@@ -5397,6 +5393,8 @@ err_col:
col_len);
}
+ srv_lower_case_table_names = lower_case_table_names;
+
error = row_create_table_for_mysql(table, trx);
innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
@@ -5642,6 +5640,35 @@ ha_innobase::create(
DBUG_RETURN(HA_ERR_TO_BIG_ROW);
}
+ strcpy(name2, name);
+
+ normalize_table_name(norm_name, name2);
+
+ /* Create the table definition in InnoDB */
+
+ flags = form->s->row_type != ROW_TYPE_REDUNDANT ? DICT_TF_COMPACT : 0;
+
+ /* Look for a primary key */
+
+ primary_key_no= (form->s->primary_key != MAX_KEY ?
+ (int) form->s->primary_key :
+ -1);
+
+ /* Our function row_get_mysql_key_number_for_index assumes
+ the primary key is always number 0, if it exists */
+
+ DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
+
+ /* Check for name conflicts (with reserved name) for
+ any user indices to be created. */
+ if (innobase_index_name_is_reserved(thd, form, norm_name)) {
+ DBUG_RETURN(-1);
+ }
+
+ if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
+ DBUG_RETURN(HA_ERR_GENERIC);
+ }
+
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -5665,48 +5692,12 @@ ha_innobase::create(
trx->check_unique_secondary = FALSE;
}
- if (lower_case_table_names) {
- srv_lower_case_table_names = TRUE;
- } else {
- srv_lower_case_table_names = FALSE;
- }
-
- strcpy(name2, name);
-
- normalize_table_name(norm_name, name2);
-
/* Latch the InnoDB data dictionary exclusively so that no deadlocks
or lock waits can happen in it during a table create operation.
Drop table etc. do this latching in row0mysql.c. */
row_mysql_lock_data_dictionary(trx);
- /* Create the table definition in InnoDB */
-
- flags = 0;
-
- if (form->s->row_type != ROW_TYPE_REDUNDANT) {
- flags |= DICT_TF_COMPACT;
- }
-
- /* Look for a primary key */
-
- primary_key_no= (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1);
-
- /* Our function row_get_mysql_key_number_for_index assumes
- the primary key is always number 0, if it exists */
-
- DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
-
- /* Check for name conflicts (with reserved name) for
- any user indices to be created. */
- if (innobase_index_name_is_reserved(trx, form, norm_name)) {
- error = -1;
- goto cleanup;
- }
-
error = create_table_def(trx, form, norm_name,
create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
flags);
@@ -5936,12 +5927,6 @@ ha_innobase::delete_table(
trx_search_latch_release_if_reserved(parent_trx);
- if (lower_case_table_names) {
- srv_lower_case_table_names = TRUE;
- } else {
- srv_lower_case_table_names = FALSE;
- }
-
trx = trx_allocate_for_mysql();
trx->mysql_thd = thd;
@@ -5961,6 +5946,8 @@ ha_innobase::delete_table(
/* Drop the table in InnoDB */
+ srv_lower_case_table_names = lower_case_table_names;
+
error = row_drop_table_for_mysql(norm_name, trx,
thd_sql_command(thd)
== SQLCOM_DROP_DB);
@@ -6089,12 +6076,6 @@ ha_innobase::rename_table(
trx_search_latch_release_if_reserved(parent_trx);
- if (lower_case_table_names) {
- srv_lower_case_table_names = TRUE;
- } else {
- srv_lower_case_table_names = FALSE;
- }
-
trx = trx_allocate_for_mysql();
trx->mysql_thd = thd;
INNOBASE_COPY_STMT(thd, trx);
@@ -6114,6 +6095,8 @@ ha_innobase::rename_table(
/* Rename the table in InnoDB */
+ srv_lower_case_table_names = lower_case_table_names;
+
error = row_rename_table_for_mysql(norm_from, norm_to, trx);
/* Flush the log to reduce probability that the .frm files and
@@ -7342,10 +7325,18 @@ ha_innobase::external_lock(
reset_template(prebuilt);
- if (lock_type == F_WRLCK) {
+ if (lock_type == F_WRLCK
+ || (table->s->tmp_table
+ && thd_sql_command(thd) == SQLCOM_LOCK_TABLES)) {
/* If this is a SELECT, then it is in UPDATE TABLE ...
- or SELECT ... FOR UPDATE */
+ or SELECT ... FOR UPDATE
+
+ For temporary tables which are locked for READ by LOCK TABLES
+ updates are still allowed by SQL-layer. In order to accomodate
+ for such a situation we always request X-lock for such table
+ at LOCK TABLES time.
+ */
prebuilt->select_lock_type = LOCK_X;
prebuilt->stored_select_lock_type = LOCK_X;
}
@@ -8565,7 +8556,7 @@ innobase_commit_by_xid(
if (trx) {
innobase_commit_low(trx);
-
+ trx_free_for_background(trx);
return(XA_OK);
} else {
return(XAER_NOTA);
@@ -8588,7 +8579,9 @@ innobase_rollback_by_xid(
trx = trx_get_trx_by_xid(xid);
if (trx) {
- return(innobase_rollback_trx(trx));
+ int ret = innobase_rollback_trx(trx);
+ trx_free_for_background(trx);
+ return(ret);
} else {
return(XAER_NOTA);
}
@@ -8824,7 +8817,7 @@ innobase_index_name_is_reserved(
/*============================*/
/* out: true if an index name
matches the reserved name */
- const trx_t* trx, /* in: InnoDB transaction handle */
+ THD* thd, /* in/out: MySQL connection */
const TABLE* form, /* in: information on table
columns and indexes */
const char* norm_name) /* in: table name */
@@ -8838,7 +8831,7 @@ innobase_index_name_is_reserved(
if (innobase_strcasecmp(key->name,
innobase_index_reserve_name) == 0) {
/* Push warning to mysql */
- push_warning_printf((THD*) trx->mysql_thd,
+ push_warning_printf(thd,
MYSQL_ERROR::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"Cannot Create Index with name "
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 1573de7e818..269fa355558 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -312,11 +312,13 @@ enough free extents so that the compression will always succeed if done! */
void
btr_compress(
/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr); /* in: mtr */
+ btr_cur_t* cursor, /* in/out: cursor on the page to merge
+ or lift; the page must not be empty:
+ when deleting records, use btr_discard_page()
+ if the page would become empty */
+ ibool adjust, /* in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr); /* in/out: mini-transaction */
/*****************************************************************
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 20235c55f22..c068d8d3318 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -23,6 +23,9 @@ Created 10/16/1994 Heikki Tuuri
#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
+#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update()
+ must keep cursor position when
+ moving columns to big_rec */
#define BTR_CUR_ADAPT
#define BTR_CUR_HASH_ADAPT
@@ -237,7 +240,9 @@ btr_cur_pessimistic_update(
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
+ btr_cur_t* cursor, /* in/out: cursor on the record to update;
+ cursor may become invalid if *big_rec == NULL
+ || !(flags & BTR_KEEP_POS_FLAG) */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
@@ -247,6 +252,15 @@ btr_cur_pessimistic_update(
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr); /* in/out: mini-transaction */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -286,19 +300,6 @@ btr_cur_del_unmark_for_ibuf(
rec_t* rec, /* in: record to delete unmark */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
@@ -309,10 +310,12 @@ ibool
btr_cur_compress_if_useful(
/*=======================*/
/* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr); /* in: mtr */
+ btr_cur_t* cursor, /* in/out: cursor on the page to compress;
+ cursor does not stay valid if !adjust and
+ compression occurs */
+ ibool adjust, /* in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr); /* in/out: mini-transaction */
/***********************************************************
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 8fa0bf0602d..eaa1f36e781 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -18,4 +18,9 @@ typedef struct btr_pcur_struct btr_pcur_t;
typedef struct btr_cur_struct btr_cur_t;
typedef struct btr_search_struct btr_search_t;
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+#define BTR_EXTERN_FIELD_REF_SIZE 20
+extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
#endif
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 3e8972d9182..7479ce9cbf0 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -682,6 +682,25 @@ buf_page_address_fold(
/* out: the folded value */
ulint space, /* in: space id */
ulint offset);/* in: offset of the page within space */
+#ifdef UNIV_SYNC_DEBUG
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_debug(
+/*========================*/
+ buf_block_t* block, /* in: block to bufferfix */
+ const char* file __attribute__ ((unused)), /* in: file name */
+ ulint line __attribute__ ((unused))); /* in: line */
+#else /* UNIV_SYNC_DEBUG */
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc(
+/*==================*/
+ buf_block_t* block); /* in: block to bufferfix */
+#endif /* UNIV_SYNC_DEBUG */
/**********************************************************************
Returns the control block of a file page, NULL if not found. */
UNIV_INLINE
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 58c5fd9ef3d..f4d3619f73f 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -660,6 +660,6 @@ buf_page_dbg_add_level(
ulint level __attribute__((unused))) /* in: latching order
level */
{
- sync_thread_add_level(&(buf_block_align(frame)->lock), level);
+ sync_thread_add_level(&(buf_block_align(frame)->lock), level, FALSE);
}
#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 273007c2778..24698557e77 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -234,11 +234,22 @@ page_get_supremum_rec(
/*==================*/
/* out: the last record in record list */
page_t* page); /* in: page which must have record(s) */
-/****************************************************************
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list. */
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before(). */
rec_t*
+page_rec_get_nth(
+/*=============*/
+ /* out: nth record */
+ page_t* page, /* in: page */
+ ulint nth); /* in: nth record */
+/*****************************************************************
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list. */
+UNIV_INLINE
+rec_t*
page_get_middle_rec(
/*================*/
/* out: middle record */
@@ -280,7 +291,8 @@ page_get_n_recs(
page_t* page); /* in: index page */
/*******************************************************************
Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
+The number includes infimum and supremum records.
+This is the inverse function of page_rec_get_nth(). */
ulint
page_rec_get_n_recs_before(
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index d9e67f3eeeb..a019aa28515 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -341,6 +341,22 @@ page_rec_is_infimum(
}
/*****************************************************************
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list. */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+ /* out: middle record */
+ page_t* page) /* in: page */
+{
+ ulint middle = (page_get_n_recs(page) + 2) / 2;
+
+ return(page_rec_get_nth(page, middle));
+}
+
+/*****************************************************************
Compares a data tuple to a physical record. Differs from the function
cmp_dtuple_rec_with_match in the way that the record must reside on an
index page, and also page infimum and supremum records can be given in
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 58762fc3111..67baeb7d8d2 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -339,6 +339,19 @@ rec_offs_any_extern(
/*================*/
/* out: TRUE if a field is stored externally */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/********************************************************
+Determine if the offsets are for a record containing null BLOB pointers. */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+ /* out: first field containing
+ a null BLOB pointer,
+ or NULL if none found */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets); /*!< in: rec_get_offsets(rec) */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/***************************************************************
Sets the value of the ith field extern storage bit. */
UNIV_INLINE
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index df66bb13aeb..566c62e30f2 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -9,6 +9,7 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
#include "dict0dict.h"
+#include "btr0types.h"
/* Compact flag ORed to the extra size returned by rec_get_offsets() */
#define REC_OFFS_COMPACT ((ulint) 1 << 31)
@@ -1020,6 +1021,42 @@ rec_offs_any_extern(
return(FALSE);
}
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/********************************************************
+Determine if the offsets are for a record containing null BLOB pointers. */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+ /* out: first field containing
+ a null BLOB pointer,
+ or NULL if none found */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+{
+ ulint i;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint len;
+ const byte* field
+ = rec_get_nth_field(rec, offsets, i, &len);
+
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ if (!memcmp(field + len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE)) {
+ return(field);
+ }
+ }
+ }
+
+ return(NULL);
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
/***************************************************************
Sets the value of the ith field extern storage bit. */
UNIV_INLINE
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index fae26b7a63e..ec48059dbcb 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void);
Prints warnings of long semaphore waits to stderr. */
ibool
-sync_array_print_long_waits(void);
-/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
+sync_array_print_long_waits(
+/*========================*/
+ /* out: TRUE if fatal semaphore wait threshold
+ was exceeded */
+ os_thread_id_t* waiter, /* out: longest waiting thread */
+ const void** sema) /* out: longest-waited-for semaphore */
+ __attribute__((nonnull));
/************************************************************************
Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 9430d4cb723..595dca0da6d 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -198,8 +198,9 @@ void
sync_thread_add_level(
/*==================*/
void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level); /* in: level in the latching order; if
+ ulint level, /* in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
+ ibool relock);/* in: TRUE if re-entering an x-lock */
/**********************************************************************
Removes a latch from the thread level array if it is found there. */
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index c1eca3d5753..4fabb83b025 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -15,6 +15,9 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
+/* In crash recovery, the current trx to be rolled back */
+extern trx_t* trx_roll_crash_recv_trx;
+
#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
/***********************************************************************
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index bad3c9d570c..7ea981eb85c 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -256,6 +256,16 @@ trx_in_trx_list(
/*============*/
/* out: TRUE if is in */
trx_t* in_trx);/* in: trx */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/********************************************************
+Assert that a transaction is active. */
+UNIV_INLINE
+ibool
+trx_assert_active(
+/*==============*/
+ /* out: TRUE */
+ dulint trx_id); /* in: transaction identifier */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/*********************************************************************
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 1142fb60398..f5033c5778a 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -257,6 +257,27 @@ trx_get_on_id(
return(NULL);
}
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/********************************************************
+Assert that a transaction is active. */
+UNIV_INLINE
+ibool
+trx_assert_active(
+/*==============*/
+ /* out: TRUE */
+ dulint trx_id) /* in: transaction identifier */
+{
+ trx_t* trx;
+
+ mutex_enter(&kernel_mutex);
+ trx = trx_get_on_id(trx_id);
+ ut_a(trx);
+ mutex_exit(&kernel_mutex);
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
/********************************************************************
Returns the minumum trx id in trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->conc_state to
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 4652f45892e..7cb16107746 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -19,7 +19,12 @@ Created 3/26/1996 Heikki Tuuri
#include "dict0types.h"
#include "trx0xa.h"
+/* Number of transactions currently allocated for MySQL: protected by
+the kernel mutex */
extern ulint trx_n_mysql_transactions;
+/* Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+extern ulint trx_n_prepared;
/************************************************************************
Releases the search latch if trx has reserved it. */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index ce5d8a092bf..a67b1b3895e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -88,6 +88,8 @@ memory is read outside the allocated blocks. */
#if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */
+#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
+ debugging without UNIV_DEBUG */
#define UNIV_DEBUG /* Enable ut_ad() assertions */
#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
#define UNIV_MEM_DEBUG /* detect memory leaks etc */
diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
index 3300997112b..092e3bfe37f 100644
--- a/storage/innobase/log/log0log.c
+++ b/storage/innobase/log/log0log.c
@@ -3052,12 +3052,13 @@ loop:
goto loop;
}
- /* Check that there are no longer transactions. We need this wait even
- for the 'very fast' shutdown, because the InnoDB layer may have
- committed or prepared transactions and we don't want to lose them. */
+ /* Check that there are no longer transactions, except for
+ PREPARED ones. We need this wait even for the 'very fast'
+ shutdown, because the InnoDB layer may have committed or
+ prepared transactions and we don't want to lose them. */
if (trx_n_mysql_transactions > 0
- || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+ || UT_LIST_GET_LEN(trx_sys->trx_list) > trx_n_prepared) {
mutex_exit(&kernel_mutex);
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
index 543cf9e34eb..6a89df7de22 100644
--- a/storage/innobase/page/page0page.c
+++ b/storage/innobase/page/page0page.c
@@ -1194,49 +1194,42 @@ page_dir_balance_slot(
}
/****************************************************************
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list. */
+Returns the nth record of the record list. */
rec_t*
-page_get_middle_rec(
-/*================*/
- /* out: middle record */
- page_t* page) /* in: page */
+page_rec_get_nth(
+/*=============*/
+ /* out: nth record */
+ page_t* page, /* in: page */
+ ulint nth) /* in: nth record */
{
page_dir_slot_t* slot;
- ulint middle;
ulint i;
ulint n_owned;
- ulint count;
rec_t* rec;
- /* This many records we must leave behind */
- middle = (page_get_n_recs(page) + 2) / 2;
-
- count = 0;
+ ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
for (i = 0;; i++) {
slot = page_dir_get_nth_slot(page, i);
n_owned = page_dir_slot_get_n_owned(slot);
- if (count + n_owned > middle) {
+ if (n_owned > nth) {
break;
} else {
- count += n_owned;
+ nth -= n_owned;
}
}
ut_ad(i > 0);
slot = page_dir_get_nth_slot(page, i - 1);
rec = page_dir_slot_get_rec(slot);
- rec = page_rec_get_next(rec);
-
- /* There are now count records behind rec */
- for (i = 0; i < middle - count; i++) {
+ do {
rec = page_rec_get_next(rec);
- }
+ ut_ad(rec);
+ } while (nth--);
return(rec);
}
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index 9786f90fd39..7ff443a11ad 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -259,6 +259,7 @@ row_ins_sec_index_entry_by_modify(
err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
&dummy_big_rec, update,
0, thr, mtr);
+ ut_a(!dummy_big_rec);
}
func_exit:
mem_heap_free(heap);
@@ -329,8 +330,9 @@ row_ins_clust_index_entry_by_modify(
goto func_exit;
}
- err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
- 0, thr, mtr);
+ err = btr_cur_pessimistic_update(
+ BTR_KEEP_POS_FLAG, cursor, big_rec, update,
+ 0, thr, mtr);
}
func_exit:
mem_heap_free(heap);
@@ -2083,6 +2085,41 @@ row_ins_index_entry_low(
err = row_ins_clust_index_entry_by_modify(
mode, &cursor, &big_rec, entry,
ext_vec, n_ext_vec, thr, &mtr);
+
+ if (big_rec) {
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored
+ columns while still x-latching
+ index->lock and block->lock. We have
+ to mtr_commit(mtr) first, so that the
+ redo log will be written in the
+ correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr
+ freed B-tree pages on which some of
+ the big_rec fields will be written. */
+ btr_cur_mtr_commit_and_start(&cursor, &mtr);
+
+ rec = btr_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED,
+ &heap);
+
+ err = btr_store_big_rec_extern_fields(
+ index, rec, offsets, big_rec, &mtr);
+ /* If writing big_rec fails (for
+ example, because of DB_OUT_OF_FILE_SPACE),
+ the record will be corrupted. Even if
+ we did not update any externally
+ stored columns, our update could cause
+ the record to grow so that a
+ non-updated column was selected for
+ external storage. This non-update
+ would not have been written to the
+ undo log, and thus the record cannot
+ be rolled back. */
+ ut_a(err == DB_SUCCESS);
+ goto stored_big_rec;
+ }
} else {
err = row_ins_sec_index_entry_by_modify(
mode, &cursor, entry, thr, &mtr);
@@ -2119,7 +2156,6 @@ function_exit:
mtr_commit(&mtr);
if (big_rec) {
- rec_t* rec;
mtr_start(&mtr);
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2130,7 +2166,7 @@ function_exit:
err = btr_store_big_rec_extern_fields(index, rec,
offsets, big_rec, &mtr);
-
+stored_big_rec:
if (modify) {
dtuple_big_rec_free(big_rec);
} else {
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
index 08e50817db9..171039e34ac 100644
--- a/storage/innobase/row/row0row.c
+++ b/storage/innobase/row/row0row.c
@@ -202,6 +202,7 @@ row_build(
ut_ad(index && rec && heap);
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(!mutex_own(&kernel_mutex));
if (!offsets) {
offsets = rec_get_offsets(rec, index, offsets_,
@@ -210,6 +211,26 @@ row_build(
ut_ad(rec_offs_validate(rec, index, offsets));
}
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ /* This condition can occur during crash recovery before
+ trx_rollback_or_clean_all_without_sess() has completed
+ execution.
+
+ This condition is possible if the server crashed
+ during an insert or update before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the clustered index record.
+
+ If the record contains a null BLOB pointer, look up the
+ transaction that holds the implicit lock on this record, and
+ assert that it is active. (In this version of InnoDB, we
+ cannot assert that it was recovered, because there is no
+ trx->is_recovered field.) */
+
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_assert_active(row_get_rec_trx_id(rec, index, offsets)));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
if (type != ROW_COPY_POINTERS) {
/* Take a copy of rec to heap */
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
@@ -302,6 +323,10 @@ row_rec_to_index_entry(
rec = rec_copy(buf, rec, offsets);
/* Avoid a debug assertion in rec_offs_validate(). */
rec_offs_make_valid(rec, index, offsets);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ } else {
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
}
rec_len = rec_offs_n_fields(offsets);
diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c
index a3333fcc536..0b00aa2411a 100644
--- a/storage/innobase/row/row0umod.c
+++ b/storage/innobase/row/row0umod.c
@@ -119,6 +119,7 @@ row_undo_mod_clust_low(
| BTR_KEEP_SYS_FLAG,
btr_cur, &dummy_big_rec, node->update,
node->cmpl_info, thr, mtr);
+ ut_ad(!dummy_big_rec);
}
return(err);
@@ -471,6 +472,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
btr_cur, &dummy_big_rec,
update, 0, thr, &mtr);
+ ut_ad(!dummy_big_rec);
}
mem_heap_free(heap);
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 0790cfe02e2..694b00ea265 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1580,32 +1580,48 @@ row_upd_clust_rec(
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(index->table)));
- err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
- &big_rec, node->update,
- node->cmpl_info, thr, mtr);
- mtr_commit(mtr);
+ err = btr_cur_pessimistic_update(
+ BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
+ &big_rec, node->update, node->cmpl_info, thr, mtr);
- if (err == DB_SUCCESS && big_rec) {
+ if (big_rec) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
- mtr_start(mtr);
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored columns while still
+ x-latching index->lock and block->lock. We have to
+ mtr_commit(mtr) first, so that the redo log will be
+ written in the correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr freed B-tree
+ pages on which some of the big_rec fields will be
+ written. */
+ btr_cur_mtr_commit_and_start(btr_cur, mtr);
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- big_rec, mtr);
+ big_rec, mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- mtr_commit(mtr);
+ /* If writing big_rec fails (for example, because of
+ DB_OUT_OF_FILE_SPACE), the record will be corrupted.
+ Even if we did not update any externally stored
+ columns, our update could cause the record to grow so
+ that a non-updated column was selected for external
+ storage. This non-update would not have been written
+ to the undo log, and thus the record cannot be rolled
+ back. */
+ ut_a(err == DB_SUCCESS);
}
+ mtr_commit(mtr);
+
if (big_rec) {
dtuple_big_rec_free(big_rec);
}
diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c
index 23aca8c3f2e..906b46fb51b 100644
--- a/storage/innobase/row/row0vers.c
+++ b/storage/innobase/row/row0vers.c
@@ -473,6 +473,11 @@ row_vers_build_for_consistent_read(
/* The view already sees this version: we can
copy it to in_heap and return */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(
+ version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
buf = mem_heap_alloc(in_heap,
rec_offs_size(*offsets));
*old_vers = rec_copy(buf, version, *offsets);
@@ -506,6 +511,10 @@ row_vers_build_for_consistent_read(
*offsets = rec_get_offsets(prev_version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
if (read_view_sees_trx_id(view, trx_id)) {
@@ -606,6 +615,10 @@ row_vers_build_for_semi_consistent_read(
/* We found a version that belongs to a
committed transaction: return it. */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
if (rec == version) {
*old_vers = rec;
err = DB_SUCCESS;
@@ -663,6 +676,9 @@ row_vers_build_for_semi_consistent_read(
version = prev_version;
*offsets = rec_get_offsets(version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
}/* for (;;) */
if (heap) {
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 9c34e73109c..3f6f1982992 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -2180,9 +2180,15 @@ srv_error_monitor_thread(
os_thread_create */
{
/* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
- dulint old_lsn;
- dulint new_lsn;
+ ulint fatal_cnt = 0;
+ dulint old_lsn;
+ dulint new_lsn;
+ /* longest waiting thread for a semaphore */
+ os_thread_id_t waiter = os_thread_get_curr_id();
+ os_thread_id_t old_waiter = waiter;
+ /* the semaphore that is being waited for */
+ const void* sema = NULL;
+ const void* old_sema = NULL;
old_lsn = srv_start_lsn;
@@ -2224,10 +2230,11 @@ loop:
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
-
+
sync_arr_wake_threads_if_sema_free();
- if (sync_array_print_long_waits()) {
+ if (sync_array_print_long_waits(&waiter, &sema)
+ && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
fatal_cnt++;
if (fatal_cnt > 10) {
@@ -2242,6 +2249,8 @@ loop:
}
} else {
fatal_cnt = 0;
+ old_waiter = waiter;
+ old_sema = sema;
}
/* Flush stderr so that a database user gets the output
diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
index 41d3492c8c9..93a7398f252 100644
--- a/storage/innobase/sync/sync0arr.c
+++ b/storage/innobase/sync/sync0arr.c
@@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void)
Prints warnings of long semaphore waits to stderr. */
ibool
-sync_array_print_long_waits(void)
-/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
+sync_array_print_long_waits(
+/*========================*/
+ /* out: TRUE if fatal semaphore wait threshold
+ was exceeded */
+ os_thread_id_t* waiter, /* out: longest waiting thread */
+ const void** sema) /* out: longest-waited-for semaphore */
{
sync_cell_t* cell;
ibool old_val;
@@ -927,24 +929,40 @@ sync_array_print_long_waits(void)
ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE;
+ double longest_diff = 0;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
+ double diff;
+ void* wait_object;
+
cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time) > 240) {
+ wait_object = cell->wait_object;
+
+ if (wait_object == NULL || !cell->waiting) {
+
+ continue;
+ }
+
+ diff = difftime(time(NULL), cell->reservation_time);
+
+ if (diff > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr);
sync_array_cell_print(stderr, cell);
noticed = TRUE;
}
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time)
- > fatal_timeout) {
+ if (diff > fatal_timeout) {
fatal = TRUE;
}
+
+ if (diff > longest_diff) {
+ longest_diff = diff;
+ *sema = wait_object;
+ *waiter = cell->thread;
+ }
}
if (noticed) {
diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
index ef4c07e8c26..089e87a8a5c 100644
--- a/storage/innobase/sync/sync0rw.c
+++ b/storage/innobase/sync/sync0rw.c
@@ -663,7 +663,9 @@ rw_lock_add_debug_info(
rw_lock_debug_mutex_exit();
if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_add_level(lock, lock->level);
+ sync_thread_add_level(lock, lock->level,
+ lock_type == RW_LOCK_EX
+ && lock->writer_count > 1);
}
}
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
index 944fd2a97fc..1099dff798e 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innobase/sync/sync0sync.c
@@ -641,7 +641,7 @@ mutex_set_debug_info(
ut_ad(mutex);
ut_ad(file_name);
- sync_thread_add_level(mutex, mutex->level);
+ sync_thread_add_level(mutex, mutex->level, FALSE);
mutex->file_name = file_name;
mutex->line = line;
@@ -1011,8 +1011,9 @@ void
sync_thread_add_level(
/*==================*/
void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level) /* in: level in the latching order; if
+ ulint level, /* in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
+ ibool relock) /* in: TRUE if re-entering an x-lock */
{
sync_level_t* array;
sync_level_t* slot;
@@ -1060,6 +1061,10 @@ sync_thread_add_level(
array = thread_slot->levels;
+ if (relock) {
+ goto levels_ok;
+ }
+
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
B-tree height changes, then a leaf can change to an internal node
or the other way around. We do not know at present if this can cause
@@ -1209,6 +1214,7 @@ sync_thread_add_level(
ut_error;
}
+levels_ok:
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
slot = sync_thread_levels_get_nth(array, i);
diff --git a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c
index 38ad53fcfb0..730ac6a6f60 100644
--- a/storage/innobase/trx/trx0rec.c
+++ b/storage/innobase/trx/trx0rec.c
@@ -1397,6 +1397,10 @@ trx_undo_prev_version_build(
return(DB_ERROR);
}
+# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint* ext_vect;
ulint n_ext_vect;
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index a82d7f452fc..d174f1e1b37 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -41,6 +41,9 @@ sess_t* trx_dummy_sess = NULL;
/* Number of transactions currently allocated for MySQL: protected by
the kernel mutex */
ulint trx_n_mysql_transactions = 0;
+/* Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+ulint trx_n_prepared = 0;
/*****************************************************************
Starts the transaction if it is not yet started. */
@@ -480,6 +483,7 @@ trx_lists_init_at_db_start(void)
if (srv_force_recovery == 0) {
trx->conc_state = TRX_PREPARED;
+ trx_n_prepared++;
} else {
fprintf(stderr,
"InnoDB: Since"
@@ -558,6 +562,7 @@ trx_lists_init_at_db_start(void)
trx->conc_state
= TRX_PREPARED;
+ trx_n_prepared++;
} else {
fprintf(stderr,
"InnoDB: Since"
@@ -832,6 +837,11 @@ trx_commit_off_kernel(
|| trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
+ if (UNIV_UNLIKELY(trx->conc_state == TRX_PREPARED)) {
+ ut_a(trx_n_prepared > 0);
+ trx_n_prepared--;
+ }
+
/* The following assignment makes the transaction committed in memory
and makes its changes to data visible to other transactions.
NOTE that there is a small discrepancy from the strict formal
@@ -1882,6 +1892,7 @@ trx_prepare_off_kernel(
/*--------------------------------------*/
trx->conc_state = TRX_PREPARED;
+ trx_n_prepared++;
/*--------------------------------------*/
if (must_flush_log) {