summary | refs | log | tree | commit | diff
path: root/innobase
diff options
context:
space:
mode:
Diffstat (limited to 'innobase')
-rw-r--r-- innobase/btr/btr0btr.c 30
-rw-r--r-- innobase/btr/btr0cur.c 93
-rw-r--r-- innobase/btr/btr0sea.c 164
-rw-r--r-- innobase/buf/buf0buf.c 78
-rw-r--r-- innobase/buf/buf0rea.c 29
-rw-r--r-- innobase/data/data0data.c 115
-rw-r--r-- innobase/dict/dict0crea.c 1
-rw-r--r-- innobase/dict/dict0dict.c 16
-rw-r--r-- innobase/dict/dict0load.c 711
-rw-r--r-- innobase/dict/dict0mem.c 3
-rw-r--r-- innobase/fil/fil0fil.c 9
-rw-r--r-- innobase/fsp/fsp0fsp.c 22
-rw-r--r-- innobase/ha/ha0ha.c 43
-rw-r--r-- innobase/ibuf/ibuf0ibuf.c 63
-rw-r--r-- innobase/include/btr0btr.h 10
-rw-r--r-- innobase/include/btr0cur.h 3
-rw-r--r-- innobase/include/btr0sea.h 3
-rw-r--r-- innobase/include/buf0buf.h 21
-rw-r--r-- innobase/include/buf0buf.ic 4
-rw-r--r-- innobase/include/buf0rea.h 2
-rw-r--r-- innobase/include/data0data.h 10
-rw-r--r-- innobase/include/data0data.ic 24
-rw-r--r-- innobase/include/ha0ha.h 2
-rw-r--r-- innobase/include/ibuf0ibuf.h 6
-rw-r--r-- innobase/include/lock0lock.h 10
-rw-r--r-- innobase/include/log0log.h 56
-rw-r--r-- innobase/include/log0log.ic 29
-rw-r--r-- innobase/include/os0file.h 18
-rw-r--r-- innobase/include/os0thread.h 6
-rw-r--r-- innobase/include/page0page.h 2
-rw-r--r-- innobase/include/page0page.ic 42
-rw-r--r-- innobase/include/rem0rec.ic 2
-rw-r--r-- innobase/include/row0mysql.h 13
-rw-r--r-- innobase/include/srv0srv.h 8
-rw-r--r-- innobase/include/sync0arr.h 2
-rw-r--r-- innobase/include/sync0sync.h 12
-rw-r--r-- innobase/include/trx0roll.h 8
-rw-r--r-- innobase/include/trx0sys.h 52
-rw-r--r-- innobase/include/trx0trx.h 28
-rw-r--r-- innobase/include/univ.i 41
-rw-r--r-- innobase/include/ut0ut.h 9
-rw-r--r-- innobase/include/ut0ut.ic 22
-rw-r--r-- innobase/lock/lock0lock.c 148
-rw-r--r-- innobase/log/log0log.c 222
-rw-r--r-- innobase/log/log0recv.c 147
-rw-r--r-- innobase/mtr/mtr0log.c 6
-rw-r--r-- innobase/mtr/mtr0mtr.c 2
-rw-r--r-- innobase/os/os0file.c 191
-rw-r--r-- innobase/os/os0sync.c 2
-rw-r--r-- innobase/page/page0cur.c 9
-rw-r--r-- innobase/page/page0page.c 60
-rw-r--r-- innobase/rem/rem0cmp.c 13
-rw-r--r-- innobase/row/row0ins.c 43
-rw-r--r-- innobase/row/row0mysql.c 79
-rw-r--r-- innobase/row/row0purge.c 8
-rw-r--r-- innobase/row/row0umod.c 5
-rw-r--r-- innobase/row/row0upd.c 30
-rw-r--r-- innobase/srv/srv0srv.c 324
-rw-r--r-- innobase/srv/srv0start.c 64
-rw-r--r-- innobase/sync/sync0arr.c 78
-rw-r--r-- innobase/sync/sync0sync.c 33
-rw-r--r-- innobase/trx/trx0roll.c 29
-rw-r--r-- innobase/trx/trx0sys.c 121
-rw-r--r-- innobase/trx/trx0trx.c 135
-rw-r--r-- innobase/trx/trx0undo.c 4
-rw-r--r-- innobase/ut/ut0mem.c 10
-rw-r--r-- innobase/ut/ut0ut.c 66
67 files changed, 2586 insertions, 1065 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
index 0263996a429..38d97785832 100644
--- a/innobase/btr/btr0btr.c
+++ b/innobase/btr/btr0btr.c
@@ -572,6 +572,13 @@ btr_page_get_father_for_rec(
if (btr_node_ptr_get_child_page_no(node_ptr) !=
buf_frame_get_page_no(page)) {
+ fprintf(stderr,
+"InnoDB: Dump of the child page:\n");
+ buf_page_print(buf_frame_align(page));
+ fprintf(stderr,
+"InnoDB: Dump of the parent page:\n");
+ buf_page_print(buf_frame_align(node_ptr));
+
fprintf(stderr,
"InnoDB: Corruption of an index tree: table %s, index %s,\n"
"InnoDB: father ptr page no %lu, child page no %lu\n",
@@ -581,6 +588,12 @@ btr_page_get_father_for_rec(
buf_frame_get_page_no(page));
page_rec_print(page_rec_get_next(page_get_infimum_rec(page)));
page_rec_print(node_ptr);
+
+ fprintf(stderr,
+"InnoDB: You should dump + drop + reimport the table to fix the\n"
+"InnoDB: corruption. If the crash happens at the database startup, see\n"
+"InnoDB: section 6.1 of http://www.innodb.com/ibman.html about forcing\n"
+"InnoDB: recovery. Then dump + drop + reimport.\n");
}
ut_a(btr_node_ptr_get_child_page_no(node_ptr) ==
@@ -780,12 +793,14 @@ top_loop:
/*****************************************************************
Reorganizes an index page. */
-
+static
void
btr_page_reorganize_low(
/*====================*/
- ibool low, /* in: TRUE if locks should not be updated, i.e.,
- there cannot exist locks on the page */
+ ibool recovery,/* in: TRUE if called in recovery: locks should not
+ be updated, i.e., there cannot exist locks on the
+ page, and a hash index should not be dropped: it
+ cannot exist */
page_t* page, /* in: page to be reorganized */
mtr_t* mtr) /* in: mtr */
{
@@ -805,7 +820,9 @@ btr_page_reorganize_low(
/* Copy the old page to temporary space */
buf_frame_copy(new_page, page);
- btr_search_drop_page_hash_index(page);
+ if (!recovery) {
+ btr_search_drop_page_hash_index(page);
+ }
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
@@ -820,7 +837,7 @@ btr_page_reorganize_low(
/* Copy max trx id to recreated page */
page_set_max_trx_id(page, page_get_max_trx_id(new_page));
- if (!low) {
+ if (!recovery) {
/* Update the record lock bitmaps */
lock_move_reorganize_page(page, new_page);
}
@@ -2059,8 +2076,7 @@ btr_discard_page(
btr_search_drop_page_hash_index(page);
- if ((left_page_no == FIL_NULL)
- && (btr_page_get_level(page, mtr) > 0)) {
+ if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
/* We have to mark the leftmost node pointer on the right
side page as the predefined minimum record */
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index 1274719cf7d..1fe322be81e 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -36,9 +36,16 @@ Created 10/16/1994 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "lock0lock.h"
+/* If the following is set to TRUE, this module prints a lot of
+trace information of individual record operations */
+ibool btr_cur_print_record_ops = FALSE;
+
ulint btr_cur_rnd = 0;
ulint btr_cur_n_non_sea = 0;
+ulint btr_cur_n_sea = 0;
+ulint btr_cur_n_non_sea_old = 0;
+ulint btr_cur_n_sea_old = 0;
/* In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
@@ -187,11 +194,7 @@ btr_cur_search_to_nth_level(
tuple must be set so that it cannot get
compared to the node ptr page number field! */
ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page relative to the
- record! Inserts should always be made using
+ Inserts should always be made using
PAGE_CUR_LE to search the position! */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
@@ -268,7 +271,7 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
#endif
- if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED
+ if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
&& btr_search_guess_on_hash(index, info, tuple, mode,
@@ -283,14 +286,14 @@ btr_cur_search_to_nth_level(
|| mode != PAGE_CUR_LE);
ut_ad(cursor->low_match != ULINT_UNDEFINED
|| mode != PAGE_CUR_LE);
+ btr_cur_n_sea++;
+
return;
}
#endif
#endif
-
-#ifdef UNIV_SEARCH_PERF_STAT
btr_cur_n_non_sea++;
-#endif
+
/* If the hash search did not succeed, do binary search down the
tree */
@@ -796,15 +799,28 @@ btr_cur_optimistic_insert(
ulint data_size;
ulint extra_size;
ulint type;
- ulint err;
-
- ut_ad(dtuple_check_typed(entry));
+ ulint err;
*big_rec = NULL;
page = btr_cur_get_page(cursor);
index = cursor->index;
+	if (!dtuple_check_typed_no_assert(entry)) {
+		/* The table and index names are strings: the trace printout
+		right below prints the same two fields with %s. Formatting
+		them with %lu is undefined behavior and would print garbage
+		instead of the names. */
+		fprintf(stderr,
+"InnoDB: Error in a tuple to insert into table %s index %s\n",
+			index->table_name, index->name);
+	}
+
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to insert to table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ dtuple_print(entry);
+ }
+
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
max_size = page_get_max_insert_size_after_reorganize(page, 1);
@@ -928,7 +944,7 @@ calculate_sizes_again:
buf_frame_get_page_no(page), max_size,
rec_size + PAGE_DIR_SLOT_SIZE, type);
*/
- if (!(type & (DICT_CLUSTERED | DICT_UNIQUE))) {
+ if (!(type & DICT_CLUSTERED)) {
/* We have added a record to page: update its free bits */
ibuf_update_free_bits_if_full(cursor->index, page, max_size,
rec_size + PAGE_DIR_SLOT_SIZE);
@@ -1258,6 +1274,15 @@ btr_cur_update_sec_rec_in_place(
rec = btr_cur_get_rec(cursor);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr);
if (err != DB_SUCCESS) {
@@ -1312,6 +1337,15 @@ btr_cur_update_in_place(
index = cursor->index;
trx = thr_get_trx(thr);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
thr, &roll_ptr);
@@ -1323,6 +1357,12 @@ btr_cur_update_in_place(
block = buf_block_align(rec);
if (block->is_hashed) {
+ if (row_upd_changes_ord_field_binary(NULL, index, update)) {
+
+ /* Remove possible hash index pointer to this record */
+ btr_search_update_hash_on_delete(cursor);
+ }
+
rw_lock_x_lock(&btr_search_latch);
}
@@ -1398,6 +1438,15 @@ btr_cur_optimistic_update(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
if (!row_upd_changes_field_size(rec, index, update)) {
@@ -1973,6 +2022,15 @@ btr_cur_del_mark_set_clust_rec(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to del mark table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_get_deleted_flag(rec) == FALSE);
@@ -2102,6 +2160,15 @@ btr_cur_del_mark_set_sec_rec(
rec = btr_cur_get_rec(cursor);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to del mark table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ cursor->index->table_name, cursor->index->name);
+ rec_print(rec);
+ }
+
err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
thr);
if (err != DB_SUCCESS) {
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
index 8a54d9de9c0..cfdfefebd1a 100644
--- a/innobase/btr/btr0sea.c
+++ b/innobase/btr/btr0sea.c
@@ -15,6 +15,7 @@ Created 2/17/1996 Heikki Tuuri
#include "page0page.h"
#include "page0cur.h"
#include "btr0cur.h"
+#include "btr0pcur.h"
#include "btr0btr.h"
ulint btr_search_n_succ = 0;
@@ -145,6 +146,8 @@ btr_search_info_create(
info = mem_heap_alloc(heap, sizeof(btr_search_t));
+ info->magic_n = BTR_SEARCH_MAGIC_N;
+
info->last_search = NULL;
info->n_direction = 0;
info->root_guess = NULL;
@@ -159,6 +162,12 @@ btr_search_info_create(
info->n_patt_succ = 0;
info->n_searches = 0;
+ /* Set some sensible values */
+ info->n_fields = 1;
+ info->n_bytes = 0;
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+
return(info);
}
@@ -197,7 +206,7 @@ btr_search_info_update_hash(
/* Test if the search would have succeeded using the recommended
hash prefix */
- if ((info->n_fields >= n_unique) && (cursor->up_match >= n_unique)) {
+ if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
info->n_hash_potential++;
@@ -207,8 +216,8 @@ btr_search_info_update_hash(
cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->low_match, cursor->low_bytes);
- if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp <= 0))
- || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp > 0))) {
+ if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp <= 0)
+ || (info->side == BTR_SEARCH_RIGHT_SIDE && cmp > 0)) {
goto set_new_recomm;
}
@@ -216,8 +225,8 @@ btr_search_info_update_hash(
cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->up_match, cursor->up_bytes);
- if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp > 0))
- || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp <= 0))) {
+ if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp > 0)
+ || (info->side == BTR_SEARCH_RIGHT_SIDE && cmp <= 0)) {
goto set_new_recomm;
}
@@ -233,19 +242,18 @@ set_new_recomm:
info->hash_analysis = 0;
- if ((cursor->up_match >= n_unique)
- || (cursor->low_match >= n_unique)) {
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- info->side = BTR_SEARCH_LEFT_SIDE;
- }
-
cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
cursor->low_match, cursor->low_bytes);
if (cmp == 0) {
info->n_hash_potential = 0;
+ /* For extra safety, we set some sensible values here */
+
+ info->n_fields = 1;
+ info->n_bytes = 0;
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+
} else if (cmp > 0) {
info->n_hash_potential = 1;
@@ -305,6 +313,9 @@ btr_search_update_block_hash_info(
info->last_hash_succ = FALSE;
+ ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
+ ut_a(info->magic_n == BTR_SEARCH_MAGIC_N);
+
if ((block->n_hash_helps > 0)
&& (info->n_hash_potential > 0)
&& (block->n_fields == info->n_fields)
@@ -440,7 +451,9 @@ btr_search_info_update_slow(
rw_lock_x_unlock(&btr_search_latch);
}
- if (build_index) {
+ if (build_index) {
+ ut_a(block->n_fields + block->n_bytes > 0);
+
btr_search_build_page_hash_index(block->frame,
block->n_fields,
block->n_bytes,
@@ -622,6 +635,7 @@ btr_search_guess_on_hash(
dulint tree_id;
#ifdef notdefined
btr_cur_t cursor2;
+ btr_pcur_t pcur;
#endif
ut_ad(index && info && tuple && cursor && mtr);
ut_ad((latch_mode == BTR_SEARCH_LEAF)
@@ -664,6 +678,9 @@ btr_search_guess_on_hash(
rw_lock_s_lock(&btr_search_latch);
}
+ ut_a(btr_search_latch.writer != RW_LOCK_EX);
+ ut_a(btr_search_latch.reader_count > 0);
+
rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
if (!rec) {
@@ -754,7 +771,26 @@ btr_search_guess_on_hash(
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
&cursor2, 0, mtr);
- ut_a(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
+ if (mode == PAGE_CUR_GE
+ && btr_cur_get_rec(&cursor2) == page_get_supremum_rec(
+ buf_frame_align(btr_cur_get_rec(&cursor2)))) {
+
+ /* If mode is PAGE_CUR_GE, then the binary search
+ in the index tree may actually take us to the supremum
+ of the previous page */
+
+ info->last_hash_succ = FALSE;
+
+ btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
+ &pcur, mtr);
+ ut_a(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
+ } else {
+ ut_a(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
+ }
+
+ /* NOTE that it is theoretically possible that the above assertions
+ fail if the page of the cursor gets removed from the buffer pool
+ meanwhile! Thus it might not be a bug. */
info->last_hash_succ = TRUE;
#endif
@@ -835,6 +871,8 @@ btr_search_drop_page_hash_index(
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
+ ut_a(n_fields + n_bytes > 0);
+
rw_lock_s_unlock(&btr_search_latch);
n_recs = page_get_n_recs(page);
@@ -851,6 +889,14 @@ btr_search_drop_page_hash_index(
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
+ if (rec != sup) {
+ ut_a(n_fields <= rec_get_n_fields(rec));
+
+ if (n_bytes > 0) {
+ ut_a(n_fields < rec_get_n_fields(rec));
+ }
+ }
+
tree_id = btr_page_get_index_id(page);
prev_fold = 0;
@@ -861,7 +907,7 @@ btr_search_drop_page_hash_index(
fold = rec_fold(rec, n_fields, n_bytes, tree_id);
- if ((fold == prev_fold) && (prev_fold != 0)) {
+ if (fold == prev_fold && prev_fold != 0) {
goto next_rec;
}
@@ -873,6 +919,7 @@ btr_search_drop_page_hash_index(
n_cached++;
next_rec:
rec = page_rec_get_next(rec);
+ prev_fold = fold;
}
rw_lock_x_lock(&btr_search_latch);
@@ -913,7 +960,7 @@ btr_search_drop_page_hash_when_freed(
mtr_start(&mtr);
/* We assume that if the caller has a latch on the page,
- then the caller has already drooped the hash index for the page,
+ then the caller has already dropped the hash index for the page,
and we never get here. Therefore we can acquire the s-latch to
the page without fearing a deadlock. */
@@ -980,6 +1027,8 @@ btr_search_build_page_hash_index(
return;
}
+ ut_a(n_fields + n_bytes > 0);
+
/* Calculate and cache fold values and corresponding records into
an array for fast insertion to the hash index */
@@ -995,6 +1044,14 @@ btr_search_build_page_hash_index(
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
+ if (rec != sup) {
+ ut_a(n_fields <= rec_get_n_fields(rec));
+
+ if (n_bytes > 0) {
+ ut_a(n_fields < rec_get_n_fields(rec));
+ }
+ }
+
/* FIXME: in a mixed tree, all records may not have enough ordering
fields: */
@@ -1126,6 +1183,8 @@ btr_search_move_or_delete_hash_entries(
rw_lock_s_unlock(&btr_search_latch);
+ ut_a(n_fields + n_bytes > 0);
+
btr_search_build_page_hash_index(new_page, n_fields, n_bytes,
side);
ut_a(n_fields == block->curr_n_fields);
@@ -1166,9 +1225,11 @@ btr_search_update_hash_on_delete(
return;
}
+ ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+
table = btr_search_sys->hash_index;
- tree_id = ((cursor->index)->tree)->id;
+ tree_id = cursor->index->tree->id;
fold = rec_fold(rec, block->curr_n_fields, block->curr_n_bytes,
tree_id);
@@ -1285,7 +1346,6 @@ btr_search_update_hash_on_insert(
if (rec != page_get_infimum_rec(page)) {
fold = rec_fold(rec, n_fields, n_bytes, tree_id);
-
} else {
if (side == BTR_SEARCH_LEFT_SIDE) {
@@ -1370,7 +1430,7 @@ btr_search_print_info(void)
rw_lock_x_lock(&btr_search_latch);
- ha_print_info(btr_search_sys->hash_index);
+/* ha_print_info(btr_search_sys->hash_index); */
rw_lock_x_unlock(&btr_search_latch);
}
@@ -1436,11 +1496,71 @@ btr_search_validate(void)
/*=====================*/
/* out: TRUE if ok */
{
+ buf_block_t* block;
+ page_t* page;
+ ha_node_t* node;
+ ulint n_page_dumps = 0;
+ ibool ok = TRUE;
+ ulint i;
+ char rec_str[500];
+
rw_lock_x_lock(&btr_search_latch);
+
+ for (i = 0; i < hash_get_n_cells(btr_search_sys->hash_index); i++) {
+ node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+
+ while (node != NULL) {
+ block = buf_block_align(node->data);
+ page = buf_frame_align(node->data);
+
+ if (!block->is_hashed
+ || node->fold != rec_fold((rec_t*)(node->data),
+ block->curr_n_fields,
+ block->curr_n_bytes,
+ btr_page_get_index_id(page))) {
+ ok = FALSE;
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error in an adaptive hash index pointer to page %lu\n"
+"ptr mem address %lu index id %lu %lu, node fold %lu, rec fold %lu\n",
+ buf_frame_get_page_no(page),
+ (ulint)(node->data),
+ ut_dulint_get_high(btr_page_get_index_id(page)),
+ ut_dulint_get_low(btr_page_get_index_id(page)),
+ node->fold, rec_fold((rec_t*)(node->data),
+ block->curr_n_fields,
+ block->curr_n_bytes,
+ btr_page_get_index_id(page)));
+
+ rec_sprintf(rec_str, 450, (rec_t*)(node->data));
+
+			/* Terminate the line: the next fprintf starts a new
+			message ("Page mem address ...") and would otherwise
+			be glued to the end of this one */
+			fprintf(stderr,
+				"InnoDB: Record %s\n"
+				"InnoDB: on that page.\n", rec_str);
+
+ fprintf(stderr,
+"Page mem address %lu, is hashed %lu, n fields %lu, n bytes %lu\n"
+"side %lu\n",
+ (ulint)page, block->is_hashed, block->curr_n_fields,
+ block->curr_n_bytes, block->curr_side);
+
+ if (n_page_dumps < 20) {
+ buf_page_print(page);
+ n_page_dumps++;
+ }
+ }
+
+ node = node->next;
+ }
+ }
- ut_a(ha_validate(btr_search_sys->hash_index));
+ if (!ha_validate(btr_search_sys->hash_index)) {
+
+ ok = FALSE;
+ }
rw_lock_x_unlock(&btr_search_latch);
- return(TRUE);
+ return(ok);
}
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index e840e9f143d..663c6cefce6 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -1126,12 +1126,50 @@ buf_page_get_known_nowait(
}
/************************************************************************
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+	ulint		space,	/* in: space id */
+	ulint		offset,	/* in: offset of the page within space
+				in units of a page */
+	buf_block_t*	block)	/* in: block to init */
+{
+	/* Which page this block now holds */
+	block->space = space;
+	block->offset = offset;
+
+	/* Stamp the block with its magic number and mark it as
+	containing a file page */
+	block->magic_n = BUF_BLOCK_MAGIC_N;
+	block->state = BUF_BLOCK_FILE_PAGE;
+
+	/* Usage bookkeeping starts from zero */
+	block->io_fix = 0;
+	block->buf_fix_count = 0;
+	block->accessed = FALSE;
+	block->freed_page_clock = 0;
+	block->file_page_was_freed = FALSE;
+
+	/* Modification fields are zeroed */
+	block->oldest_modification = ut_dulint_zero;
+	block->newest_modification = ut_dulint_zero;
+
+	/* No lock hash value and no lock mutex */
+	block->lock_mutex = NULL;
+	block->lock_hash_val = 0;
+
+	/* The page is not hashed; the remaining hash-related fields get
+	the values 1 field, 0 bytes, left side */
+	block->is_hashed = FALSE;
+	block->n_hash_helps = 0;
+	block->side = BTR_SEARCH_LEFT_SIDE;
+	block->n_bytes = 0;
+	block->n_fields = 1;
+}
+
+/************************************************************************
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
- /* out: pointer to the block */
ulint space, /* in: space id */
ulint offset, /* in: offset of the page within space
in units of a page */
@@ -1141,6 +1179,8 @@ buf_page_init(
ut_ad(block->state == BUF_BLOCK_READY_FOR_USE);
/* Set the state of the block */
+ block->magic_n = BUF_BLOCK_MAGIC_N;
+
block->state = BUF_BLOCK_FILE_PAGE;
block->space = space;
block->offset = offset;
@@ -1758,8 +1798,10 @@ buf_get_n_pending_ios(void)
Prints info of the buffer i/o. */
void
-buf_print_io(void)
-/*==============*/
+buf_print_io(
+/*=========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end)/* in: buffer end */
{
time_t current_time;
double time_elapsed;
@@ -1767,19 +1809,28 @@ buf_print_io(void)
ut_ad(buf_pool);
+ if (buf_end - buf < 400) {
+
+ return;
+ }
+
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
mutex_enter(&(buf_pool->mutex));
- printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
- printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
- printf("Flush list length %lu \n",
+ buf += sprintf(buf,
+ "Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
+ buf += sprintf(buf,
+ "LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
+ buf += sprintf(buf,
+ "Flush list length %lu \n",
UT_LIST_GET_LEN(buf_pool->flush_list));
- printf("Buffer pool size %lu\n", size);
+ buf += sprintf(buf, "Buffer pool size %lu\n", size);
- printf("Pending reads %lu \n", buf_pool->n_pend_reads);
+ buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads);
- printf("Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+ buf += sprintf(buf,
+ "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
buf_pool->n_flush[BUF_FLUSH_LRU],
buf_pool->n_flush[BUF_FLUSH_LIST],
buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
@@ -1789,10 +1840,10 @@ buf_print_io(void)
buf_pool->last_printout_time = current_time;
- printf("Pages read %lu, created %lu, written %lu\n",
+ buf += sprintf(buf, "Pages read %lu, created %lu, written %lu\n",
buf_pool->n_pages_read, buf_pool->n_pages_created,
buf_pool->n_pages_written);
- printf("%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
+ buf += sprintf(buf, "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
/ time_elapsed,
(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
@@ -1801,13 +1852,14 @@ buf_print_io(void)
/ time_elapsed);
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
- printf("Buffer pool hit rate %lu / 1000\n",
+ buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n",
1000
- ((1000 *
(buf_pool->n_pages_read - buf_pool->n_pages_read_old))
/ (buf_pool->n_page_gets - buf_pool->n_page_gets_old)));
} else {
- printf("No buffer pool activity since the last printout\n");
+ buf += sprintf(buf,
+ "No buffer pool activity since the last printout\n");
}
buf_pool->n_page_gets_old = buf_pool->n_page_gets;
diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
index db187cdd896..475a5bd9cbd 100644
--- a/innobase/buf/buf0rea.c
+++ b/innobase/buf/buf0rea.c
@@ -100,6 +100,11 @@ buf_read_page_low(
block = buf_page_init_for_read(mode, space, offset);
if (block != NULL) {
+ if (buf_debug_prints) {
+ printf("Posting read request for page %lu, sync %lu\n",
+ offset, sync);
+ }
+
fil_io(OS_FILE_READ | wake_later,
sync, space, offset, 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
@@ -467,6 +472,12 @@ buf_read_ahead_linear(
count = 0;
+ /* Since Windows XP seems to schedule the i/o handler thread
+ very eagerly, and consequently it does not wait for the
+ full read batch to be posted, we use special heuristics here */
+
+ os_aio_simulated_put_read_threads_to_sleep();
+
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
@@ -556,16 +567,34 @@ buf_read_recv_pages(
highest page number the last in the array */
ulint n_stored) /* in: number of page numbers in the array */
{
+ ulint count;
ulint i;
for (i = 0; i < n_stored; i++) {
+ count = 0;
+
+ os_aio_print_debug = FALSE;
+
while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) {
os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000);
+
+ count++;
+
+ if (count > 100) {
+ fprintf(stderr,
+"InnoDB: Error: InnoDB has waited for 50 seconds for pending\n"
+"InnoDB: reads to the buffer pool to be finished.\n"
+"InnoDB: Number of pending reads %lu\n", buf_pool->n_pend_reads);
+
+ os_aio_print_debug = TRUE;
+ }
}
+ os_aio_print_debug = FALSE;
+
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
page_nos[i]);
diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c
index 03abcb9b6e0..8ab5acb4da7 100644
--- a/innobase/data/data0data.c
+++ b/innobase/data/data0data.c
@@ -64,6 +64,35 @@ dtuple_get_nth_field_noninline(
return(dtuple_get_nth_field(tuple, n));
}
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+				/* out: TRUE if equal */
+	dfield_t*	field,	/* in: field */
+	ulint		len,	/* in: data length or UNIV_SQL_NULL */
+	byte*		data)	/* in: data */
+{
+	if (field->len != len) {
+		/* Different lengths; this also covers the case where
+		exactly one of the two is SQL NULL */
+
+		return(FALSE);
+	}
+
+	if (len == UNIV_SQL_NULL) {
+		/* Both are SQL NULL: no data bytes are compared */
+
+		return(TRUE);
+	}
+
+	/* Same length: equal only if the bytes match */
+
+	return(ut_memcmp(field->data, data, len) == 0 ? TRUE : FALSE);
+}
+
/****************************************************************
Returns TRUE if lengths of two dtuples are equal and respective data fields
in them are equal when compared with collation in char fields (not as binary
@@ -154,6 +183,69 @@ dtuple_set_n_fields(
}
/**************************************************************
+Checks that a data field is typed. */
+static
+ibool
+dfield_check_typed_no_assert(
+/*=========================*/
+				/* out: TRUE if ok */
+	dfield_t*	field)	/* in: data field */
+{
+	/* The main type must lie in the range DATA_VARCHAR..DATA_MYSQL */
+
+	if (dfield_get_type(field)->mtype <= DATA_MYSQL
+	    && dfield_get_type(field)->mtype >= DATA_VARCHAR) {
+
+		return(TRUE);
+	}
+
+	/* Out of range: report it, but leave the decision to the caller
+	instead of asserting */
+
+	fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+		dfield_get_type(field)->mtype, dfield_get_len(field));
+
+	return(FALSE);
+}
+
+/**************************************************************
+Checks that a data tuple is typed. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple)	/* in: tuple */
+{
+	ulint	n_fields;
+	ulint	i;
+	char	err_buf[1000];
+
+	n_fields = dtuple_get_n_fields(tuple);
+
+	if (n_fields > REC_MAX_N_FIELDS) {
+		fprintf(stderr,
+"InnoDB: Error: index entry has %lu fields\n",
+			n_fields);
+
+		goto print_and_fail;
+	}
+
+	for (i = 0; i < n_fields; i++) {
+
+		/* The field check prints its own error message; here we
+		only add the dump of the whole tuple */
+
+		if (!dfield_check_typed_no_assert(
+				dtuple_get_nth_field(tuple, i))) {
+
+			goto print_and_fail;
+		}
+	}
+
+	return(TRUE);
+
+print_and_fail:
+	/* Common exit for both failure cases: print the tuple contents */
+
+	dtuple_sprintf(err_buf, 900, tuple);
+	fprintf(stderr,
+"InnoDB: Tuple contents: %s\n", err_buf);
+
+	return(FALSE);
+}
+
+/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
ibool
@@ -162,8 +254,15 @@ dfield_check_typed(
/* out: TRUE if ok */
dfield_t* field) /* in: data field */
{
- ut_a(dfield_get_type(field)->mtype <= DATA_MYSQL);
- ut_a(dfield_get_type(field)->mtype >= DATA_VARCHAR);
+ if (dfield_get_type(field)->mtype > DATA_MYSQL
+ || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+
+ fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+ dfield_get_type(field)->mtype, dfield_get_len(field));
+
+ ut_a(0);
+ }
return(TRUE);
}
@@ -460,9 +559,21 @@ dtuple_convert_big_rec(
ibool is_externally_stored;
ulint i;
ulint j;
+ char err_buf[1000];
+ ut_a(dtuple_check_typed_no_assert(entry));
+
size = rec_get_converted_size(entry);
+ if (size > 1000000000) {
+ fprintf(stderr,
+"InnoDB: Warning: tuple size very big: %lu\n", size);
+
+ dtuple_sprintf(err_buf, 900, entry);
+ fprintf(stderr,
+"InnoDB: Tuple contents: %s\n", err_buf);
+ }
+
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 0da59d39646..60beeacf435 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -153,6 +153,7 @@ dict_create_sys_tables_tuple(
if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
dfield_set_data(dfield, table->cluster_name,
ut_strlen(table->cluster_name));
+ ut_a(0); /* Oracle-style clusters are not supported yet */
} else {
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
}
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
index b053724c0e3..5abec8dbb1e 100644
--- a/innobase/dict/dict0dict.c
+++ b/innobase/dict/dict0dict.c
@@ -261,7 +261,7 @@ dict_table_get_index_noninline(
{
return(dict_table_get_index(table, name));
}
-
+
/************************************************************************
Initializes the autoinc counter. It is not an error to initialize an already
initialized counter. */
@@ -2810,6 +2810,12 @@ dict_update_statistics_low(
index = dict_table_get_first_index(table);
+ if (index == NULL) {
+ /* Table definition is corrupt */
+
+ return;
+ }
+
while (index) {
size = btr_get_size(index, BTR_TOTAL_SIZE);
@@ -3201,6 +3207,14 @@ dict_print_info_on_foreign_keys(
buf2 += sprintf(buf2, ")");
+ if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) {
+ buf2 += sprintf(buf2, " ON DELETE CASCADE");
+ }
+
+ if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) {
+ buf2 += sprintf(buf2, " ON DELETE SET NULL");
+ }
+
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
index 221a6c7dabb..e9caa37fecc 100644
--- a/innobase/dict/dict0load.c
+++ b/innobase/dict/dict0load.c
@@ -21,33 +21,6 @@ Created 4/24/1996 Heikki Tuuri
#include "dict0boot.h"
/************************************************************************
-Loads definitions for table columns. */
-static
-void
-dict_load_columns(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-/************************************************************************
-Loads definitions for table indexes. */
-static
-void
-dict_load_indexes(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-/************************************************************************
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-
-
-/************************************************************************
Finds the first table name in the given database. */
char*
@@ -194,7 +167,12 @@ loop:
fprintf(stderr, "InnoDB: Failed to load table %s\n",
table_name);
} else {
- dict_update_statistics_low(table, TRUE);
+ /* The table definition was corrupt if there
+ is no index */
+
+ if (dict_table_get_first_index(table)) {
+ dict_update_statistics_low(table, TRUE);
+ }
dict_table_print_low(table);
}
@@ -208,6 +186,361 @@ loop:
}
/************************************************************************
+Loads the column definitions of a table from the SYS_COLUMNS system table. */
+static
+void
+dict_load_columns(
+/*==============*/
+ dict_table_t* table, /* in/out: table; the loaded columns are added to it */
+ mem_heap_t* heap) /* in: memory heap for temporary storage */
+{
+ dict_table_t* sys_columns;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ byte* buf;
+ char* name_buf;
+ char* name;
+ ulint mtype;
+ ulint prtype;
+ ulint col_len;
+ ulint prec;
+ ulint i;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ mtr_start(&mtr);
+
+ sys_columns = dict_table_get_low((char*) "SYS_COLUMNS");
+ sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, table->id); /* search key: the 8-byte table id */
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) {
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+
+ ut_a(!rec_get_deleted_flag(rec));
+
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+ ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);
+
+ field = rec_get_nth_field(rec, 1, &len);
+ ut_ad(len == 4);
+ ut_a(i == mach_read_from_4(field)); /* field 1 must equal the loop position */
+
+ ut_a(0 == ut_strcmp((char*) "NAME",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_columns), 4))->name)); /* sanity: field 4 is NAME */
+
+ field = rec_get_nth_field(rec, 4, &len);
+
+ name_buf = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(name_buf, field, len);
+ name_buf[len] = '\0'; /* the copied name is given a terminating NUL */
+
+ name = name_buf;
+
+ field = rec_get_nth_field(rec, 5, &len);
+ mtype = mach_read_from_4(field);
+
+ field = rec_get_nth_field(rec, 6, &len);
+ prtype = mach_read_from_4(field);
+
+ field = rec_get_nth_field(rec, 7, &len);
+ col_len = mach_read_from_4(field);
+
+ ut_a(0 == ut_strcmp((char*) "PREC",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_columns), 8))->name)); /* sanity: field 8 is PREC */
+
+ field = rec_get_nth_field(rec, 8, &len);
+ prec = mach_read_from_4(field);
+
+ dict_mem_table_add_col(table, name, mtype, prtype, col_len,
+ prec);
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/************************************************************************
+Loads the field definitions of an index from the SYS_FIELDS system table. */
+static
+void
+dict_load_fields(
+/*=============*/
+ dict_table_t* table, /* in: table; only its name is used, in diagnostics */
+ dict_index_t* index, /* in/out: index whose fields to load */
+ mem_heap_t* heap) /* in: memory heap for temporary storage */
+{
+ dict_table_t* sys_fields;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ char* col_name;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ byte* buf;
+ ulint i;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ UT_NOT_USED(table);
+
+ mtr_start(&mtr);
+
+ sys_fields = dict_table_get_low((char*) "SYS_FIELDS");
+ sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, index->id); /* search key: the 8-byte index id */
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < index->n_fields; i++) {
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ if (rec_get_deleted_flag(rec)) { /* reported only; loading continues */
+ fprintf(stderr,
+"InnoDB: Error: data dictionary entry for table %s is corrupt!\n"
+"InnoDB: An index field is delete marked.\n",
+ table->name);
+ }
+
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+ ut_a(ut_memcmp(buf, field, len) == 0);
+
+ field = rec_get_nth_field(rec, 1, &len);
+ ut_ad(len == 4);
+ ut_a(i == mach_read_from_4(field)); /* field 1 must equal the loop position */
+
+ ut_a(0 == ut_strcmp((char*) "COL_NAME",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_fields), 4))->name)); /* sanity: field 4 is COL_NAME */
+
+ field = rec_get_nth_field(rec, 4, &len);
+
+ col_name = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(col_name, field, len);
+ col_name[len] = '\0'; /* the copied name is given a terminating NUL */
+
+ dict_mem_index_add_field(index, col_name, 0);
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/************************************************************************
+Loads the index definitions of a table from the SYS_INDEXES system table and
+adds them to the data dictionary cache. */
+static
+ibool
+dict_load_indexes(
+/*==============*/
+ /* out: TRUE if ok, FALSE if the dictionary
+ entry of the table was found corrupt */
+ dict_table_t* table, /* in: table */
+ mem_heap_t* heap) /* in: memory heap for temporary storage */
+{
+ dict_table_t* sys_indexes;
+ dict_index_t* sys_index;
+ dict_index_t* index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ ulint name_len;
+ char* name_buf;
+ ulint type;
+ ulint space;
+ ulint page_no;
+ ulint n_fields;
+ byte* buf;
+ ibool is_sys_table;
+ dulint id;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ if ((ut_dulint_get_high(table->id) == 0)
+ && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
+ is_sys_table = TRUE; /* table id is below DICT_HDR_FIRST_ID */
+ } else {
+ is_sys_table = FALSE;
+ }
+
+ mtr_start(&mtr);
+
+ sys_indexes = dict_table_get_low((char*) "SYS_INDEXES");
+ sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, table->id); /* search key: the 8-byte table id */
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (;;) {
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+
+ if (ut_memcmp(buf, field, len) != 0) { /* record has a different table id: done */
+ break;
+ }
+
+ if (rec_get_deleted_flag(rec)) {
+ fprintf(stderr,
+"InnoDB: Error: data dictionary entry for table %s is corrupt!\n"
+"InnoDB: An index is delete marked.\n",
+ table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
+ field = rec_get_nth_field(rec, 1, &len);
+ ut_ad(len == 8);
+ id = mach_read_from_8(field);
+
+ ut_a(0 == ut_strcmp("NAME",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_indexes), 4))->name)); /* sanity: field 4 is NAME */
+
+ field = rec_get_nth_field(rec, 4, &name_len);
+
+ name_buf = mem_heap_alloc(heap, name_len + 1);
+ ut_memcpy(name_buf, field, name_len);
+ name_buf[name_len] = '\0'; /* the copied name is given a terminating NUL */
+
+ field = rec_get_nth_field(rec, 5, &len);
+ n_fields = mach_read_from_4(field);
+
+ field = rec_get_nth_field(rec, 6, &len);
+ type = mach_read_from_4(field);
+
+ field = rec_get_nth_field(rec, 7, &len);
+ space = mach_read_from_4(field);
+
+ ut_a(0 == ut_strcmp((char*) "PAGE_NO",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_indexes), 8))->name)); /* sanity: field 8 is PAGE_NO */
+
+ field = rec_get_nth_field(rec, 8, &len);
+ page_no = mach_read_from_4(field);
+
+ if (page_no == FIL_NULL) {
+
+ fprintf(stderr,
+ "InnoDB: Error: trying to load index %s for table %s\n"
+ "InnoDB: but the index tree has been freed!\n",
+ name_buf, table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
+ if ((type & DICT_CLUSTERED) == 0
+ && NULL == dict_table_get_first_index(table)) {
+
+ fprintf(stderr,
+ "InnoDB: Error: trying to load index %s for table %s\n"
+ "InnoDB: but the first index was not clustered!\n",
+ name_buf, table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
+ if (is_sys_table
+ && ((type & DICT_CLUSTERED)
+ || ((table == dict_sys->sys_tables)
+ && (name_len == ut_strlen("ID_IND"))
+ && (0 == ut_memcmp(name_buf, "ID_IND",
+ name_len))))) {
+
+ /* The index was created in memory already in
+ booting, so it is not created or cached again here */
+ } else {
+ index = dict_mem_index_create(table->name, name_buf,
+ space, type, n_fields);
+ index->page_no = page_no;
+ index->id = id;
+
+ dict_load_fields(table, index, heap);
+
+ dict_index_add_to_cache(table, index);
+ }
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(TRUE);
+}
+
+/************************************************************************
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
@@ -338,7 +671,7 @@ dict_load_table(
dict_load_columns(table, heap);
dict_table_add_to_cache(table);
-
+
dict_load_indexes(table, heap);
ut_a(DB_SUCCESS == dict_load_foreigns(table->name));
@@ -436,8 +769,6 @@ dict_load_table_on_id(
/* Load the table definition to memory */
table = dict_load_table(name);
-
- ut_a(table);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -468,324 +799,6 @@ dict_load_sys_table(
}
/************************************************************************
-Loads definitions for table columns. */
-static
-void
-dict_load_columns(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_columns;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- byte* buf;
- char* name_buf;
- char* name;
- ulint mtype;
- ulint prtype;
- ulint col_len;
- ulint prec;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_columns = dict_table_get_low((char *) "SYS_COLUMNS");
- sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
-
- ut_a(!rec_get_deleted_flag(rec));
-
- field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);
-
- field = rec_get_nth_field(rec, 1, &len);
- ut_ad(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- ut_a(0 == ut_strcmp((char *) "NAME",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_columns), 4))->name));
-
- field = rec_get_nth_field(rec, 4, &len);
-
- name_buf = mem_heap_alloc(heap, len + 1);
- ut_memcpy(name_buf, field, len);
- name_buf[len] = '\0';
-
- name = name_buf;
-
- field = rec_get_nth_field(rec, 5, &len);
- mtype = mach_read_from_4(field);
-
- field = rec_get_nth_field(rec, 6, &len);
- prtype = mach_read_from_4(field);
-
- field = rec_get_nth_field(rec, 7, &len);
- col_len = mach_read_from_4(field);
-
- ut_a(0 == ut_strcmp((char *) "PREC",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_columns), 8))->name));
-
- field = rec_get_nth_field(rec, 8, &len);
- prec = mach_read_from_4(field);
-
- dict_mem_table_add_col(table, name, mtype, prtype, col_len,
- prec);
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/************************************************************************
-Loads definitions for table indexes. Adds them to the data dictionary cache.
-*/
-static
-void
-dict_load_indexes(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_indexes;
- dict_index_t* sys_index;
- dict_index_t* index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint name_len;
- char* name_buf;
- ulint type;
- ulint space;
- ulint page_no;
- ulint n_fields;
- byte* buf;
- ibool is_sys_table;
- dulint id;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if ((ut_dulint_get_high(table->id) == 0)
- && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
- is_sys_table = TRUE;
- } else {
- is_sys_table = FALSE;
- }
-
- mtr_start(&mtr);
-
- sys_indexes = dict_table_get_low((char *) "SYS_INDEXES");
- sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (;;) {
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
-
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
-
- if (ut_memcmp(buf, field, len) != 0) {
- break;
- }
-
- ut_a(!rec_get_deleted_flag(rec));
-
- field = rec_get_nth_field(rec, 1, &len);
- ut_ad(len == 8);
- id = mach_read_from_8(field);
-
- ut_a(0 == ut_strcmp((char *) "NAME",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_indexes), 4))->name));
-
- field = rec_get_nth_field(rec, 4, &name_len);
-
- name_buf = mem_heap_alloc(heap, name_len + 1);
- ut_memcpy(name_buf, field, name_len);
- name_buf[name_len] = '\0';
-
- field = rec_get_nth_field(rec, 5, &len);
- n_fields = mach_read_from_4(field);
-
- field = rec_get_nth_field(rec, 6, &len);
- type = mach_read_from_4(field);
-
- field = rec_get_nth_field(rec, 7, &len);
- space = mach_read_from_4(field);
-
- ut_a(0 == ut_strcmp((char *) "PAGE_NO",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_indexes), 8))->name));
-
- field = rec_get_nth_field(rec, 8, &len);
- page_no = mach_read_from_4(field);
-
- if (is_sys_table
- && ((type & DICT_CLUSTERED)
- || ((table == dict_sys->sys_tables)
- && (name_len == ut_strlen((char *) "ID_IND"))
- && (0 == ut_memcmp(name_buf, (char *) "ID_IND",
- name_len))))) {
-
- /* The index was created in memory already in
- booting */
- } else {
- index = dict_mem_index_create(table->name, name_buf,
- space, type, n_fields);
- index->page_no = page_no;
- index->id = id;
-
- dict_load_fields(table, index, heap);
-
- if (index->type & DICT_CLUSTERED == 0
- && NULL == dict_table_get_first_index(table)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s for table %s\n"
- "InnoDB: but the first index was not clustered\n",
- index->name, table->name);
- } else {
- dict_index_add_to_cache(table, index);
- }
- }
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/************************************************************************
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_fields;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- char* col_name;
- rec_t* rec;
- byte* field;
- ulint len;
- byte* buf;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- UT_NOT_USED(table);
-
- mtr_start(&mtr);
-
- sys_fields = dict_table_get_low((char *) "SYS_FIELDS");
- sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, index->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < index->n_fields; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
- ut_a(!rec_get_deleted_flag(rec));
-
- field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_memcmp(buf, field, len) == 0);
-
- field = rec_get_nth_field(rec, 1, &len);
- ut_ad(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- ut_a(0 == ut_strcmp((char *) "COL_NAME",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_fields), 4))->name));
-
- field = rec_get_nth_field(rec, 4, &len);
-
- col_name = mem_heap_alloc(heap, len + 1);
- ut_memcpy(col_name, field, len);
- col_name[len] = '\0';
-
- dict_mem_index_add_field(index, col_name, 0);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/************************************************************************
Loads foreign key constraint col names (also for the referenced table). */
static
void
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
index 52f46062065..9a4c94de885 100644
--- a/innobase/dict/dict0mem.c
+++ b/innobase/dict/dict0mem.c
@@ -65,6 +65,9 @@ dict_mem_table_create(
table->cached = FALSE;
+ table->mix_id = ut_dulint_zero;
+ table->mix_len = 0;
+
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
UT_LIST_INIT(table->indexes);
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index 35f3792f041..da4aee5db76 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -573,17 +573,20 @@ fil_read_flushed_lsn_and_arch_log_no(
ulint* max_arch_log_no) /* in/out: */
{
byte* buf;
+ byte* buf2;
dulint flushed_lsn;
ulint arch_log_no;
- buf = ut_malloc(UNIV_PAGE_SIZE);
-
+ buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
+ /* Align the memory for a possible read from a raw device */
+ buf = ut_align(buf2, UNIV_PAGE_SIZE);
+
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
arch_log_no = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO);
- ut_free(buf);
+ ut_free(buf2);
if (!one_read_already) {
*min_flushed_lsn = flushed_lsn;
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
index 08608731f2e..32e7f5bcad7 100644
--- a/innobase/fsp/fsp0fsp.c
+++ b/innobase/fsp/fsp0fsp.c
@@ -2608,6 +2608,7 @@ fseg_free_page_low(
ulint not_full_n_used;
ulint state;
ulint i;
+ char errbuf[200];
ut_ad(seg_inode && mtr);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
@@ -2621,8 +2622,25 @@ fseg_free_page_low(
descr = xdes_get_descriptor(space, page, mtr);
ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
- == FALSE);
+ if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+ != FALSE) { /* the page to free is already marked free: report and assert */
+ ut_sprintf_buf(errbuf, descr, 40);
+ fprintf(stderr,
+"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf);
+
+ fprintf(stderr,
+"InnoDB: Serious error! InnoDB is trying to free page %lu\n"
+"InnoDB: though it is already marked as free in the tablespace!\n"
+"InnoDB: The tablespace free space info is corrupt.\n"
+"InnoDB: You may need to dump your InnoDB tables and recreate the whole\n"
+"InnoDB: database!\n", page);
+
+ fprintf(stderr,
+"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
+"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n");
+ ut_a(0);
+ }
+
state = xdes_get_state(descr, mtr);
if (state != XDES_FSEG) {
diff --git a/innobase/ha/ha0ha.c b/innobase/ha/ha0ha.c
index 3e4473126cf..c3ad6cdca76 100644
--- a/innobase/ha/ha0ha.c
+++ b/innobase/ha/ha0ha.c
@@ -194,7 +194,7 @@ ha_delete(
node = ha_search_with_data(table, fold, data);
- ut_ad(node);
+ ut_a(node);
ha_delete_hash_node(table, node);
}
@@ -232,6 +232,16 @@ ha_remove_all_nodes_to_page(
node = ha_chain_get_next(table, node);
}
}
+
+ /* Check that all nodes really got deleted */
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ ut_a(buf_frame_align(ha_node_get_data(node)) != page);
+
+ node = ha_chain_get_next(table, node);
+ }
}
/*****************************************************************
@@ -245,6 +255,7 @@ ha_validate(
{
hash_cell_t* cell;
ha_node_t* node;
+ ibool ok = TRUE;
ulint i;
for (i = 0; i < hash_get_n_cells(table); i++) {
@@ -254,13 +265,21 @@ ha_validate(
node = cell->node;
while (node) {
- ut_a(hash_calc_hash(node->fold, table) == i);
+ if (hash_calc_hash(node->fold, table) != i) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+"InnoDB: Error: hash table node fold value %lu does not\n"
+"InnoDB: match with the cell number %lu.\n",
+ node->fold, i);
+
+ ok = FALSE;
+ }
node = node->next;
}
}
- return(TRUE);
+ return(ok);
}
/*****************************************************************
@@ -269,16 +288,22 @@ Prints info of a hash table. */
void
ha_print_info(
/*==========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end,/* in: buffer end */
hash_table_t* table) /* in: hash table */
{
hash_cell_t* cell;
- ha_node_t* node;
+/* ha_node_t* node; */
ulint nodes = 0;
ulint cells = 0;
ulint len = 0;
ulint max_len = 0;
ulint i;
+ if (buf_end - buf < 200) {
+ return;
+ }
+
for (i = 0; i < hash_get_n_cells(table); i++) {
cell = hash_get_nth_cell(table, i);
@@ -286,7 +311,7 @@ ha_print_info(
if (cell->node) {
cells++;
-
+/*
len = 0;
node = cell->node;
@@ -306,12 +331,10 @@ ha_print_info(
if (len > max_len) {
max_len = len;
}
+*/
}
}
- printf("Hash table size %lu, used cells %lu, nodes %lu\n",
- hash_get_n_cells(table), cells, nodes);
- printf("max chain length %lu\n", max_len);
-
- ut_a(ha_validate(table));
+ buf += sprintf(buf, "Hash table size %lu, used cells %lu\n",
+ hash_get_n_cells(table), cells);
}
diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
index a6355ce7ca5..b7d691485cc 100644
--- a/innobase/ibuf/ibuf0ibuf.c
+++ b/innobase/ibuf/ibuf0ibuf.c
@@ -687,21 +687,21 @@ ibuf_bitmap_get_map_page(
/****************************************************************************
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap pag
+ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
UNIV_INLINE
void
ibuf_set_free_bits_low(
/*===================*/
ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is reset if the index is
- a non-clustered non-unique, and page level is 0 */
+ page_t* page, /* in: index page; free bit is set if the index is
+ non-clustered and page level is 0 */
ulint val, /* in: value to set: < 4 */
mtr_t* mtr) /* in: mtr */
{
page_t* bitmap_page;
- if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+ if (type & DICT_CLUSTERED) {
return;
}
@@ -735,8 +735,8 @@ void
ibuf_set_free_bits(
/*===============*/
ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is reset if the index is
- a non-clustered non-unique, and page level is 0 */
+ page_t* page, /* in: index page; free bit is set if the index is
+ non-clustered and page level is 0 */
ulint val, /* in: value to set: < 4 */
ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
the bits must have before setting; this is for
@@ -745,7 +745,7 @@ ibuf_set_free_bits(
mtr_t mtr;
page_t* bitmap_page;
- if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+ if (type & DICT_CLUSTERED) {
return;
}
@@ -2026,7 +2026,7 @@ ibuf_insert_low(
ulint n_stored;
ulint bits;
- ut_a(!(index->type & (DICT_UNIQUE | DICT_CLUSTERED)));
+ ut_a(!(index->type & DICT_CLUSTERED));
ut_ad(dtuple_check_typed(entry));
do_merge = FALSE;
@@ -2256,10 +2256,7 @@ ibuf_insert(
ut_ad(dtuple_check_typed(entry));
- if (index->type & DICT_CLUSTERED || index->type & DICT_UNIQUE) {
-
- return(FALSE);
- }
+ ut_a(!(index->type & DICT_CLUSTERED));
if (rec_get_converted_size(entry)
>= page_get_free_space_of_empty() / 2) {
@@ -2304,6 +2301,7 @@ ibuf_insert_to_index_page(
rec_t* rec;
page_t* bitmap_page;
ulint old_bits;
+ char errbuf[1000];
ut_ad(ibuf_inside());
ut_ad(dtuple_check_typed(entry));
@@ -2326,11 +2324,24 @@ ibuf_insert_to_index_page(
/* This time the record must fit */
if (!page_cur_tuple_insert(&page_cur, entry, mtr)) {
- printf(
- "Ibuf insert fails; page free %lu, dtuple size %lu\n",
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+"InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n",
page_get_max_insert_size(page, 1),
rec_get_converted_size(entry));
+ dtuple_sprintf(errbuf, 900, entry);
+
+ fprintf(stderr,
+"InnoDB: Cannot insert index record %s\n", errbuf);
+
+ fprintf(stderr,
+"InnoDB: The table where where this index record belongs\n"
+"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
+"InnoDB: that table.\n");
+
bitmap_page = ibuf_bitmap_get_map_page(
buf_frame_get_space_id(page),
buf_frame_get_page_no(page),
@@ -2341,9 +2352,11 @@ ibuf_insert_to_index_page(
buf_frame_get_page_no(page),
IBUF_BITMAP_FREE, mtr);
- printf("Bitmap bits %lu\n", old_bits);
-
- ut_error;
+ fprintf(stderr, "Bitmap bits %lu\n", old_bits);
+
+ fprintf(stderr,
+"InnoDB: Send a detailed bug report to mysql@lists.mysql.com!\n");
+
}
}
}
@@ -2692,22 +2705,30 @@ ibuf_validate_low(void)
Prints info of ibuf. */
void
-ibuf_print(void)
-/*============*/
+ibuf_print(
+/*=======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end)/* in: buffer end */
{
ibuf_data_t* data;
#ifdef UNIV_IBUF_DEBUG
ulint i;
#endif
+ if (buf_end - buf < 500) {
+ return;
+ }
+
mutex_enter(&ibuf_mutex);
data = UT_LIST_GET_FIRST(ibuf->data_list);
while (data) {
- printf(
+ buf += sprintf(buf,
"Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,\n",
data->space, data->size, data->free_list_len, data->seg_size);
- printf("%lu inserts, %lu merged recs, %lu merges\n",
+
+ buf += sprintf(buf,
+ "%lu inserts, %lu merged recs, %lu merges\n",
data->n_inserts, data->n_merged_recs, data->n_merges);
#ifdef UNIV_IBUF_DEBUG
for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index d22f9d79c1c..bf433c0c264 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -204,16 +204,6 @@ btr_page_reorganize(
page_t* page, /* in: page to be reorganized */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize_low(
-/*====================*/
- ibool low, /* in: TRUE if locks should not be updated, i.e.,
- there cannot exist locks on the page */
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index bce1f0685cc..b01cbd9a875 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -709,6 +709,9 @@ allowed to free an inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64
extern ulint btr_cur_n_non_sea;
+extern ulint btr_cur_n_sea;
+extern ulint btr_cur_n_non_sea_old;
+extern ulint btr_cur_n_sea_old;
#ifndef UNIV_NONINL
#include "btr0cur.ic"
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index fdf5cf375a3..14feca5d5c5 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -176,6 +176,7 @@ btr_search_validate(void);
/* The search info struct in an index */
struct btr_search_struct{
+ ulint magic_n; /* magic number */
/* The following 4 fields are currently not used: */
rec_t* last_search; /* pointer to the lower limit record of the
previous search; NULL if not known */
@@ -220,6 +221,8 @@ struct btr_search_struct{
ulint n_searches; /* number of searches */
};
+#define BTR_SEARCH_MAGIC_N 1112765
+
/* The hash index system */
typedef struct btr_search_sys_struct btr_search_sys_t;
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 5ddbf39335a..b80ed96f54c 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -219,6 +219,16 @@ buf_page_create(
a page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_block_t* block); /* in: block to init */
+/************************************************************************
Decrements the bufferfix count of a buffer control block and releases
a latch, if specified. */
UNIV_INLINE
@@ -438,7 +448,7 @@ Prints info of the buffer pool data structure. */
void
buf_print(void);
-/*===========*/
+/*============*/
/*************************************************************************
Returns the number of pending buf pool ios. */
@@ -449,8 +459,10 @@ buf_get_n_pending_ios(void);
Prints info of the buffer i/o. */
void
-buf_print_io(void);
-/*==============*/
+buf_print_io(
+/*=========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end);/* in: buffer end */
/*************************************************************************
Checks that all file pages in the buffer are in a replaceable state. */
@@ -605,6 +617,7 @@ struct buf_block_struct{
/* 1. General fields */
+ ulint magic_n; /* magic number to check */
ulint state; /* state of the control block:
BUF_BLOCK_NOT_USED, ... */
byte* frame; /* pointer to buffer frame which
@@ -729,6 +742,8 @@ struct buf_block_struct{
frees a page in buffer pool */
};
+#define BUF_BLOCK_MAGIC_N 41526563
+
/* The buffer pool structure. NOTE! The definition appears here only for
other modules of this directory (buf) to see it. Do not use from outside! */
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index 52bee0eb282..e5a2c480922 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -209,7 +209,7 @@ buf_block_align(
ut_ad((ulint)ptr >= (ulint)frame_zero);
- block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
ut_a(block >= buf_pool->blocks);
ut_a(block < buf_pool->blocks + buf_pool->max_size);
@@ -236,7 +236,7 @@ buf_block_align_low(
ut_ad((ulint)ptr >= (ulint)frame_zero);
- block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT);
ut_a(block >= buf_pool->blocks);
ut_a(block < buf_pool->blocks + buf_pool->max_size);
diff --git a/innobase/include/buf0rea.h b/innobase/include/buf0rea.h
index 1efe67369ab..aed965a6b21 100644
--- a/innobase/include/buf0rea.h
+++ b/innobase/include/buf0rea.h
@@ -89,7 +89,7 @@ buf_read_recv_pages(
/* The size in pages of the area which the read-ahead algorithms read if
invoked */
-#define BUF_READ_AHEAD_AREA ut_min(32, buf_pool->curr_size / 16)
+#define BUF_READ_AHEAD_AREA ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
/* Modes used in read-ahead */
#define BUF_READ_IBUF_PAGES_ONLY 131
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
index c19d7ea5552..e0fb06e5018 100644
--- a/innobase/include/data0data.h
+++ b/innobase/include/data0data.h
@@ -123,7 +123,7 @@ dfield_datas_are_binary_equal(
dfield_t* field2);/* in: field */
/*************************************************************************
Tests if dfield data length and content is equal to the given. */
-UNIV_INLINE
+
ibool
dfield_data_is_binary_equal(
/*========================*/
@@ -279,6 +279,14 @@ dtuple_check_typed(
/* out: TRUE if ok */
dtuple_t* tuple); /* in: tuple */
/**************************************************************
+Checks that a data tuple is typed. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set. */
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
index 0750a3894d1..d356664df21 100644
--- a/innobase/include/data0data.ic
+++ b/innobase/include/data0data.ic
@@ -154,30 +154,6 @@ dfield_datas_are_binary_equal(
}
/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-UNIV_INLINE
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data) /* in: data */
-{
- if (len != field->len) {
-
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL && 0 != ut_memcmp(field->data, data, len)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
Gets info bits in a data tuple. */
UNIV_INLINE
ulint
diff --git a/innobase/include/ha0ha.h b/innobase/include/ha0ha.h
index aeed7c32eff..945b1198a41 100644
--- a/innobase/include/ha0ha.h
+++ b/innobase/include/ha0ha.h
@@ -127,6 +127,8 @@ Prints info of a hash table. */
void
ha_print_info(
/*==========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end,/* in: buffer end */
hash_table_t* table); /* in: hash table */
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
index fac28461be4..a290e90e4db 100644
--- a/innobase/include/ibuf0ibuf.h
+++ b/innobase/include/ibuf0ibuf.h
@@ -269,8 +269,10 @@ ibuf_count_get(
Prints info of ibuf. */
void
-ibuf_print(void);
-/*============*/
+ibuf_print(
+/*=======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end);/* in: buffer end */
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index 5a15b78b869..80afba97416 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -460,6 +460,8 @@ Prints info of a table lock. */
void
lock_table_print(
/*=============*/
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
lock_t* lock); /* in: table type lock */
/*************************************************************************
Prints info of a record lock. */
@@ -467,13 +469,17 @@ Prints info of a record lock. */
void
lock_rec_print(
/*===========*/
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
lock_t* lock); /* in: record type lock */
/*************************************************************************
Prints info of locks for all transactions. */
void
-lock_print_info(void);
-/*=================*/
+lock_print_info(
+/*============*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end);/* in: buffer end */
/*************************************************************************
Validates the lock queue on a table. */
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index eeb4f2e45f1..5d848b85658 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -157,6 +157,14 @@ log_io_complete(
/*============*/
log_group_t* group); /* in: log group */
/**********************************************************
+Flushes the log files to the disk, using, for example, the Unix fsync.
+This function does the flush even if the user has set
+srv_flush_log_at_trx_commit = FALSE. */
+
+void
+log_flush_to_disk(void);
+/*===================*/
+/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been flushed to disk up to the last log entry written by the
transaction. If there is a flush running, it waits and checks if the flush
@@ -260,7 +268,9 @@ log_reset_first_header_and_checkpoint(
/*==================================*/
byte* hdr_buf,/* in: buffer which will be written to the start
of the first log file */
- dulint lsn); /* in: lsn of the start of the first log file */
+ dulint start); /* in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
/************************************************************************
Starts an archiving operation. */
@@ -463,6 +473,15 @@ log_block_init(
byte* log_block, /* in: pointer to the log buffer */
dulint lsn); /* in: lsn within the log block */
/****************************************************************
+Initializes a log block in the log buffer in the old, < 3.23.52 format, where
+there was no checksum yet. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn); /* in: lsn within the log block */
+/****************************************************************
Converts a lsn to a log block number. */
UNIV_INLINE
ulint
@@ -474,8 +493,10 @@ log_block_convert_lsn_to_no(
Prints info of the log. */
void
-log_print(void);
-/*===========*/
+log_print(
+/*======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end);/* in: buffer end */
extern log_t* log_sys;
@@ -523,7 +544,10 @@ extern log_t* log_sys;
bytes */
/* Offsets of a log block trailer from the end of the block */
-#define LOG_BLOCK_TRL_NO 4 /* log block number */
+#define LOG_BLOCK_TRL_CHECKSUM 4 /* 1 byte checksum of the log block
+ contents */
+#define LOG_BLOCK_TRL_NO 3 /* 3 lowest bytes of the log block
+ number */
#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
/* Offsets for a checkpoint field */
@@ -558,11 +582,22 @@ extern log_t* log_sys;
#define LOG_GROUP_ID 0 /* log group number */
#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number */
+#define LOG_FILE_NO 12 /* 4-byte archived log file number;
+ this field is only defined in an
+ archived log file */
+#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
+ /* a 32-byte field which contains
+ the string 'ibbackup' and the
+ creation time if the log file was
+ created by ibbackup --restore;
+ when mysqld is started for the first
+ time on the restored database, it can
+ print helpful info for the user */
#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
/* this 4-byte field is TRUE when
the writing of an archived log file
- has been completed */
+ has been completed; this field is
+ only defined in an archived log file */
#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
/* lsn where the archived log file
at least extends: actually the
@@ -572,7 +607,14 @@ extern log_t* log_sys;
is defined only when an archived log
file has been completely written */
#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
+ /* first checkpoint field in the log
+ header; we write alternately to the
+ checkpoint fields when we make new
+ checkpoints; this field is only defined
+ in the first log file of a log group */
#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
+ /* second checkpoint field in the log
+ header */
#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
#define LOG_GROUP_OK 301
@@ -678,7 +720,7 @@ struct log_struct{
write i/o has been completed for all
log groups */
dulint flush_lsn; /* end lsn for the current flush */
- ulint flush_end_offset;/* the data in buffer ha been flushed
+ ulint flush_end_offset;/* the data in buffer has been flushed
up to this offset when the current
flush ends: this field will then
be copied to buf_next_to_write */
diff --git a/innobase/include/log0log.ic b/innobase/include/log0log.ic
index e5c313d129b..36e65239374 100644
--- a/innobase/include/log0log.ic
+++ b/innobase/include/log0log.ic
@@ -179,7 +179,7 @@ log_block_get_trl_no(
trailer */
byte* log_block) /* in: log block */
{
- return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
+ return(mach_read_from_3(log_block + OS_FILE_LOG_BLOCK_SIZE
- LOG_BLOCK_TRL_NO));
}
@@ -192,8 +192,8 @@ log_block_set_trl_no(
byte* log_block, /* in: log block */
ulint n) /* in: log block number */
{
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO,
- n);
+ mach_write_to_3(log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO,
+ n & 0xFFFFFF);
}
/****************************************************************
@@ -237,6 +237,29 @@ log_block_init(
log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
log_block_set_first_rec_group(log_block, 0);
}
+
+/****************************************************************
+Initializes a log block in the log buffer in the old format, where there
+was no checksum yet. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn) /* in: lsn within the log block */
+{
+ ulint no;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ no = log_block_convert_lsn_to_no(lsn);
+
+ log_block_set_hdr_no(log_block, no);
+ mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_NO - 1, no);
+ log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+ log_block_set_first_rec_group(log_block, 0);
+}
/****************************************************************
Writes to the log the string given. The log must be released with
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index 01fa12955ff..b7911c5014a 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -16,6 +16,7 @@ Created 10/21/1995 Heikki Tuuri
os_file_write */
extern ibool os_do_not_call_flush_at_each_write;
extern ibool os_has_said_disk_full;
+extern ibool os_aio_print_debug;
#ifdef __WIN__
@@ -33,6 +34,8 @@ extern ibool os_has_said_disk_full;
typedef int os_file_t;
#endif
+extern ulint os_innodb_umask;
+
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
@@ -309,6 +312,15 @@ Wakes up simulated aio i/o-handler threads if they have something to do. */
void
os_aio_simulated_wake_handler_threads(void);
/*=======================================*/
+/**************************************************************************
+This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+
+void
+os_aio_simulated_put_read_threads_to_sleep(void);
+/*============================================*/
#ifdef WIN_ASYNC_IO
/**************************************************************************
@@ -391,8 +403,10 @@ os_aio_validate(void);
Prints info of the aio arrays. */
void
-os_aio_print(void);
-/*==============*/
+os_aio_print(
+/*=========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end);/* in: buffer end */
/**************************************************************************
Checks that all slots in the system have been freed, that is, there are
no pending io operations. */
diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h
index 2e4b6f0f6ee..0d6fa5a8f37 100644
--- a/innobase/include/os0thread.h
+++ b/innobase/include/os0thread.h
@@ -12,8 +12,10 @@ Created 9/8/1995 Heikki Tuuri
#include "univ.i"
-/* Maximum number of threads which can be created in the program */
-#define OS_THREAD_MAX_N 1000
+/* Maximum number of threads which can be created in the program;
+this is also the size of the wait slot array for MySQL threads which
+can wait inside InnoDB */
+#define OS_THREAD_MAX_N 10000
/* Possible fixed priorities for threads */
#define OS_THREAD_PRIORITY_NONE 100
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 8e68381b868..2f77127466f 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -328,7 +328,7 @@ page_dir_calc_reserved_space(
ulint n_recs); /* in: number of records */
/*******************************************************************
Looks for the directory slot which owns the given record. */
-UNIV_INLINE
+
ulint
page_dir_find_owner_slot(
/*=====================*/
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
index f84fe5a5606..e7c0f8ee07c 100644
--- a/innobase/include/page0page.ic
+++ b/innobase/include/page0page.ic
@@ -479,6 +479,8 @@ page_rec_get_next(
offs = rec_get_next_offs(rec);
+ ut_a(offs < UNIV_PAGE_SIZE);
+
if (offs == 0) {
return(NULL);
@@ -487,40 +489,6 @@ page_rec_get_next(
return(page + offs);
}
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-UNIV_INLINE
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- /* out: the directory slot number */
- rec_t* rec) /* in: the physical record */
-{
- ulint i;
- page_t* page;
- page_dir_slot_t* slot;
-
- ut_ad(page_rec_check(rec));
-
- while (rec_get_n_owned(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
-
- page = buf_frame_align(rec);
-
- i = page_dir_get_n_slots(page) - 1;
- slot = page_dir_get_nth_slot(page, i);
-
- while (page_dir_slot_get_rec(slot) != rec) {
- ut_a(i > 0);
-
- i--;
- slot = page_dir_get_nth_slot(page, i);
- }
-
- return(i);
-}
-
/****************************************************************
Sets the pointer to the next record on the page. */
UNIV_INLINE
@@ -534,7 +502,7 @@ page_rec_set_next(
page_t* page;
ut_ad(page_rec_check(rec));
- ut_ad((next == NULL)
+ ut_a((next == NULL)
|| (buf_frame_align(rec) == buf_frame_align(next)));
page = buf_frame_align(rec);
@@ -573,7 +541,7 @@ page_rec_get_prev(
slot_no = page_dir_find_owner_slot(rec);
- ut_ad(slot_no != 0);
+ ut_a(slot_no != 0);
slot = page_dir_get_nth_slot(page, slot_no - 1);
@@ -584,7 +552,7 @@ page_rec_get_prev(
rec2 = page_rec_get_next(rec2);
}
- ut_ad(prev_rec);
+ ut_a(prev_rec);
return(prev_rec);
}
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index 6b96e3056fa..aaa3c58a003 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -970,8 +970,6 @@ rec_fold(
ut_ad(n_fields <= rec_get_n_fields(rec));
ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0));
ut_ad(n_fields + n_bytes > 0);
- /* Only the page supremum and infimum records have 1 field: */
- ut_ad(rec_get_n_fields(rec) > 1);
n_fields_rec = rec_get_n_fields(rec);
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 13b3dffd874..8152c534f48 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -230,6 +230,19 @@ row_update_cascade_for_mysql(
or set null operation */
dict_table_t* table); /* in: table where we do the operation */
/*************************************************************************
+Locks the data dictionary exclusively for performing a table create
+operation. */
+
+void
+row_mysql_lock_data_dictionary(void);
+/*================================*/
+/*************************************************************************
+Unlocks the exclusive lock on the data dictionary. */
+
+void
+row_mysql_unlock_data_dictionary(void);
+/*==================================*/
+/*************************************************************************
Does a table creation operation for MySQL. If the name of the created
table ends to characters INNODB_MONITOR, then this also starts
printing of monitor output by the master thread. */
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index 6777a24e7db..178c7b6971f 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -357,6 +357,14 @@ srv_error_monitor_thread(
/* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by
os_thread_create */
+/**********************************************************************
+Sprintfs to a buffer the output of the InnoDB Monitor. */
+
+void
+srv_sprintf_innodb_monitor(
+/*=======================*/
+ char* buf, /* in/out: buffer which must be at least 4 kB */
+ ulint len); /* in: length of the buffer */
/* Types for the threads existing in the system. Threads of types 4 - 9
diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h
index f0134894997..765ad33afea 100644
--- a/innobase/include/sync0arr.h
+++ b/innobase/include/sync0arr.h
@@ -114,6 +114,8 @@ Prints info of the wait array. */
void
sync_array_print_info(
/*==================*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end,/* in: buffer end */
sync_array_t* arr); /* in: wait array */
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 4f55709a5d7..5bfa0bc2d48 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -117,14 +117,18 @@ FUNCTION PROTOTYPES FOR DEBUGGING */
Prints wait info of the sync system. */
void
-sync_print_wait_info(void);
-/*======================*/
+sync_print_wait_info(
+/*=================*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end); /* in: buffer end */
/***********************************************************************
Prints info of the sync system. */
void
-sync_print(void);
-/*============*/
+sync_print(
+/*=======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end); /* in: buffer end */
/**********************************************************************
Checks that the mutex has been initialized. */
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
index c456768e820..820af4cd014 100644
--- a/innobase/include/trx0roll.h
+++ b/innobase/include/trx0roll.h
@@ -102,11 +102,13 @@ trx_rollback(
calling function can start running
a new query thread */
/***********************************************************************
-Rollback uncommitted transactions which have no user session. */
+Rollback or clean up transactions which have no user session. If the
+transaction already was committed, then we clean up a possible insert
+undo log. If the transaction was not yet committed, then we roll it back. */
void
-trx_rollback_all_without_sess(void);
-/*===============================*/
+trx_rollback_or_clean_all_without_sess(void);
+/*========================================*/
/********************************************************************
Finishes a transaction rollback. */
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index 60d5adb72d1..b08df7f6901 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -24,6 +24,14 @@ Created 3/26/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "read0types.h"
+/* In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. We have successfully got the updates to InnoDB
+up to this position. If .._pos is -1, it means no crash recovery was needed,
+or there was no master log position info inside InnoDB. */
+
+extern char trx_sys_mysql_master_log_name[];
+extern ib_longlong trx_sys_mysql_master_log_pos;
+
/* The transaction system */
extern trx_sys_t* trx_sys;
@@ -229,13 +237,18 @@ trx_in_trx_list(
trx_t* in_trx);/* in: trx */
/*********************************************************************
Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. */
+which corresponds to the transaction just being committed. In a MySQL
+replication slave updates the latest master binlog position up to which
+replication has proceeded. */
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- trx_t* trx, /* in: transaction being committed */
- mtr_t* mtr); /* in: mtr */
+ char* file_name,/* in: MySQL log file name */
+ ib_longlong offset, /* in: position in that log file */
+ ulint field, /* in: offset of the MySQL log info field in
+ the trx sys header */
+ mtr_t* mtr); /* in: mtr */
/*********************************************************************
Prints to stderr the MySQL binlog offset info in the trx system header if
the magic number shows it valid. */
@@ -243,15 +256,17 @@ the magic number shows it valid. */
void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
+/*********************************************************************
+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it valid. */
+
+void
+trx_sys_print_mysql_master_log_pos(void);
+/*====================================*/
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one byte,
-therefore 256 */
-#define TRX_SYS_N_RSEGS 256
-
/* Space id and page no where the trx system file copy resides */
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
@@ -277,22 +292,29 @@ therefore 256 */
segment specification slots */
/*-------------------------------------------------------------*/
-#define TRX_SYS_MYSQL_LOG_NAME_LEN 32
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one byte,
+therefore 256; each slot is currently 8 bytes in size */
+#define TRX_SYS_N_RSEGS 256
+
+#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
+/* The offset of the MySQL replication info on the trx system header page;
+this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
+
/* The offset of the MySQL binlog offset info on the trx system header page */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 300)
+#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows
if we have valid data in the
MySQL binlog info; the value
is ..._MAGIC_N if yes */
-#define TRX_SYS_MYSQL_LOG_NAME 4 /* MySQL log file name */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH (4 + TRX_SYS_MYSQL_LOG_NAME_LEN)
- /* high 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset
within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW (8 + TRX_SYS_MYSQL_LOG_NAME_LEN)
- /* low 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset
within that file */
+#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */
/* The offset of the doublewrite buffer header on the trx system header page */
#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index 261f33d3dc3..83789966514 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -124,6 +124,15 @@ void
trx_commit_off_kernel(
/*==================*/
trx_t* trx); /* in: transaction */
+/********************************************************************
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction already got to the middle of a commit when the database
+crashed, and we cannot roll it back. */
+
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+ trx_t* trx); /* in: transaction */
/**************************************************************************
Does the transaction commit for MySQL. */
@@ -252,7 +261,9 @@ own the kernel mutex. */
void
trx_print(
/*======*/
- trx_t* trx); /* in: transaction */
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
+ trx_t* trx); /* in: transaction */
/* Signal to a transaction */
@@ -322,13 +333,24 @@ struct trx_struct{
void* mysql_thd; /* MySQL thread handle corresponding
to this trx, or NULL */
char* mysql_log_file_name;
- /* If MySQL binlog is used, this field
+ /* if MySQL binlog is used, this field
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- ib_longlong mysql_log_offset;/* If MySQL binlog is used, this field
+ ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field
contains the end offset of the binlog
entry */
+ char* mysql_master_log_file_name;
+ /* if the database server is a MySQL
+ replication slave, we have here the
+ master binlog name up to which
+ replication has processed; otherwise
+ this is a pointer to a null character */
+ ib_longlong mysql_master_log_pos;
+ /* if the database server is a MySQL
+ replication slave, this is the
+ position in the log file up to which
+ replication has processed */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
with this transaction object */
/*------------------------------*/
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index 160a435319a..c852741d5ac 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -9,41 +9,26 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER)
+#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER)
#define __WIN__
#include <windows.h>
-/* When compiling for Itanium IA64, undefine the flag below to prevent use
-of 32-bit assembler */
-
-#ifndef WIN64
+#if !defined(WIN64) && !defined(_WIN64)
#define UNIV_CAN_USE_X86_ASSEMBLER
#endif
-/* If you want to check for errors with compiler level -W4,
-comment out the above include of windows.h and let the following defines
-be defined:
-#define HANDLE void*
-#define CRITICAL_SECTION ulint
-*/
-
#ifdef _NT_
#define __NT__
#endif
#else
-/* The Unix version */
-
-/* Most C compilers other than gcc do not know 'extern inline' */
-#if !defined(__GNUC__) && !defined(__WIN__)
-#undef UNIV_MUST_NOT_INLINE
-#define UNIV_MUST_NOT_INLINE
-#endif
+/* The defines used with MySQL */
/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. We assume that 'innobase' is a
-subdirectory of 'mysql'. */
+in compiling more Posix-compatible. These headers also define __WIN__
+if we are compiling on Windows. */
+
#include <my_global.h>
#include <my_pthread.h>
@@ -60,6 +45,20 @@ subdirectory of 'mysql'. */
#include <sched.h>
#endif
+/* When compiling for Itanium IA64, undefine the flag below to prevent use
+of the 32-bit x86 assembler in mutex operations. */
+
+#if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64)
+#define UNIV_CAN_USE_X86_ASSEMBLER
+#endif
+
+/* We only try to do explicit inlining of functions with gcc and
+Microsoft Visual C++ */
+
+#if !defined(__GNUC__) && !defined(__WIN__)
+#define UNIV_MUST_NOT_INLINE
+#endif
+
#ifdef HAVE_PREAD
#define HAVE_PWRITE
#endif
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index 338460d7de9..408788016c1 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -114,7 +114,7 @@ ut_2_exp(
ulint n); /* in: number */
/*****************************************************************
Calculates fast the number rounded up to the nearest power of 2. */
-UNIV_INLINE
+
ulint
ut_2_power_up(
/*==========*/
@@ -155,6 +155,13 @@ ut_print_timestamp(
/*===============*/
FILE* file); /* in: file where to print */
/**************************************************************
+Sprintfs a timestamp to a buffer. */
+
+void
+ut_sprintf_timestamp(
+/*=================*/
+ char* buf); /* in: buffer where to sprintf */
+/**************************************************************
Returns current year, month, day. */
void
diff --git a/innobase/include/ut0ut.ic b/innobase/include/ut0ut.ic
index 90f25d2b382..9d7dd283f29 100644
--- a/innobase/include/ut0ut.ic
+++ b/innobase/include/ut0ut.ic
@@ -172,25 +172,3 @@ ut_2_exp(
{
return(1 << n);
}
-
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-UNIV_INLINE
-ulint
-ut_2_power_up(
-/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
-{
- ulint res;
-
- res = 1;
-
- ut_ad(n > 0);
-
- while (res < n) {
- res = res * 2;
- }
-
- return(res);
-}
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
index b168ba41771..7588a576a86 100644
--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -1542,6 +1542,15 @@ lock_rec_enqueue_waiting(
trx = thr_get_trx(thr);
+ if (trx->dict_operation) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: a record lock wait happens in a dictionary operation!\n"
+"InnoDB: Table name %s. Send a bug report to mysql@lists.mysql.com\n",
+index->table_name);
+ }
+
/* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
@@ -2915,7 +2924,7 @@ lock_table_enqueue_waiting(
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
-
+
/* Test if there already is some other reason to suspend thread:
we do not enqueue a lock request if the query thread should be
stopped anyway */
@@ -2927,6 +2936,15 @@ lock_table_enqueue_waiting(
}
trx = thr_get_trx(thr);
+
+ if (trx->dict_operation) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: a table lock wait happens in a dictionary operation!\n"
+"InnoDB: Table name %s. Send a bug report to mysql@lists.mysql.com\n",
+table->name);
+ }
/* Enqueue the lock request that will wait to be granted */
@@ -3309,34 +3327,37 @@ Prints info of a table lock. */
void
lock_table_print(
/*=============*/
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
lock_t* lock) /* in: table type lock */
{
ut_ad(mutex_own(&kernel_mutex));
ut_a(lock_get_type(lock) == LOCK_TABLE);
- printf("TABLE LOCK table %s trx id %lu %lu",
+ buf += sprintf(buf, "TABLE LOCK table %s trx id %lu %lu",
lock->un_member.tab_lock.table->name,
(lock->trx)->id.high, (lock->trx)->id.low);
if (lock_get_mode(lock) == LOCK_S) {
- printf(" lock mode S");
+ buf += sprintf(buf, " lock mode S");
} else if (lock_get_mode(lock) == LOCK_X) {
- printf(" lock_mode X");
+ buf += sprintf(buf, " lock_mode X");
} else if (lock_get_mode(lock) == LOCK_IS) {
- printf(" lock_mode IS");
+ buf += sprintf(buf, " lock_mode IS");
} else if (lock_get_mode(lock) == LOCK_IX) {
- printf(" lock_mode IX");
+ buf += sprintf(buf, " lock_mode IX");
} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
- printf(" lock_mode AUTO-INC");
+ buf += sprintf(buf, " lock_mode AUTO-INC");
} else {
- printf(" unknown lock_mode %lu", lock_get_mode(lock));
+ buf += sprintf(buf,
+ " unknown lock_mode %lu", lock_get_mode(lock));
}
if (lock_get_wait(lock)) {
- printf(" waiting");
+ buf += sprintf(buf, " waiting");
}
- printf("\n");
+ buf += sprintf(buf, "\n");
}
/*************************************************************************
@@ -3345,6 +3366,8 @@ Prints info of a record lock. */
void
lock_rec_print(
/*===========*/
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
lock_t* lock) /* in: record type lock */
{
page_t* page;
@@ -3352,8 +3375,7 @@ lock_rec_print(
ulint page_no;
ulint i;
ulint count = 0;
- ulint len;
- char buf[200];
+ char* buf_start = buf;
mtr_t mtr;
ut_ad(mutex_own(&kernel_mutex));
@@ -3362,32 +3384,32 @@ lock_rec_print(
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
- printf("RECORD LOCKS space id %lu page no %lu n bits %lu",
+ buf += sprintf(buf, "RECORD LOCKS space id %lu page no %lu n bits %lu",
space, page_no, lock_rec_get_n_bits(lock));
- printf(" table %s index %s trx id %lu %lu",
+ buf += sprintf(buf, " table %s index %s trx id %lu %lu",
lock->index->table->name, lock->index->name,
(lock->trx)->id.high, (lock->trx)->id.low);
if (lock_get_mode(lock) == LOCK_S) {
- printf(" lock mode S");
+ buf += sprintf(buf, " lock mode S");
} else if (lock_get_mode(lock) == LOCK_X) {
- printf(" lock_mode X");
+ buf += sprintf(buf, " lock_mode X");
} else {
ut_error;
}
if (lock_rec_get_gap(lock)) {
- printf(" gap type lock");
+ buf += sprintf(buf, " gap type lock");
}
if (lock_get_wait(lock)) {
- printf(" waiting");
+ buf += sprintf(buf, " waiting");
}
mtr_start(&mtr);
- printf("\n");
+ buf += sprintf(buf, "\n");
/* If the page is not in the buffer pool, we cannot load it
because we have the kernel mutex and ibuf operations would
@@ -3406,28 +3428,28 @@ lock_rec_print(
for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ if (buf - buf_start > 300) {
+
+ buf += sprintf(buf,
+ "Suppressing further record lock prints for this page\n");
+ break; /* not return: mtr_commit(&mtr) after the loop must run */
+ }
+
if (lock_rec_get_nth_bit(lock, i)) {
- printf("Record lock, heap no %lu ", i);
+ buf += sprintf(buf, "Record lock, heap no %lu ", i);
if (page) {
- len = rec_sprintf(buf, 190,
+ buf += rec_sprintf(buf, 120,
page_find_rec_with_heap_no(page, i));
- buf[len] = '\0';
- printf("%s", buf);
+ *buf = '\0';
}
- printf("\n");
+ buf += sprintf(buf, "\n");
count++;
}
-
- if (count >= 3) {
- printf(
- "3 LOCKS PRINTED FOR THIS TRX AND PAGE: SUPPRESSING FURTHER PRINTS\n");
- goto end_prints;
- }
}
-end_prints:
+
mtr_commit(&mtr);
}
@@ -3462,8 +3484,10 @@ lock_get_n_rec_locks(void)
Prints info of locks for all transactions. */
void
-lock_print_info(void)
-/*=================*/
+lock_print_info(
+/*============*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end)/* in: buffer end */
{
lock_t* lock;
trx_t* trx;
@@ -3476,11 +3500,15 @@ lock_print_info(void)
ulint i;
mtr_t mtr;
- printf("Trx id counter %lu %lu\n",
+ if (buf_end - buf < 600) {
+ return;
+ }
+
+ buf += sprintf(buf, "Trx id counter %lu %lu\n",
ut_dulint_get_high(trx_sys->max_trx_id),
ut_dulint_get_low(trx_sys->max_trx_id));
- printf(
+ buf += sprintf(buf,
"Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
ut_dulint_get_high(purge_sys->purge_trx_no),
ut_dulint_get_low(purge_sys->purge_trx_no),
@@ -3489,7 +3517,8 @@ lock_print_info(void)
lock_mutex_enter_kernel();
- printf("Total number of lock structs in row lock hash table %lu\n",
+ buf += sprintf(buf,
+ "Total number of lock structs in row lock hash table %lu\n",
lock_get_n_rec_locks());
/* First print info on non-active transactions */
@@ -3497,9 +3526,15 @@ lock_print_info(void)
trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
while (trx) {
+ if (buf_end - buf < 600) {
+ lock_mutex_exit_kernel(); /* do not return holding the mutex */
+ return;
+ }
if (trx->conc_state == TRX_NOT_STARTED) {
- printf("---");
- trx_print(trx);
+ buf += sprintf(buf, "---");
+ trx_print(buf, trx);
+
+ buf += strlen(buf);
}
trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
@@ -3528,12 +3563,22 @@ loop:
return;
}
+ if (buf_end - buf < 600) {
+ lock_mutex_exit_kernel(); /* do not return holding the mutex */
+ return;
+ }
if (nth_lock == 0) {
- printf("---");
- trx_print(trx);
+ buf += sprintf(buf, "---");
+ trx_print(buf, trx);
+ buf += strlen(buf);
+
+ if (buf_end - buf < 500) {
+ lock_mutex_exit_kernel(); /* do not return holding the mutex */
+ return;
+ }
if (trx->read_view) {
- printf(
+ buf += sprintf(buf,
"Trx read view will not see trx with id >= %lu %lu, sees < %lu %lu\n",
ut_dulint_get_high(trx->read_view->low_limit_id),
ut_dulint_get_low(trx->read_view->low_limit_id),
@@ -3542,16 +3587,17 @@ loop:
}
if (trx->que_state == TRX_QUE_LOCK_WAIT) {
- printf(
+ buf += sprintf(buf,
"------------------TRX IS WAITING FOR THE LOCK:\n");
if (lock_get_type(trx->wait_lock) == LOCK_REC) {
- lock_rec_print(trx->wait_lock);
+ lock_rec_print(buf, trx->wait_lock);
} else {
- lock_table_print(trx->wait_lock);
+ lock_table_print(buf, trx->wait_lock);
}
- printf(
+ buf += strlen(buf);
+ buf += sprintf(buf,
"------------------\n");
}
}
@@ -3580,6 +3626,10 @@ loop:
goto loop;
}
+ if (buf_end - buf < 500) {
+ lock_mutex_exit_kernel(); /* do not return holding the mutex */
+ return;
+ }
if (lock_get_type(lock) == LOCK_REC) {
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
@@ -3600,19 +3650,21 @@ loop:
goto loop;
}
- lock_rec_print(lock);
+ lock_rec_print(buf, lock);
} else {
ut_ad(lock_get_type(lock) == LOCK_TABLE);
- lock_table_print(lock);
+ lock_table_print(buf, lock);
}
+ buf += strlen(buf);
+
load_page_first = TRUE;
nth_lock++;
if (nth_lock >= 10) {
- printf(
+ buf += sprintf(buf,
"10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n");
nth_trx++;
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index 5ec1274d117..9748384183c 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -162,6 +162,8 @@ log_reserve_and_open(
ulint archived_lsn_age;
ulint count = 0;
ulint dummy;
+
+ ut_a(len < log->buf_size / 2);
loop:
mutex_enter(&(log->mutex));
@@ -663,6 +665,8 @@ log_init(void)
log_sys->buf_next_to_write = 0;
+ log_sys->flush_lsn = ut_dulint_zero;
+
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
@@ -777,9 +781,15 @@ log_group_init(
*(group->file_header_bufs + i) = ut_align(
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+
+ memset(*(group->file_header_bufs + i), '\0',
+ LOG_FILE_HDR_SIZE);
+
*(group->archive_file_header_bufs + i) = ut_align(
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+ memset(*(group->archive_file_header_bufs + i), '\0',
+ LOG_FILE_HDR_SIZE);
}
group->archive_space_id = archive_space_id;
@@ -791,6 +801,8 @@ log_group_init(
mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+ memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
+
UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
ut_a(log_calc_max_ages());
@@ -839,7 +851,7 @@ log_group_check_flush_completion(
{
ut_ad(mutex_own(&(log_sys->mutex)));
- if (!log_sys->one_flushed && (group->n_pending_writes == 0)) {
+ if (!log_sys->one_flushed && group->n_pending_writes == 0) {
if (log_debug_writes) {
printf("Log flushed first to group %lu\n", group->id);
@@ -933,16 +945,20 @@ log_io_complete(
return;
}
+ ut_a(0); /* We currently use synchronous writing of the
+ logs and cannot end up here! */
+
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2) {
fil_flush(group->space_id);
}
mutex_enter(&(log_sys->mutex));
- ut_ad(group->n_pending_writes > 0);
- ut_ad(log_sys->n_pending_writes > 0);
+ ut_a(group->n_pending_writes > 0);
+ ut_a(log_sys->n_pending_writes > 0);
group->n_pending_writes--;
log_sys->n_pending_writes--;
@@ -956,6 +972,57 @@ log_io_complete(
}
/**********************************************************
+Flushes the log files to the disk, using, for example, the Unix fsync.
+This function does the flush even if the user has set
+srv_flush_log_at_trx_commit = FALSE. */
+
+void
+log_flush_to_disk(void)
+/*===================*/
+{
+ log_group_t* group;
+loop:
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->n_pending_writes > 0) {
+ /* A log file write is running */
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Wait for the log file write to complete and try again */
+
+ os_event_wait(log_sys->no_flush_event);
+
+ goto loop;
+ }
+ /* Only the first log group in the list is flushed here */
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ /* Mark a write pending: callers arriving meanwhile wait above */
+ log_sys->n_pending_writes++;
+ group->n_pending_writes++;
+
+ os_event_reset(log_sys->no_flush_event);
+ os_event_reset(log_sys->one_flushed_event);
+
+ mutex_exit(&(log_sys->mutex));
+ /* The log mutex is not held during the flush itself */
+ fil_flush(group->space_id);
+
+ mutex_enter(&(log_sys->mutex));
+ /* The counters must still be what was set before the flush */
+ ut_a(group->n_pending_writes == 1);
+ ut_a(log_sys->n_pending_writes == 1);
+
+ group->n_pending_writes--;
+ log_sys->n_pending_writes--;
+ /* Set again the events that were reset before the flush */
+ os_event_set(log_sys->no_flush_event);
+ os_event_set(log_sys->one_flushed_event);
+
+ mutex_exit(&(log_sys->mutex));
+}
+
+/**********************************************************
Writes a log file header to a log file space. */
static
void
@@ -970,7 +1037,6 @@ log_group_file_header_flush(
{
byte* buf;
ulint dest_offset;
- ibool sync;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -981,15 +1047,11 @@ log_group_file_header_flush(
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
- dest_offset = nth_file * group->file_size;
-
- sync = FALSE;
+ /* Wipe over possible label of ibbackup --restore with 4 blanks */
+ memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
- if (type == LOG_RECOVER) {
+ dest_offset = nth_file * group->file_size;
- sync = TRUE;
- }
-
if (log_debug_writes) {
printf(
"Writing log file header to group %lu file %lu\n", group->id,
@@ -997,14 +1059,9 @@ log_group_file_header_flush(
}
if (log_do_write) {
- if (type == LOG_FLUSH) {
- log_sys->n_pending_writes++;
- group->n_pending_writes++;
- }
-
log_sys->n_log_ios++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -1013,6 +1070,31 @@ log_group_file_header_flush(
}
/**********************************************************
+Computes a 1-byte checksum for a log block and stores it in the trailer
+checksum field of the block before the block is written to a log file.
+Recovery uses it to check the consistency of a log block. The value is the
+8 low bits of 1 + the sum of the block bytes that precede the trailer. */
+static
+void
+log_block_store_checksum(
+/*=====================*/
+ byte* block) /* in/out: pointer to a log block */
+{
+ byte* ptr;
+ byte* body_end;
+ ulint total = 1;
+
+ body_end = block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+
+ for (ptr = block; ptr < body_end; ptr++) {
+ total += (ulint)(*ptr);
+ }
+
+ mach_write_to_1(block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM, total & 0xFF);
+}
+
+/**********************************************************
Writes a buffer to a log file group. */
void
@@ -1032,20 +1114,13 @@ log_group_write_buf(
header */
{
ulint write_len;
- ibool sync;
ibool write_header;
ulint next_offset;
+ ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- sync = FALSE;
-
- if (type == LOG_RECOVER) {
-
- sync = TRUE;
- }
+ ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
if (new_data_offset == 0) {
write_header = TRUE;
@@ -1076,7 +1151,6 @@ loop:
}
if (log_debug_writes) {
- ulint i;
printf(
"Writing log file segment to group %lu offset %lu len %lu\n"
@@ -1100,15 +1174,17 @@ loop:
}
}
- if (log_do_write) {
- if (type == LOG_FLUSH) {
- log_sys->n_pending_writes++;
- group->n_pending_writes++;
- }
+ /* Calculate the checksums for each log block and write them to
+ the trailer fields of the log blocks */
+
+ for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
+ log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
+ }
+ if (log_do_write) {
log_sys->n_log_ios++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
next_offset / UNIV_PAGE_SIZE,
next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
}
@@ -1126,15 +1202,15 @@ loop:
/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been flushed to disk up to the last log entry written by the
-transaction. If there is a flush running, it waits and checks if the flush
-flushed enough. If not, starts a new flush. */
+that the log has been written to the log file up to the last log entry written
+by the transaction. If there is a flush running, it waits and checks if the
+flush flushed enough. If not, starts a new flush. */
void
log_flush_up_to(
/*============*/
dulint lsn, /* in: log sequence number up to which the log should
- be flushed, ut_dulint_max if not specified */
+ be written, ut_dulint_max if not specified */
ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
{
@@ -1144,6 +1220,7 @@ log_flush_up_to(
ulint area_start;
ulint area_end;
ulint loop_count;
+ ulint unlock;
if (recv_no_ibuf_operations) {
/* Recovery is running and no operations on the log files are
@@ -1209,6 +1286,12 @@ loop:
ut_dulint_get_low(log_sys->lsn));
}
+ log_sys->n_pending_writes++;
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group->n_pending_writes++; /* We assume here that we have only
+ one log group! */
+
os_event_reset(log_sys->no_flush_event);
os_event_reset(log_sys->one_flushed_event);
@@ -1254,6 +1337,36 @@ loop:
group = UT_LIST_GET_NEXT(log_groups, group);
}
+ mutex_exit(&(log_sys->mutex));
+
+ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2) {
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ fil_flush(group->space_id);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ ut_a(group->n_pending_writes == 1);
+ ut_a(log_sys->n_pending_writes == 1);
+
+ group->n_pending_writes--;
+ log_sys->n_pending_writes--;
+
+ unlock = log_group_check_flush_completion(group);
+ unlock = unlock | log_sys_check_flush_completion();
+
+ log_flush_do_unlocks(unlock);
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+
do_waits:
mutex_exit(&(log_sys->mutex));
@@ -1539,15 +1652,23 @@ log_reset_first_header_and_checkpoint(
/*==================================*/
byte* hdr_buf,/* in: buffer which will be written to the start
of the first log file */
- dulint lsn) /* in: lsn of the start of the first log file
- + LOG_BLOCK_HDR_SIZE */
+ dulint start) /* in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
{
ulint fold;
byte* buf;
-
+ dulint lsn;
+
mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, lsn);
+ mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
+
+ lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
+ /* Write the label of ibbackup --restore */
+ sprintf(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "ibbackup ");
+ ut_sprintf_timestamp(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
+ + strlen("ibbackup "));
buf = hdr_buf + LOG_CHECKPOINT_1;
mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
@@ -2965,15 +3086,22 @@ log_check_log_recs(
Prints info of the log. */
void
-log_print(void)
-/*===========*/
+log_print(
+/*======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end)/* in: buffer end */
{
double time_elapsed;
time_t current_time;
+ if (buf_end - buf < 300) {
+
+ return;
+ }
+
mutex_enter(&(log_sys->mutex));
- printf("Log sequence number %lu %lu\n"
+ buf += sprintf(buf, "Log sequence number %lu %lu\n"
"Log flushed up to %lu %lu\n"
"Last checkpoint at %lu %lu\n",
ut_dulint_get_high(log_sys->lsn),
@@ -2987,7 +3115,7 @@ log_print(void)
time_elapsed = difftime(current_time, log_sys->last_printout_time);
- printf(
+ buf += sprintf(buf,
"%lu pending log writes, %lu pending chkp writes\n"
"%lu log i/o's done, %.2f log i/o's/second\n",
log_sys->n_pending_writes,
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index c31719f7bb0..53f75c176ea 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -568,6 +568,55 @@ recv_read_cp_info_for_backup(
return(TRUE);
}
+/**********************************************************
+Validates the 1-byte checksum stored in the trailer checksum field of a
+log block. A block in the old format, where that field held the highest
+byte of the log block number, is accepted as well. */
+static
+ibool
+log_block_checksum_is_ok_or_old_format(
+/*===================================*/
+ /* out: TRUE if ok, or if the log block may be in the
+ format of InnoDB version < 3.23.52 */
+ byte* block) /* in: pointer to a log block */
+{
+ byte* field; /* trailer checksum field of the block */
+ byte* ptr;
+ ulint total = 1;
+
+ field = block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_CHECKSUM;
+
+ /* Sum every byte of the block that precedes the trailer */
+ for (ptr = block;
+ ptr < block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+ ptr++) {
+ total += (ulint)(*ptr);
+ }
+
+/* printf("Checksum %lu, byte %lu\n", total & 0xFF,
+ mach_read_from_1(field));
+*/
+ if ((total & 0xFF) == mach_read_from_1(field)) {
+ /* The field holds the low 8 bits of 1 + the byte sum */
+ return(TRUE);
+ }
+
+ if (mach_read_from_1(field)
+ == ((log_block_get_hdr_no(block) & 0xFF000000) >> 24)) {
+
+ /* We assume the log block is in the format of
+ InnoDB version < 3.23.52 and the block is ok */
+/*
+ fprintf(stderr,
+"InnoDB: Scanned old format < InnoDB-3.23.52 log block number %lu\n",
+ log_block_get_hdr_no(block));
+*/
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
/***********************************************************************
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
@@ -598,12 +647,13 @@ recv_scan_log_seg_for_backup(
no = log_block_get_hdr_no(log_block);
- /* fprintf(stderr, "Log block header no %lu\n", no); */
+/* fprintf(stderr, "Log block header no %lu\n", no); */
- if (no != log_block_get_trl_no(log_block)
- || no != log_block_convert_lsn_to_no(*scanned_lsn)) {
-
-/* printf(
+ if ((no & 0xFFFFFF) != log_block_get_trl_no(log_block)
+ || no != log_block_convert_lsn_to_no(*scanned_lsn)
+ || !log_block_checksum_is_ok_or_old_format(log_block)) {
+/*
+ printf(
"Log block n:o %lu, trailer n:o %lu, scanned lsn n:o %lu\n",
no, log_block_get_trl_no(log_block),
log_block_convert_lsn_to_no(*scanned_lsn));
@@ -611,8 +661,8 @@ recv_scan_log_seg_for_backup(
/* Garbage or an incompletely written log block */
log_block += OS_FILE_LOG_BLOCK_SIZE;
-
-/* printf(
+/*
+ printf(
"Next log block n:o %lu, trailer n:o %lu\n",
log_block_get_hdr_no(log_block),
log_block_get_trl_no(log_block));
@@ -629,11 +679,11 @@ recv_scan_log_seg_for_backup(
/* Garbage from a log buffer flush which was made
before the most recent database recovery */
-
+/*
printf("Scanned cp n:o %lu, block cp n:o %lu\n",
*scanned_checkpoint_no,
log_block_get_checkpoint_no(log_block));
-
+*/
break;
}
@@ -1011,7 +1061,7 @@ recv_recover_page(
page_lsn = page_newest_lsn;
}
} else {
- /* In recovery from a backup we do not use the buffer
+ /* In recovery from a backup we do not really use the buffer
pool */
page_newest_lsn = ut_dulint_zero;
@@ -1361,6 +1411,14 @@ recv_apply_log_recs_for_backup(
nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT),
UNIV_PAGE_SIZE);
+ /* We simulate a page read made by the buffer pool,
+ to make sure recovery works ok. We must init the
+ block corresponding to buf_pool->frame_zero
+ (== page) */
+
+ buf_page_init_for_backup_restore(0, i,
+ buf_block_align(page));
+
recv_recover_page(TRUE, FALSE, page, 0, i);
buf_flush_init_for_writing(page,
@@ -2037,8 +2095,33 @@ recv_scan_log_recs(
/* fprintf(stderr, "Log block header no %lu\n", no); */
- if (no != log_block_get_trl_no(log_block)
- || no != log_block_convert_lsn_to_no(scanned_lsn)) {
+ if ((no & 0xFFFFFF) != log_block_get_trl_no(log_block)
+ || no != log_block_convert_lsn_to_no(scanned_lsn)
+ || !log_block_checksum_is_ok_or_old_format(log_block)) {
+
+ if ((no & 0xFFFFFF) == log_block_get_trl_no(log_block)
+ && no == log_block_convert_lsn_to_no(scanned_lsn)
+ && !log_block_checksum_is_ok_or_old_format(
+ log_block)) {
+ fprintf(stderr,
+"InnoDB: Log block no %lu at lsn %lu %lu has\n"
+"InnoDB: ok header and trailer, but checksum field contains %lu\n",
+ no, ut_dulint_get_high(scanned_lsn),
+ ut_dulint_get_low(scanned_lsn),
+ mach_read_from_1(log_block
+ + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM));
+ }
+
+ if ((no & 0xFFFFFF)
+ != log_block_get_trl_no(log_block)) {
+ fprintf(stderr,
+"InnoDB: Log block with header no %lu at lsn %lu %lu has\n"
+"InnoDB: trailer no %lu\n",
+ no, ut_dulint_get_high(scanned_lsn),
+ ut_dulint_get_low(scanned_lsn),
+ log_block_get_trl_no(log_block));
+ }
/* Garbage or an incompletely written log block */
@@ -2241,6 +2324,7 @@ recv_recovery_from_checkpoint_start(
dulint archived_lsn;
ulint capacity;
byte* buf;
+ byte log_hdr_buf[LOG_FILE_HDR_SIZE];
ulint err;
ut_ad((type != LOG_CHECKPOINT)
@@ -2288,6 +2372,33 @@ recv_recovery_from_checkpoint_start(
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
+ /* Read the first log file header to print a note if this is
+ a recovery from a restored InnoDB Hot Backup */
+
+ fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
+ 0, 0, LOG_FILE_HDR_SIZE,
+ log_hdr_buf, max_cp_group);
+
+ if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
+ "ibbackup", ut_strlen("ibbackup"))) {
+ /* This log file was created by ibbackup --restore: print
+ a note to the user about it */
+
+ fprintf(stderr,
+ "InnoDB: The log file was created by ibbackup --restore at\n"
+ "InnoDB: %s\n", log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
+
+ /* Wipe over the label now: overwrite its first 4 bytes with blanks */
+
+ ut_memcpy(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
+ "    ", 4);
+ /* Write to the log file to wipe over the label */
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
+ max_cp_group->space_id,
+ 0, 0, OS_FILE_LOG_BLOCK_SIZE,
+ log_hdr_buf, max_cp_group);
+ }
+
group = UT_LIST_GET_FIRST(log_sys->log_groups);
while (group) {
@@ -2471,7 +2582,7 @@ recv_recovery_from_checkpoint_finish(void)
/* Rollback the uncommitted transactions which have no user session */
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- trx_rollback_all_without_sess();
+ trx_rollback_or_clean_all_without_sess();
}
/* Apply the hashed log records to the respective file pages */
@@ -2487,6 +2598,7 @@ recv_recovery_from_checkpoint_finish(void)
}
if (recv_needed_recovery) {
+ trx_sys_print_mysql_master_log_pos();
trx_sys_print_mysql_binlog_offset();
}
@@ -2614,10 +2726,9 @@ recv_reset_log_files_for_backup(
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
- log_reset_first_header_and_checkpoint(buf,
- ut_dulint_add(lsn, LOG_BLOCK_HDR_SIZE));
+ log_reset_first_header_and_checkpoint(buf, lsn);
- log_block_init(buf + LOG_FILE_HDR_SIZE, lsn);
+ log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
LOG_BLOCK_HDR_SIZE);
sprintf(name, "%sib_logfile%lu", log_dir, 0);
@@ -2754,7 +2865,7 @@ ask_again:
if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
fprintf(stderr,
"InnoDB: Archive log file %s starts from too big a lsn\n",
- name);
+ name);
return(TRUE);
}
@@ -2765,7 +2876,7 @@ ask_again:
fprintf(stderr,
"InnoDB: Archive log file %s starts from a wrong lsn\n",
- name);
+ name);
return(TRUE);
}
diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c
index 26f5a5d1cb7..b582afc5710 100644
--- a/innobase/mtr/mtr0log.c
+++ b/innobase/mtr/mtr0log.c
@@ -290,7 +290,7 @@ mlog_write_string(
ut_a(0);
}
ut_ad(ptr && mtr);
- ut_ad(len < UNIV_PAGE_SIZE);
+ ut_a(len < UNIV_PAGE_SIZE);
ut_memcpy(ptr, str, len);
@@ -338,9 +338,13 @@ mlog_parse_string(
offset = mach_read_from_2(ptr);
ptr += 2;
+ ut_a(offset < UNIV_PAGE_SIZE);
+
len = mach_read_from_2(ptr);
ptr += 2;
+ ut_a(len + offset < UNIV_PAGE_SIZE);
+
if (end_ptr < ptr + len) {
return(NULL);
diff --git a/innobase/mtr/mtr0mtr.c b/innobase/mtr/mtr0mtr.c
index 6aa1f3509d4..e9a6e39d98f 100644
--- a/innobase/mtr/mtr0mtr.c
+++ b/innobase/mtr/mtr0mtr.c
@@ -315,7 +315,7 @@ mtr_log_reserve_and_write(
}
data_size = dyn_array_get_data_size(mlog);
-
+
/* Open the database log for log_write_low */
mtr->start_lsn = log_reserve_and_open(data_size);
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index ee4045febde..ae3c8a45f62 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -22,6 +22,16 @@ Created 10/21/1995 Heikki Tuuri
#endif
+/* This specifies the file permissions InnoDB uses when it creates files in
+Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
+my_umask */
+
+#ifndef __WIN__
+ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+#else
+ulint os_innodb_umask = 0;
+#endif
+
/* If the following is set to TRUE, we do not call os_file_flush in every
os_file_write. We can set this TRUE if the doublewrite buffer is used. */
ibool os_do_not_call_flush_at_each_write = FALSE;
@@ -32,7 +42,7 @@ OS does not provide an atomic pread or pwrite, or similar */
os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 32
+#define OS_AIO_MERGE_N_CONSECUTIVE 64
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -40,6 +50,8 @@ use simulated aio we build below with threads */
ibool os_aio_use_native_aio = FALSE;
+ibool os_aio_print_debug = FALSE;
+
/* The aio array slot structure */
typedef struct os_aio_slot_struct os_aio_slot_t;
@@ -115,7 +127,12 @@ os_aio_array_t* os_aio_sync_array = NULL;
ulint os_aio_n_segments = ULINT_UNDEFINED;
+/* If the following is TRUE, read i/o handler threads try to
+wait until a batch of new read requests have been posted */
+ibool os_aio_recommend_sleep_for_read_threads = FALSE;
+
ulint os_n_file_reads = 0;
+ulint os_bytes_read_since_printout = 0;
ulint os_n_file_writes = 0;
ulint os_n_fsyncs = 0;
ulint os_n_file_reads_old = 0;
@@ -412,8 +429,8 @@ try_again:
}
if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
- | S_IWGRP | S_IROTH | S_IWOTH);
+ file = open(name, create_flag, S_IRUSR | S_IWUSR
+ | S_IRGRP | S_IWGRP);
} else {
file = open(name, create_flag);
}
@@ -548,8 +565,7 @@ try_again:
}
#endif
if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
- | S_IWGRP | S_IROTH | S_IWOTH);
+ file = open(name, create_flag, os_innodb_umask);
} else {
file = open(name, create_flag);
}
@@ -673,6 +689,7 @@ os_file_set_size(
ulint n_bytes;
ibool ret;
byte* buf;
+ byte* buf2;
ulint i;
ut_a(size == (size & 0xFFFFFFFF));
@@ -680,7 +697,10 @@ os_file_set_size(
/* We use a very big 8 MB buffer in writing because Linux may be
extremely slow in fsync on 1 MB writes */
- buf = ut_malloc(UNIV_PAGE_SIZE * 512);
+ buf2 = ut_malloc(UNIV_PAGE_SIZE * 513);
+
+ /* Align the buffer for possible raw i/o */
+ buf = ut_align(buf2, UNIV_PAGE_SIZE);
/* Write buffer full of zeros */
for (i = 0; i < UNIV_PAGE_SIZE * 512; i++) {
@@ -702,13 +722,13 @@ os_file_set_size(
(ulint)(offset >> 32),
n_bytes);
if (!ret) {
- ut_free(buf);
+ ut_free(buf2);
goto error_handling;
}
offset += n_bytes;
}
- ut_free(buf);
+ ut_free(buf2);
ret = os_file_flush(file);
@@ -734,6 +754,8 @@ os_file_flush(
ut_a(file);
+ os_n_fsyncs++;
+
ret = FlushFileBuffers(file);
if (ret) {
@@ -742,6 +764,10 @@ os_file_flush(
os_file_handle_error(file, NULL);
+ /* It is a fatal error if a file flush does not succeed, because then
+ the database can get corrupt on disk */
+ ut_a(0);
+
return(FALSE);
#else
int ret;
@@ -764,11 +790,17 @@ os_file_flush(
return(TRUE);
}
+ ut_print_timestamp(stderr);
+
fprintf(stderr,
- "InnoDB: Error: the OS said file flush did not succeed\n");
+ " InnoDB: Error: the OS said file flush did not succeed\n");
os_file_handle_error(file, NULL);
+ /* It is a fatal error if a file flush does not succeed, because then
+ the database can get corrupt on disk */
+ ut_a(0);
+
return(FALSE);
#endif
}
@@ -954,6 +986,7 @@ os_file_read(
ut_a((offset & 0xFFFFFFFF) == offset);
os_n_file_reads++;
+ os_bytes_read_since_printout += n;
try_again:
ut_ad(file);
@@ -1062,7 +1095,9 @@ os_file_write(
fprintf(stderr,
" InnoDB: Error: File pointer positioning to file %s failed at\n"
-"InnoDB: offset %lu %lu. Operating system error number %lu.\n",
+"InnoDB: offset %lu %lu. Operating system error number %lu.\n"
+"InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
+"InnoDB: what the error number means.\n",
name, offset_high, offset,
(ulint)GetLastError());
@@ -1093,8 +1128,10 @@ os_file_write(
" InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
"InnoDB: %lu bytes should have been written, only %lu were written.\n"
"InnoDB: Operating system error number %lu.\n"
+"InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
+"InnoDB: what the error number means.\n"
"InnoDB: Check that your OS and file system support files of this size.\n"
-"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
+"InnoDB: Check also that the disk is not full or a disk quota exceeded.\n",
name, offset_high, offset, n, len,
(ulint)GetLastError());
@@ -1120,10 +1157,12 @@ os_file_write(
" InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
"InnoDB: %lu bytes should have been written, only %lu were written.\n"
"InnoDB: Operating system error number %lu.\n"
+"InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
+"InnoDB: what the error number means or use the perror program of MySQL.\n"
"InnoDB: Check that your OS and file system support files of this size.\n"
-"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
- name, offset_high, offset, n, ret, (ulint)errno);
-
+"InnoDB: Check also that the disk is not full or a disk quota exceeded.\n",
+ name, offset_high, offset, n, (ulint)ret,
+ (ulint)errno);
os_has_said_disk_full = TRUE;
}
@@ -1623,13 +1662,40 @@ os_aio_simulated_wake_handler_threads(void)
/* We do not use simulated aio: do nothing */
return;
- }
+ }
+
+ os_aio_recommend_sleep_for_read_threads = FALSE;
for (i = 0; i < os_aio_n_segments; i++) {
os_aio_simulated_wake_handler_thread(i);
}
}
+/**************************************************************************
+Call this before posting a batch of reads when it is preferable that an
+i/o-handler thread picks them all up at once later. The caller must
+afterwards call os_aio_simulated_wake_handler_threads so that the threads
+are not left sleeping! */
+
+void
+os_aio_simulated_put_read_threads_to_sleep(void)
+/*============================================*/
+{
+ os_aio_array_t* seg_array;
+ ulint seg = 0;
+
+ os_aio_recommend_sleep_for_read_threads = TRUE;
+
+ while (seg < os_aio_n_segments) {
+ os_aio_get_array_and_local_segment(&seg_array, seg);
+ if (seg_array == os_aio_read_array) {
+ /* Reset the wait event of each read segment */
+ os_event_reset(os_aio_segment_wait_events[seg]);
+ }
+ seg++;
+ }
+}
+
/***********************************************************************
Requests an asynchronous i/o operation. */
@@ -1685,7 +1751,6 @@ os_aio(
ut_ad(buf);
ut_ad(n > 0);
ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0)
ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(os_aio_validate());
@@ -2036,18 +2101,14 @@ os_aio_simulated_handle(
ulint offs;
ulint lowest_offset;
byte* combined_buf;
+ byte* combined_buf2;
ibool ret;
ulint n;
ulint i;
-
+
segment = os_aio_get_array_and_local_segment(&array, global_segment);
restart:
- /* Give other threads chance to add several i/os to the array
- at once */
-
- os_thread_yield();
-
/* NOTE! We only access constant fields in os_aio_array. Therefore
we do not have to acquire the protecting mutex yet */
@@ -2058,6 +2119,15 @@ restart:
/* Look through n slots after the segment * n'th slot */
+ if (array == os_aio_read_array
+ && os_aio_recommend_sleep_for_read_threads) {
+
+ /* Give other threads chance to add several i/os to the array
+ at once. */
+
+ goto recommended_sleep;
+ }
+
os_mutex_enter(array->mutex);
/* Check if there is a slot for which the i/o has already been
@@ -2068,6 +2138,11 @@ restart:
if (slot->reserved && slot->io_already_done) {
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: i/o for slot %lu already done, returning\n", i);
+ }
+
ret = TRUE;
goto slot_io_done;
@@ -2149,9 +2224,11 @@ consecutive_loop:
/* We can use the buffer of the i/o request */
combined_buf = slot->buf;
} else {
- combined_buf = ut_malloc(total_len);
+ combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
+
+ ut_a(combined_buf2);
- ut_a(combined_buf);
+ combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE);
}
/* We release the array mutex for the time of the i/o: NOTE that
@@ -2174,6 +2251,13 @@ consecutive_loop:
srv_io_thread_op_info[global_segment] = (char*) "doing file i/o";
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: doing i/o of type %lu at offset %lu %lu, length %lu\n",
+ slot->type, slot->offset_high, slot->offset,
+ total_len);
+ }
+
/* Do the i/o with ordinary, synchronous i/o functions: */
if (slot->type == OS_FILE_WRITE) {
ret = os_file_write(slot->name, slot->file, combined_buf,
@@ -2203,7 +2287,7 @@ consecutive_loop:
}
if (n_consecutive > 1) {
- ut_free(combined_buf);
+ ut_free(combined_buf2);
}
os_mutex_enter(array->mutex);
@@ -2241,10 +2325,18 @@ wait_for_io:
os_mutex_exit(array->mutex);
- srv_io_thread_op_info[global_segment] = (char*) "waiting for i/o request";
+recommended_sleep:
+ srv_io_thread_op_info[global_segment] =
+ (char*)"waiting for i/o request";
os_event_wait(os_aio_segment_wait_events[global_segment]);
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: i/o handler thread for i/o segment %lu wakes up\n",
+ global_segment);
+ }
+
goto restart;
}
@@ -2305,22 +2397,30 @@ os_aio_validate(void)
Prints info of the aio arrays. */
void
-os_aio_print(void)
-/*==============*/
+os_aio_print(
+/*=========*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end)/* in: buffer end */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
ulint n_reserved;
time_t current_time;
double time_elapsed;
+ double avg_bytes_read;
ulint i;
+ if (buf_end - buf < 1000) {
+
+ return;
+ }
+
for (i = 0; i < srv_n_file_io_threads; i++) {
- printf("I/O thread %lu state: %s\n", i,
+ buf += sprintf(buf, "I/O thread %lu state: %s\n", i,
srv_io_thread_op_info[i]);
}
- printf("Pending normal aio reads:");
+ buf += sprintf(buf, "Pending normal aio reads:");
array = os_aio_read_array;
loop:
@@ -2347,12 +2447,12 @@ loop:
ut_a(array->n_reserved == n_reserved);
- printf(" %lu", n_reserved);
+ buf += sprintf(buf, " %lu", n_reserved);
os_mutex_exit(array->mutex);
if (array == os_aio_read_array) {
- printf(", aio writes:");
+ buf += sprintf(buf, ", aio writes:");
array = os_aio_write_array;
@@ -2360,38 +2460,50 @@ loop:
}
if (array == os_aio_write_array) {
- printf(",\n ibuf aio reads:");
+ buf += sprintf(buf, ",\n ibuf aio reads:");
array = os_aio_ibuf_array;
goto loop;
}
if (array == os_aio_ibuf_array) {
- printf(", log i/o's:");
+ buf += sprintf(buf, ", log i/o's:");
array = os_aio_log_array;
goto loop;
}
if (array == os_aio_log_array) {
- printf(", sync i/o's:");
+ buf += sprintf(buf, ", sync i/o's:");
array = os_aio_sync_array;
goto loop;
}
- printf("\n");
+ buf += sprintf(buf, "\n");
current_time = time(NULL);
time_elapsed = difftime(current_time, os_last_printout);
- printf("Pending flushes (fsync) log: %lu; buffer pool: %lu\n",
+ buf += sprintf(buf,
+ "Pending flushes (fsync) log: %lu; buffer pool: %lu\n",
fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes);
- printf("%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
+ buf += sprintf(buf,
+ "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
os_n_file_reads, os_n_file_writes, os_n_fsyncs);
- printf("%.2f reads/s, %.2f writes/s, %.2f fsyncs/s\n",
+
+ if (os_n_file_reads == os_n_file_reads_old) {
+ avg_bytes_read = 0.0;
+ } else {
+ avg_bytes_read = os_bytes_read_since_printout /
+ (os_n_file_reads - os_n_file_reads_old);
+ }
+
+ buf += sprintf(buf,
+"%.2f reads/s, %lu avg bytes/read, %.2f writes/s, %.2f fsyncs/s\n",
(os_n_file_reads - os_n_file_reads_old)
/ time_elapsed,
+ (ulint)avg_bytes_read,
(os_n_file_writes - os_n_file_writes_old)
/ time_elapsed,
(os_n_fsyncs - os_n_fsyncs_old)
@@ -2400,6 +2512,7 @@ loop:
os_n_file_reads_old = os_n_file_reads;
os_n_file_writes_old = os_n_file_writes;
os_n_fsyncs_old = os_n_fsyncs;
+ os_bytes_read_since_printout = 0;
os_last_printout = current_time;
}
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
index 2cf5160d055..c1345de0d55 100644
--- a/innobase/os/os0sync.c
+++ b/innobase/os/os0sync.c
@@ -435,7 +435,7 @@ os_fast_mutex_init(
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
- pthread_mutex_init(fast_mutex, NULL);
+ pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST);
#endif
}
diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c
index 85beffbcc94..0eaf96f7e51 100644
--- a/innobase/page/page0cur.c
+++ b/innobase/page/page0cur.c
@@ -403,6 +403,8 @@ page_cur_insert_rec_write_log(
byte* log_ptr;
ulint i;
+ ut_a(rec_size < UNIV_PAGE_SIZE);
+
log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);
if (log_ptr == NULL) {
@@ -491,6 +493,8 @@ page_cur_insert_rec_write_log(
mlog_close(mtr, log_ptr);
+ ut_a(rec_size - i < UNIV_PAGE_SIZE);
+
if (rec_size - i >= MLOG_BUF_MARGIN) {
mlog_catenate_string(mtr, ins_ptr, rec_size - i);
}
@@ -602,6 +606,9 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
+ ut_a(mismatch_index < UNIV_PAGE_SIZE);
+ ut_a(end_seg_len < UNIV_PAGE_SIZE);
+
ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
@@ -938,6 +945,8 @@ page_copy_rec_list_end_to_created_page(
log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+ ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
+
mach_write_to_4(log_ptr, log_data_len);
rec_set_next_offs(insert_rec, PAGE_SUPREMUM);
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c
index bf8af45a00a..ed74736c8da 100644
--- a/innobase/page/page0page.c
+++ b/innobase/page/page0page.c
@@ -17,6 +17,7 @@ Created 2/2/1994 Heikki Tuuri
#include "lock0lock.h"
#include "fut0lst.h"
#include "btr0sea.h"
+#include "buf0buf.h"
/* A cached template page used in page_create */
page_t* page_template = NULL;
@@ -63,6 +64,65 @@ Assuming a page size of 8 kB, a typical index page of a secondary
index contains 300 index entries, and the size of the page directory
is 50 x 4 bytes = 200 bytes. */
+/*******************************************************************
+Looks for the directory slot which owns the given record. */
+
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ /* out: the directory slot number; hits ut_a(0) if no slot matches */
+ rec_t* rec) /* in: the physical record */
+{
+ ulint i; /* slot number, scanned from the last slot downwards */
+ ulint steps = 0; /* number of page_rec_get_next steps taken */
+ page_t* page;
+ page_dir_slot_t* slot;
+ rec_t* original_rec = rec; /* rec as passed in, kept for the error print */
+ char err_buf[1000];
+
+ ut_ad(page_rec_check(rec));
+ /* Step forward until we reach a record whose n_owned is nonzero */
+ while (rec_get_n_owned(rec) == 0) {
+ steps++;
+ rec = page_rec_get_next(rec);
+ }
+
+ page = buf_frame_align(rec);
+ /* Search the page directory from its last slot towards slot 0 */
+ i = page_dir_get_n_slots(page) - 1;
+ slot = page_dir_get_nth_slot(page, i);
+
+ while (page_dir_slot_get_rec(slot) != rec) {
+ /* Slot 0 did not match either: print diagnostics, then ut_a(0) */
+ if (i == 0) {
+ fprintf(stderr,
+ "InnoDB: Probable data corruption on page %lu\n",
+ buf_frame_get_page_no(page));
+
+ rec_sprintf(err_buf, 900, original_rec);
+
+ fprintf(stderr,
+ "InnoDB: Original record %s\n"
+ "InnoDB: on that page. Steps %lu.\n", err_buf, steps);
+
+ rec_sprintf(err_buf, 900, rec);
+
+ fprintf(stderr,
+ "InnoDB: Cannot find the dir slot for record %s\n"
+ "InnoDB: on that page!\n", err_buf);
+
+ buf_page_print(page);
+
+ ut_a(0);
+ }
+
+ i--;
+ slot = page_dir_get_nth_slot(page, i);
+ }
+
+ return(i);
+}
+
/******************************************************************
Used to check the consistency of a directory slot. */
static
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index c27af604d04..c50516dfc8b 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -104,7 +104,9 @@ cmp_types_are_equal(
if ((type1->mtype == DATA_VARCHAR && type2->mtype == DATA_CHAR)
|| (type1->mtype == DATA_CHAR && type2->mtype == DATA_VARCHAR)
|| (type1->mtype == DATA_FIXBINARY && type2->mtype == DATA_BINARY)
- || (type1->mtype == DATA_BINARY && type2->mtype == DATA_FIXBINARY)) {
+ || (type1->mtype == DATA_BINARY && type2->mtype == DATA_FIXBINARY)
+ || (type1->mtype == DATA_MYSQL && type2->mtype == DATA_VARMYSQL)
+ || (type1->mtype == DATA_VARMYSQL && type2->mtype == DATA_MYSQL)) {
return(TRUE);
}
@@ -124,14 +126,9 @@ cmp_types_are_equal(
return(FALSE);
}
- if (type1->mtype == DATA_MYSQL
- || type1->mtype == DATA_VARMYSQL) {
+ if (type1->mtype == DATA_INT && type1->len != type2->len) {
- if ((type1->prtype & ~DATA_NOT_NULL)
- != (type2->prtype & ~DATA_NOT_NULL)) {
-
- return(FALSE);
- }
+ return(FALSE);
}
return(TRUE);
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index ed4df08fcf3..2e6dde6db65 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -392,6 +392,19 @@ row_ins_foreign_delete_or_set_null(
node = thr->run_node;
+ ut_a(que_node_get_type(node) == QUE_NODE_UPDATE);
+
+ if (!node->is_delete) {
+ /* According to SQL-92 an UPDATE with respect to FOREIGN
+ KEY constraints is not semantically equivalent to a
+ DELETE + INSERT. Therefore we do not perform any action
+ here and consequently the child rows would be left
+ orphaned if we would let the UPDATE happen. Thus we return
+ an error. */
+
+ return(DB_ROW_IS_REFERENCED);
+ }
+
if (node->cascade_node == NULL) {
/* Extend our query graph by creating a child to current
update node. The child is used in the cascade or set null
@@ -609,7 +622,7 @@ the caller must have a shared latch on dict_foreign_key_check_lock. */
ulint
row_ins_check_foreign_constraint(
/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ /* out: DB_SUCCESS,
DB_NO_REFERENCED_ROW,
or DB_ROW_IS_REFERENCED */
ibool check_ref,/* in: TRUE if we want to check that
@@ -627,6 +640,7 @@ row_ins_check_foreign_constraint(
dict_table_t* check_table;
dict_index_t* check_index;
ulint n_fields_cmp;
+ ibool timeout_expired;
rec_t* rec;
btr_pcur_t pcur;
ibool moved;
@@ -635,6 +649,7 @@ row_ins_check_foreign_constraint(
ulint i;
mtr_t mtr;
+run_again:
ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
if (thr_get_trx(thr)->check_foreigns == FALSE) {
@@ -682,7 +697,7 @@ row_ins_check_foreign_constraint(
if (err != DB_SUCCESS) {
- return(err);
+ goto do_possible_lock_wait;
}
}
@@ -727,6 +742,11 @@ row_ins_check_foreign_constraint(
if (!rec_get_deleted_flag(rec)) {
/* Found a matching record */
+/* printf(
+"FOREIGN: Found matching record from %s %s\n",
+ check_index->table_name, check_index->name);
+ rec_print(rec);
+*/
if (check_ref) {
err = DB_SUCCESS;
@@ -779,6 +799,22 @@ next_rec:
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
+do_possible_lock_wait:
+ if (err == DB_LOCK_WAIT) {
+ thr_get_trx(thr)->error_state = err;
+
+ que_thr_stop_for_mysql(thr);
+
+ timeout_expired = srv_suspend_mysql_thread(thr);
+
+ if (!timeout_expired) {
+
+ goto run_again;
+ }
+
+ err = DB_LOCK_WAIT_TIMEOUT;
+ }
+
return(err);
}
@@ -792,8 +828,7 @@ static
ulint
row_ins_check_foreign_constraints(
/*==============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- code */
+ /* out: DB_SUCCESS or error code */
dict_table_t* table, /* in: table */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry for index */
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index d6c8d7ab412..e0737f53213 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -934,6 +934,7 @@ row_update_for_mysql(
ut_ad(!prebuilt->sql_stat_start);
que_thr_move_to_run_state_for_mysql(thr, trx);
+
run_again:
thr->run_node = node;
thr->prev_node = node;
@@ -998,7 +999,6 @@ row_update_cascade_for_mysql(
trx_t* trx;
trx = thr_get_trx(thr);
-
run_again:
thr->run_node = node;
thr->prev_node = node;
@@ -1131,6 +1131,35 @@ row_mysql_recover_tmp_table(
}
/*************************************************************************
+Locks the data dictionary exclusively for performing a table create
+operation. Release with row_mysql_unlock_data_dictionary. */
+
+void
+row_mysql_lock_data_dictionary(void)
+/*================================*/
+{
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks or lock waits can occur then in these operations */
+ /* Order: foreign key check x-lock first, then the dictionary mutex */
+ rw_lock_x_lock(&(dict_foreign_key_check_lock));
+ mutex_enter(&(dict_sys->mutex));
+}
+
+/*************************************************************************
+Unlocks the data dictionary exclusive lock. */
+
+void
+row_mysql_unlock_data_dictionary(void)
+/*==================================*/
+{
+ /* Release in the reverse order of row_mysql_lock_data_dictionary:
+ first the dictionary mutex, then the foreign key check x-lock */
+
+ mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&(dict_foreign_key_check_lock));
+}
+
+/*************************************************************************
Does a table creation operation for MySQL. If the name of the created
table ends to characters INNODB_MONITOR, then this also starts
printing of monitor output by the master thread. */
@@ -1150,6 +1179,7 @@ row_create_table_for_mysql(
ulint err;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ ut_ad(mutex_own(&(dict_sys->mutex)));
if (srv_created_new_raw || srv_force_recovery) {
fprintf(stderr,
@@ -1265,19 +1295,13 @@ row_create_table_for_mysql(
"to use this feature you must compile InnoDB with\n"
"UNIV_MEM_DEBUG defined in univ.i and the server must be\n"
"quiet because allocation from a mem heap is not protected\n"
- "by any semaphore.\n");
+ "by any semaphore.\n");
ut_a(mem_validate());
printf("Memory validated\n");
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
heap = mem_heap_create(512);
trx->dict_operation = TRUE;
@@ -1327,9 +1351,6 @@ row_create_table_for_mysql(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
que_graph_free((que_t*) que_node_get_parent(thr));
trx->op_info = (char *) "";
@@ -1356,6 +1377,7 @@ row_create_index_for_mysql(
ulint keywordlen;
ulint err;
+ ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
trx->op_info = (char *) "creating index";
@@ -1374,12 +1396,6 @@ row_create_index_for_mysql(
return(DB_SUCCESS);
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
heap = mem_heap_create(512);
trx->dict_operation = TRUE;
@@ -1407,9 +1423,6 @@ row_create_index_for_mysql(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
que_graph_free((que_t*) que_node_get_parent(thr));
trx->op_info = (char *) "";
@@ -1443,6 +1456,7 @@ row_table_add_foreign_constraints(
ulint keywordlen;
ulint err;
+ ut_ad(mutex_own(&(dict_sys->mutex)));
ut_a(sql_string);
trx->op_info = (char *) "adding foreign keys";
@@ -1461,12 +1475,6 @@ row_table_add_foreign_constraints(
return(DB_SUCCESS);
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
trx->dict_operation = TRUE;
err = dict_create_foreign_constraints(trx, sql_string, name);
@@ -1488,9 +1496,6 @@ row_table_add_foreign_constraints(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
return((int) err);
}
@@ -1922,6 +1927,13 @@ row_drop_table_for_mysql(
ut_a(0);
} else {
dict_table_remove_from_cache(table);
+
+ if (dict_load_table(name) != NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: dropping of table %s failed!\n", name);
+
+ }
}
funct_exit:
rw_lock_s_unlock(&(purge_sys->purge_is_running));
@@ -1979,6 +1991,7 @@ loop:
if (table->n_mysql_handles_opened > 0) {
mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&(dict_foreign_key_check_lock));
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -2409,6 +2422,14 @@ row_check_table_for_mysql(
index = dict_table_get_next_index(index);
}
+ /* We also validate the whole adaptive hash index, for all tables,
+ at every CHECK TABLE */
+
+ if (!btr_search_validate()) {
+
+ ret = DB_ERROR;
+ }
+
prebuilt->trx->op_info = (char *) "";
return(ret);
diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c
index 1dca017c349..60e057b816e 100644
--- a/innobase/row/row0purge.c
+++ b/innobase/row/row0purge.c
@@ -511,6 +511,14 @@ row_purge_parse_undo_rec(
clust_index = dict_table_get_first_index(node->table);
+ if (clust_index == NULL) {
+ /* The table was corrupt in the data dictionary */
+
+ rw_lock_x_unlock(&(purge_sys->purge_is_running));
+
+ return(FALSE);
+ }
+
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c
index 631f238a72d..b84e55ca643 100644
--- a/innobase/row/row0umod.c
+++ b/innobase/row/row0umod.c
@@ -437,11 +437,12 @@ row_undo_mod_del_unmark_sec(
rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur));
fprintf(stderr, "InnoDB: record %s\n", err_buf);
+ trx_print(err_buf, thr_get_trx(thr));
fprintf(stderr,
- "InnoDB: Make a detailed bug report and send it\n");
+ "%s\nInnoDB: Make a detailed bug report and send it\n",
+ err_buf);
fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
- trx_print(thr_get_trx(thr));
} else {
btr_cur = btr_pcur_get_btr_cur(&pcur);
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index 1d5319a182b..25c82f39da9 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -129,8 +129,7 @@ static
ulint
row_upd_check_references_constraints(
/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or an error
- code */
+ /* out: DB_SUCCESS or an error code */
btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the
cursor position is lost in this function! */
dict_table_t* table, /* in: table in question */
@@ -626,7 +625,7 @@ row_upd_index_parse(
/*******************************************************************
Returns TRUE if ext_vec contains i. */
-UNIV_INLINE
+static
ibool
upd_ext_vec_contains(
/*=================*/
@@ -738,6 +737,7 @@ row_upd_build_difference_binary(
ulint n_diff;
ulint roll_ptr_pos;
ulint trx_id_pos;
+ ibool extern_bit;
ulint i;
/* This function is used only for a clustered index */
@@ -763,9 +763,10 @@ row_upd_build_difference_binary(
goto skip_compare;
}
+
+ extern_bit = rec_get_nth_field_extern_bit(rec, i);
- if (rec_get_nth_field_extern_bit(rec, i)
- != upd_ext_vec_contains(ext_vec, n_ext_vec, i)
+ if (extern_bit != upd_ext_vec_contains(ext_vec, n_ext_vec, i)
|| !dfield_data_is_binary_equal(dfield, len, data)) {
upd_field = upd_get_nth_field(update, n_diff);
@@ -1094,11 +1095,12 @@ row_upd_sec_index_entry(
rec_sprintf(err_buf, 900, rec);
fprintf(stderr, "InnoDB: record %s\n", err_buf);
+ trx_print(err_buf, thr_get_trx(thr));
+
fprintf(stderr,
- "InnoDB: Make a detailed bug report and send it\n");
+ "%s\nInnoDB: Make a detailed bug report and send it\n",
+ err_buf);
fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
-
- trx_print(thr_get_trx(thr));
} else {
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
@@ -1362,7 +1364,7 @@ ulint
row_upd_del_mark_clust_rec(
/*=======================*/
/* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
+ completed, else error code */
upd_node_t* node, /* in: row update node */
dict_index_t* index, /* in: clustered index */
que_thr_t* thr, /* in: query thread */
@@ -1381,8 +1383,6 @@ row_upd_del_mark_clust_rec(
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
- ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
-
/* Store row because we have to build also the secondary index
entries */
@@ -1391,11 +1391,11 @@ row_upd_del_mark_clust_rec(
/* Mark the clustered index record deleted; we do not have to check
locks, because we assume that we have an x-lock on the record */
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
- TRUE, thr, mtr);
+ err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
+ btr_cur, TRUE, thr, mtr);
if (err == DB_SUCCESS && check_ref) {
- /* NOTE that the following call loses
- the position of pcur ! */
+ /* NOTE that the following call loses the position of pcur ! */
+
err = row_upd_check_references_constraints(pcur, index->table,
index, thr, mtr);
if (err != DB_SUCCESS) {
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index 39f3566eac8..3efb82eb8eb 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -61,7 +61,7 @@ ulint srv_activity_count = 0;
ibool srv_lock_timeout_and_monitor_active = FALSE;
ibool srv_error_monitor_active = FALSE;
-char* srv_main_thread_op_info = (char *) "";
+char* srv_main_thread_op_info = (char*) "";
/* Server parameters which are read from the initfile */
@@ -238,15 +238,14 @@ ulint srv_n_rows_updated_old = 0;
ulint srv_n_rows_deleted_old = 0;
ulint srv_n_rows_read_old = 0;
-ibool srv_print_innodb_monitor = FALSE;
-ibool srv_print_innodb_lock_monitor = FALSE;
-ibool srv_print_innodb_tablespace_monitor = FALSE;
-
/*
Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown
*/
ibool srv_print_verbose_log = TRUE;
+ibool srv_print_innodb_monitor = FALSE;
+ibool srv_print_innodb_lock_monitor = FALSE;
+ibool srv_print_innodb_tablespace_monitor = FALSE;
ibool srv_print_innodb_table_monitor = FALSE;
/* The parameters below are obsolete: */
@@ -278,6 +277,10 @@ i/o handler thread */
char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
+time_t srv_last_monitor_time;
+
+mutex_t srv_innodb_monitor_mutex;
+
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================
@@ -645,7 +648,7 @@ srv_release_threads(
slot = srv_table_get_nth_slot(i);
- if ((slot->type == type) && slot->suspended) {
+ if (slot->in_use && slot->type == type && slot->suspended) {
slot->suspended = FALSE;
@@ -987,7 +990,6 @@ srv_communication_init(
/*************************************************************************
Implements the recovery utility. */
-#ifdef NOT_USED
static
ulint
srv_recovery_thread(
@@ -1025,7 +1027,7 @@ srv_recovery_thread(
return(0);
}
-#endif
+
/*************************************************************************
Implements the purge utility. */
@@ -1077,7 +1079,6 @@ srv_create_utility_threads(void)
/*************************************************************************
Implements the communication threads. */
-#ifdef NOT_USED
static
ulint
srv_com_thread(
@@ -1125,7 +1126,7 @@ srv_com_thread(
return(0);
}
-#endif
+
/*************************************************************************
Creates the communication threads. */
@@ -1147,7 +1148,6 @@ srv_create_com_threads(void)
/*************************************************************************
Implements the worker threads. */
-#ifdef NOT_USED
static
ulint
srv_worker_thread(
@@ -1190,7 +1190,7 @@ srv_worker_thread(
return(0);
}
-#endif
+
/*************************************************************************
Creates the worker threads. */
@@ -1625,13 +1625,16 @@ srv_init(void)
kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
mutex_create(&kernel_mutex);
mutex_set_level(&kernel_mutex, SYNC_KERNEL);
+
+ mutex_create(&srv_innodb_monitor_mutex);
+ mutex_set_level(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_table_get_nth_slot(i);
slot->in_use = FALSE;
- slot->type=0; /* Avoid purify errors */
+ slot->type=0; /* Avoid purify errors */
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
@@ -1641,6 +1644,7 @@ srv_init(void)
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
slot->in_use = FALSE;
+ slot->type = 0;
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
@@ -1900,7 +1904,6 @@ srv_conc_exit_innodb(
trx_t* trx) /* in: transaction object associated with the
thread */
{
-
if (srv_thread_concurrency >= 500) {
return;
@@ -2004,7 +2007,31 @@ srv_table_reserve_slot_for_mysql(void)
while (slot->in_use) {
i++;
- ut_a(i < OS_THREAD_MAX_N);
+
+ if (i >= OS_THREAD_MAX_N) {
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: There appear to be %lu MySQL threads currently waiting\n"
+"InnoDB: inside InnoDB, which is the upper limit. Cannot continue operation.\n"
+"InnoDB: We intentionally generate a seg fault to print a stack trace\n"
+"InnoDB: on Linux. But first we print a list of waiting threads.\n", i);
+
+ for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+ slot = srv_mysql_table + i;
+
+ fprintf(stderr,
+"Slot %lu: thread id %lu, type %lu, in use %lu, susp %lu, time %lu\n",
+ i, (ulint)(slot->id),
+ slot->type, slot->in_use,
+ slot->suspended,
+ (ulint)difftime(ut_time(), slot->suspend_time));
+ }
+
+ ut_a(0);
+ }
slot = srv_mysql_table + i;
}
@@ -2141,104 +2168,113 @@ srv_release_mysql_thread_if_suspended(
/* not found */
}
-/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
+/**********************************************************************
+Sprintfs to a buffer the output of the InnoDB Monitor. */
-#ifndef __WIN__
-void*
-#else
-ulint
-#endif
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- /* out: a dummy parameter */
- void* arg) /* in: a dummy parameter required by
- os_thread_create */
+void
+srv_sprintf_innodb_monitor(
+/*=======================*/
+ char* buf, /* in/out: buffer which must be at least 4 kB */
+ ulint len) /* in: length of the buffer */
{
- srv_slot_t* slot;
- double time_elapsed;
- time_t current_time;
- time_t last_monitor_time;
- time_t last_table_monitor_time;
- ibool some_waits;
- double wait_time;
- ulint i;
-
- UT_NOT_USED(arg);
- last_monitor_time = time(NULL);
- last_table_monitor_time = time(NULL);
-loop:
- srv_lock_timeout_and_monitor_active = TRUE;
+ char* buf_end = buf + len - 2000;
+ double time_elapsed;
+ time_t current_time;
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
+ mutex_enter(&srv_innodb_monitor_mutex);
- os_thread_sleep(1000000);
+ current_time = time(NULL);
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
+ /* We add 0.001 seconds to time_elapsed to prevent division
+ by zero if two users happen to call SHOW INNODB STATUS at the same
+ time */
- sync_arr_wake_threads_if_sema_free();
+ time_elapsed = difftime(current_time, srv_last_monitor_time)
+ + 0.001;
- current_time = time(NULL);
+ srv_last_monitor_time = time(NULL);
- time_elapsed = difftime(current_time, last_monitor_time);
-
- if (time_elapsed > 15) {
+ ut_a(len >= 4096);
- if (srv_print_innodb_monitor) {
+ buf += sprintf(buf, "\n=====================================\n");
- last_monitor_time = time(NULL);
-
- printf("=====================================\n");
- ut_print_timestamp(stdout);
+ ut_sprintf_timestamp(buf);
+ buf = buf + strlen(buf);
- printf(" INNODB MONITOR OUTPUT\n"
+ buf += sprintf(buf, " INNODB MONITOR OUTPUT\n"
"=====================================\n");
- printf("----------\n"
+
+ buf += sprintf(buf,
+"Per second values calculated from the last %lu seconds\n",
+ (ulint)time_elapsed);
+
+ buf += sprintf(buf, "----------\n"
"SEMAPHORES\n"
"----------\n");
- sync_print();
- printf("------------\n"
+ sync_print(buf, buf_end);
+
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf, "------------\n"
"TRANSACTIONS\n"
"------------\n");
- lock_print_info();
- printf("--------\n"
+ lock_print_info(buf, buf_end);
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf, "--------\n"
"FILE I/O\n"
"--------\n");
- os_aio_print();
- printf("-------------\n"
- "INSERT BUFFER\n"
- "-------------\n");
- ibuf_print();
- printf("---\n"
+ os_aio_print(buf, buf_end);
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf, "-------------------------------------\n"
+ "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
+ "-------------------------------------\n");
+ ibuf_print(buf, buf_end);
+ buf = buf + strlen(buf);
+
+ ha_print_info(buf, buf_end, btr_search_sys->hash_index);
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf,
+ "%.2f hash searches/s, %.2f non-hash searches/s\n",
+ (btr_cur_n_sea - btr_cur_n_sea_old)
+ / time_elapsed,
+ (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
+ / time_elapsed);
+ btr_cur_n_sea_old = btr_cur_n_sea;
+ btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+
+ buf += sprintf(buf,"---\n"
"LOG\n"
"---\n");
- log_print();
- printf("----------------------\n"
+ log_print(buf, buf_end);
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf, "----------------------\n"
"BUFFER POOL AND MEMORY\n"
"----------------------\n");
- printf(
+ buf += sprintf(buf,
"Total memory allocated %lu; in additional pool allocated %lu\n",
ut_total_allocated_memory,
mem_pool_get_reserved(mem_comm_pool));
- buf_print_io();
- printf("--------------\n"
+ buf_print_io(buf, buf_end);
+ buf = buf + strlen(buf);
+
+ buf += sprintf(buf, "--------------\n"
"ROW OPERATIONS\n"
"--------------\n");
- printf(
+ buf += sprintf(buf,
"%ld queries inside InnoDB, %ld queries in queue; main thread: %s\n",
srv_conc_n_threads, srv_conc_n_waiting_threads,
srv_main_thread_op_info);
- printf(
+ buf += sprintf(buf,
"Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
srv_n_rows_inserted,
srv_n_rows_updated,
srv_n_rows_deleted,
srv_n_rows_read);
- printf(
+ buf += sprintf(buf,
"%.2f inserts/s, %.2f updates/s, %.2f deletes/s, %.2f reads/s\n",
(srv_n_rows_inserted - srv_n_rows_inserted_old)
/ time_elapsed,
@@ -2254,9 +2290,71 @@ loop:
srv_n_rows_deleted_old = srv_n_rows_deleted;
srv_n_rows_read_old = srv_n_rows_read;
- printf("----------------------------\n"
+ buf += sprintf(buf, "----------------------------\n"
"END OF INNODB MONITOR OUTPUT\n"
"============================\n");
+ mutex_exit(&srv_innodb_monitor_mutex);
+}
+
+/*************************************************************************
+A thread which wakes up threads whose lock wait may have lasted too long.
+This also prints the info output by various InnoDB monitors. */
+
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+srv_lock_timeout_and_monitor_thread(
+/*================================*/
+ /* out: a dummy parameter */
+ void* arg) /* in: a dummy parameter required by
+ os_thread_create */
+{
+ srv_slot_t* slot;
+ double time_elapsed;
+ time_t current_time;
+ time_t last_table_monitor_time;
+ time_t last_monitor_time;
+ ibool some_waits;
+ double wait_time;
+ char* buf;
+ ulint i;
+
+ UT_NOT_USED(arg);
+ srv_last_monitor_time = time(NULL);
+ last_table_monitor_time = time(NULL);
+ last_monitor_time = time(NULL);
+loop:
+ srv_lock_timeout_and_monitor_active = TRUE;
+
+ /* When someone is waiting for a lock, we wake up every second
+ and check if a timeout has passed for a lock wait */
+
+ os_thread_sleep(1000000);
+
+ /* In case mutex_exit is not a memory barrier, it is
+ theoretically possible some threads are left waiting though
+ the semaphore is already released. Wake up those threads: */
+
+ sync_arr_wake_threads_if_sema_free();
+
+ current_time = time(NULL);
+
+ time_elapsed = difftime(current_time, last_monitor_time);
+
+ if (time_elapsed > 15) {
+ last_monitor_time = time(NULL);
+
+ if (srv_print_innodb_monitor) {
+
+ buf = mem_alloc(100000);
+
+ srv_sprintf_innodb_monitor(buf, 100000);
+
+ printf("%s", buf);
+
+ mem_free(buf);
}
if (srv_print_innodb_tablespace_monitor
@@ -2491,7 +2589,7 @@ srv_master_thread(
os_event_set(srv_sys->operational);
loop:
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*) "reserving kernel mutex";
n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
@@ -2507,18 +2605,19 @@ loop:
for (i = 0; i < 10; i++) {
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
- srv_main_thread_op_info = (char *) "sleeping";
+ srv_main_thread_op_info = (char*)"sleeping";
os_thread_sleep(1000000);
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there no longer are SELECT
queries to them. */
- srv_main_thread_op_info = (char*) "doing background drop tables";
+ srv_main_thread_op_info =
+ (char*)"doing background drop tables";
row_drop_tables_for_mysql_in_background();
- srv_main_thread_op_info = (char*) "";
+ srv_main_thread_op_info = (char*)"";
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
@@ -2529,8 +2628,9 @@ loop:
is issued or the we have specified in my.cnf no flush
at transaction commit */
- srv_main_thread_op_info = (char *) "flushing log";
+ srv_main_thread_op_info = (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
/* If there were less than 10 i/os during the
one second sleep, we assume that there is free
@@ -2543,11 +2643,13 @@ loop:
+ buf_pool->n_pages_written;
if (n_pend_ios < 3 && (n_ios - n_ios_old < 10)) {
srv_main_thread_op_info =
- (char *) "doing insert buffer merge";
+ (char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5);
- srv_main_thread_op_info = (char *) "flushing log";
+ srv_main_thread_op_info =
+ (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
}
if (srv_fast_shutdown && srv_shutdown_state > 0) {
@@ -2583,21 +2685,23 @@ loop:
+ buf_pool->n_pages_written;
if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
- srv_main_thread_op_info =(char *) "flushing buffer pool pages";
+ srv_main_thread_op_info = (char*) "flushing buffer pool pages";
buf_flush_batch(BUF_FLUSH_LIST, 50, ut_dulint_max);
- srv_main_thread_op_info = (char *) "flushing log";
+ srv_main_thread_op_info = (char*) "flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
}
/* We run a batch of insert buffer merge every 10 seconds,
even if the server were active */
- srv_main_thread_op_info = (char *) "doing insert buffer merge";
+ srv_main_thread_op_info = (char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5);
- srv_main_thread_op_info = (char *) "flushing log";
+ srv_main_thread_op_info = (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
/* We run a full purge every 10 seconds, even if the server
were active */
@@ -2613,15 +2717,16 @@ loop:
goto background_loop;
}
- srv_main_thread_op_info = (char *) "purging";
+ srv_main_thread_op_info = (char*)"purging";
n_pages_purged = trx_purge();
current_time = time(NULL);
if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = (char *) "flushing log";
+ srv_main_thread_op_info = (char*) "flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
last_flush_time = current_time;
}
}
@@ -2630,25 +2735,25 @@ background_loop:
/* In this loop we run background operations when the server
is quiet and we also come here about once in 10 seconds */
- srv_main_thread_op_info = (char*) "doing background drop tables";
+ srv_main_thread_op_info = (char*)"doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
- srv_main_thread_op_info = (char*) "";
+ srv_main_thread_op_info = (char*)"";
- srv_main_thread_op_info = (char*) "flushing buffer pool pages";
+ srv_main_thread_op_info = (char*)"flushing buffer pool pages";
/* Flush a few oldest pages to make the checkpoint younger */
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, ut_dulint_max);
- srv_main_thread_op_info = (char*) "making checkpoint";
+ srv_main_thread_op_info = (char*)"making checkpoint";
/* Make a new checkpoint about once in 10 seconds */
log_checkpoint(TRUE, FALSE);
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2661,11 +2766,11 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
- srv_main_thread_op_info = (char *) "purging";
+ srv_main_thread_op_info = (char*)"purging";
n_pages_purged = trx_purge();
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2674,10 +2779,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = (char *) "doing insert buffer merge";
+ srv_main_thread_op_info = (char*)"doing insert buffer merge";
n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2686,10 +2791,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = (char *) "flushing buffer pool pages";
+ srv_main_thread_op_info = (char*)"flushing buffer pool pages";
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2698,15 +2803,14 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info =
- (char *) "waiting for buffer pool flush to end";
+ srv_main_thread_op_info = (char*) "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
- srv_main_thread_op_info = (char *) "making checkpoint";
+ srv_main_thread_op_info = (char*)"making checkpoint";
log_checkpoint(TRUE, FALSE);
- srv_main_thread_op_info = (char *) "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2716,7 +2820,7 @@ background_loop:
mutex_exit(&kernel_mutex);
srv_main_thread_op_info =
- (char *) "archiving log (if log archive is on)";
+ (char*)"archiving log (if log archive is on)";
log_archive_do(FALSE, &n_bytes_archived);
@@ -2742,7 +2846,7 @@ background_loop:
master thread to wait for more server activity */
suspend_thread:
- srv_main_thread_op_info = (char *) "suspending";
+ srv_main_thread_op_info = (char*)"suspending";
mutex_enter(&kernel_mutex);
@@ -2756,7 +2860,7 @@ suspend_thread:
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = (char *) "waiting for server activity";
+ srv_main_thread_op_info = (char*)"waiting for server activity";
os_event_wait(event);
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index f5a0c62aaf9..55e734be3bd 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -433,8 +433,7 @@ Normalizes a directory path for Windows: converts slashes to backslashes. */
void
srv_normalize_path_for_win(
/*=======================*/
- char* str __attribute__((unused)))
- /* in/out: null-terminated character string */
+ char* str __attribute__((unused))) /* in/out: null-terminated character string */
{
#ifdef __WIN__
ulint i;
@@ -619,8 +618,7 @@ open_or_create_log_file(
if (k == 0 && i == 0) {
arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
- fil_space_create((char *) "arch_log_space", arch_space_id,
- FIL_LOG);
+ fil_space_create((char*) "arch_log_space", arch_space_id, FIL_LOG);
} else {
arch_space_id = ULINT_UNDEFINED;
}
@@ -839,7 +837,6 @@ open_or_create_data_files(
/*********************************************************************
This thread is used to measure contention of latches. */
-#ifdef NOT_USED
static
ulint
test_measure_cont(
@@ -899,7 +896,7 @@ test_measure_cont(
"Mutex res. l %lu, p %lu, k %lu s x %lu s s %lu s mut %lu of %lu\n",
lcount, pcount, kcount, s_xcount, s_scount, s_mcount, j);
- sync_print_wait_info();
+/* sync_print_wait_info(); */
fprintf(stderr,
"log i/o %lu n non sea %lu n succ %lu n h fail %lu\n",
@@ -909,7 +906,7 @@ test_measure_cont(
return(0);
}
-#endif
+
/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants. Server parameters are
@@ -935,27 +932,44 @@ innobase_start_or_create_for_mysql(void)
ulint k;
mtr_t mtr;
+#ifdef UNIV_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_SYNC_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_SEARCH_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_MEM_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
log_do_write = TRUE;
/* yydebug = TRUE; */
srv_is_being_started = TRUE;
srv_startup_is_before_trx_rollback_phase = TRUE;
- if (0 == ut_strcmp(srv_unix_file_flush_method_str,
- (char *) "fdatasync")) {
- srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
+ if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
+ srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
- } else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
- (char *) "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
+ } else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
- (char *) "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
+ "littlesync")) {
+ srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
- } else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
- (char *) "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
+ } else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
+ srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
} else {
fprintf(stderr,
"InnoDB: Unrecognized value %s for innodb_flush_method\n",
@@ -1005,7 +1019,7 @@ innobase_start_or_create_for_mysql(void)
os_aio_use_native_aio = FALSE;
if (!os_aio_use_native_aio) {
- os_aio_init(4 * SRV_N_PENDING_IOS_PER_THREAD
+ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
@@ -1350,9 +1364,15 @@ innobase_shutdown_for_mysql(void)
"InnoDB: inside InnoDB at shutdown\n",
srv_conc_n_threads);
}
-
+
+ /*
+ TODO: We should exit the i/o-handler and other utility threads
+ before freeing all memory. Now this can potentially cause a seg
+ fault!
+ */
#ifdef NOT_WORKING_YET
- ut_free_all_mem();
-#endif
+ ut_free_all_mem();
+#endif
+
return((int) DB_SUCCESS);
}
diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
index 4487fa72995..36dd8a7d80c 100644
--- a/innobase/sync/sync0arr.c
+++ b/innobase/sync/sync0arr.c
@@ -441,7 +441,8 @@ static
void
sync_array_cell_print(
/*==================*/
- FILE* file, /* in: file where to print */
+ char* buf, /* in: buffer where to print, must be
+ at least 400 characters */
sync_cell_t* cell) /* in: sync cell */
{
mutex_t* mutex;
@@ -451,7 +452,7 @@ sync_array_cell_print(
type = cell->request_type;
- fprintf(file,
+ buf += sprintf(buf,
"--Thread %lu has waited at %s line %lu for %.2f seconds the semaphore:\n",
(ulint)cell->thread, cell->file, cell->line,
difftime(time(NULL), cell->reservation_time));
@@ -461,54 +462,58 @@ sync_array_cell_print(
been freed meanwhile */
mutex = cell->old_wait_mutex;
- fprintf(file,
+ buf += sprintf(buf,
"Mutex at %lx created file %s line %lu, lock var %lu\n",
(ulint)mutex, mutex->cfile_name, mutex->cline,
mutex->lock_word);
- fprintf(file,
+ buf += sprintf(buf,
"Last time reserved in file %s line %lu, waiters flag %lu\n",
mutex->file_name, mutex->line, mutex->waiters);
} else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) {
if (type == RW_LOCK_EX) {
- fprintf(file, "X-lock on");
+ buf += sprintf(buf, "X-lock on");
} else {
- fprintf(file, "S-lock on");
+ buf += sprintf(buf, "S-lock on");
}
rwlock = cell->old_wait_rw_lock;
- fprintf(file, " RW-latch at %lx created in file %s line %lu\n",
+ buf += sprintf(buf,
+ " RW-latch at %lx created in file %s line %lu\n",
(ulint)rwlock, rwlock->cfile_name, rwlock->cline);
if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
- fprintf(file,
+ buf += sprintf(buf,
"a writer (thread id %lu) has reserved it in mode",
(ulint)rwlock->writer_thread);
if (rwlock->writer == RW_LOCK_EX) {
- fprintf(file, " exclusive\n");
+ buf += sprintf(buf, " exclusive\n");
} else {
- fprintf(file, " wait exclusive\n");
+ buf += sprintf(buf, " wait exclusive\n");
}
}
- fprintf(file, "number of readers %lu, waiters flag %lu\n",
+ buf += sprintf(buf,
+ "number of readers %lu, waiters flag %lu\n",
rwlock->reader_count, rwlock->waiters);
- fprintf(file, "Last time read locked in file %s line %lu\n",
+ buf += sprintf(buf,
+ "Last time read locked in file %s line %lu\n",
rwlock->last_s_file_name, rwlock->last_s_line);
- fprintf(file, "Last time write locked in file %s line %lu\n",
+ buf += sprintf(buf,
+ "Last time write locked in file %s line %lu\n",
rwlock->last_x_file_name, rwlock->last_x_line);
} else {
ut_error;
}
if (!cell->waiting) {
- fprintf(file, "wait has ended\n");
+ buf += sprintf(buf, "wait has ended\n");
}
if (cell->event_set) {
- fprintf(file, "wait is ending\n");
+ buf += sprintf(buf, "wait is ending\n");
}
}
@@ -610,6 +615,7 @@ sync_array_detect_deadlock(
os_thread_id_t thread;
ibool ret;
rw_lock_debug_t* debug;
+ char buf[500];
ut_a(arr && start && cell);
ut_ad(cell->wait_object);
@@ -642,11 +648,12 @@ sync_array_detect_deadlock(
ret = sync_array_deadlock_step(arr, start, thread, 0,
depth);
if (ret) {
+ sync_array_cell_print(buf, cell);
printf(
- "Mutex %lx owned by thread %lu file %s line %lu\n",
+ "Mutex %lx owned by thread %lu file %s line %lu\n%s",
(ulint)mutex, mutex->thread_id,
- mutex->file_name, mutex->line);
- sync_array_cell_print(stdout, cell);
+ mutex->file_name, mutex->line,
+ buf);
return(TRUE);
}
}
@@ -678,9 +685,9 @@ sync_array_detect_deadlock(
debug->pass,
depth);
if (ret) {
- printf("rw-lock %lx ", (ulint) lock);
+ sync_array_cell_print(buf, cell);
+ printf("rw-lock %lx %s ", (ulint) lock, buf);
rw_lock_debug_print(debug);
- sync_array_cell_print(stdout, cell);
return(TRUE);
}
@@ -711,9 +718,9 @@ sync_array_detect_deadlock(
debug->pass,
depth);
if (ret) {
- printf("rw-lock %lx ", (ulint) lock);
+ sync_array_cell_print(buf, cell);
+ printf("rw-lock %lx %s ", (ulint) lock, buf);
rw_lock_debug_print(debug);
- sync_array_cell_print(stdout, cell);
return(TRUE);
}
@@ -898,6 +905,7 @@ sync_array_print_long_waits(void)
sync_cell_t* cell;
ibool old_val;
ibool noticed = FALSE;
+ char buf[500];
ulint i;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
@@ -907,9 +915,10 @@ sync_array_print_long_waits(void)
if (cell->wait_object != NULL
&& difftime(time(NULL), cell->reservation_time) > 240) {
+ sync_array_cell_print(buf, cell);
+
fprintf(stderr,
- "InnoDB: Warning: a long semaphore wait:\n");
- sync_array_cell_print(stderr, cell);
+ "InnoDB: Warning: a long semaphore wait:\n%s", buf);
noticed = TRUE;
}
@@ -948,6 +957,8 @@ static
void
sync_array_output_info(
/*===================*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end,/* in: buffer end */
sync_array_t* arr) /* in: wait array; NOTE! caller must own the
mutex */
{
@@ -955,18 +966,29 @@ sync_array_output_info(
ulint count;
ulint i;
- printf("OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
+ if (buf_end - buf < 500) {
+ return;
+ }
+
+ buf += sprintf(buf,
+ "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
arr->res_count, arr->sg_count);
i = 0;
count = 0;
while (count < arr->n_reserved) {
+ if (buf_end - buf < 500) {
+ return;
+ }
+
cell = sync_array_get_nth_cell(arr, i);
if (cell->wait_object != NULL) {
count++;
- sync_array_cell_print(stdout, cell);
+ sync_array_cell_print(buf, cell);
+
+ buf = buf + strlen(buf);
}
i++;
@@ -979,11 +1001,13 @@ Prints info of the wait array. */
void
sync_array_print_info(
/*==================*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end,/* in: buffer end */
sync_array_t* arr) /* in: wait array */
{
sync_array_enter(arr);
- sync_array_output_info(arr);
+ sync_array_output_info(buf, buf_end, arr);
sync_array_exit(arr);
}
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
index 144ed263db9..14a2a6f8cc1 100644
--- a/innobase/sync/sync0sync.c
+++ b/innobase/sync/sync0sync.c
@@ -874,6 +874,7 @@ sync_thread_levels_empty_gen(
sync_level_t* slot;
rw_lock_t* lock;
mutex_t* mutex;
+ char* buf;
ulint i;
if (!sync_order_checks_on) {
@@ -907,7 +908,9 @@ sync_thread_levels_empty_gen(
mutex = slot->latch;
mutex_exit(&sync_thread_mutex);
- sync_print();
+ buf = mem_alloc(20000);
+
+ sync_print(buf, buf + 18000);
ut_error;
return(FALSE);
@@ -1243,14 +1246,21 @@ sync_close(void)
Prints wait info of the sync system. */
void
-sync_print_wait_info(void)
-/*======================*/
+sync_print_wait_info(
+/*=================*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end) /* in: buffer end */
{
#ifdef UNIV_SYNC_DEBUG
printf("Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
#endif
- printf(
+ if (buf_end - buf < 500) {
+
+ return;
+ }
+
+ sprintf(buf,
"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
mutex_spin_wait_count, mutex_spin_round_count,
@@ -1263,11 +1273,18 @@ sync_print_wait_info(void)
Prints info of the sync system. */
void
-sync_print(void)
-/*============*/
+sync_print(
+/*=======*/
+ char* buf, /* in/out: buffer where to print */
+ char* buf_end) /* in: buffer end */
{
mutex_list_print_info();
+
rw_lock_list_print_info();
- sync_array_print_info(sync_primary_wait_array);
- sync_print_wait_info();
+
+ sync_array_print_info(buf, buf_end, sync_primary_wait_array);
+
+ buf = buf + strlen(buf);
+
+ sync_print_wait_info(buf, buf_end);
}
diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c
index 97cc2dbff1a..4c2ee5dc9be 100644
--- a/innobase/trx/trx0roll.c
+++ b/innobase/trx/trx0roll.c
@@ -160,11 +160,13 @@ trx_rollback_last_sql_stat_for_mysql(
}
/***********************************************************************
-Rollback uncommitted transactions which have no user session. */
+Roll back or clean up transactions which have no user session. If the
+transaction was already committed, then we clean up a possible insert
+undo log. If the transaction was not yet committed, then we roll it back. */
void
-trx_rollback_all_without_sess(void)
-/*===============================*/
+trx_rollback_or_clean_all_without_sess(void)
+/*========================================*/
{
mem_heap_t* heap;
que_fork_t* fork;
@@ -217,6 +219,19 @@ loop:
trx->sess = trx_dummy_sess;
+ if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
+
+ fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
+ ut_dulint_get_high(trx->id),
+ ut_dulint_get_low(trx->id));
+
+ trx_cleanup_at_db_startup(trx);
+
+ mem_heap_free(heap);
+
+ goto loop;
+ }
+
fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
fork->trx = trx;
@@ -264,9 +279,17 @@ loop:
/* If the transaction was for a dictionary operation, we
drop the relevant table, if it still exists */
+ fprintf(stderr,
+"InnoDB: Dropping table with id %lu %lu in recovery if it exists\n",
+ ut_dulint_get_high(trx->table_id),
+ ut_dulint_get_low(trx->table_id));
+
table = dict_table_get_on_id_low(trx->table_id, trx);
if (table) {
+ fprintf(stderr,
+"InnoDB: Table found: dropping table %s in recovery\n", table->name);
+
err = row_drop_table_for_mysql(table->name, trx,
TRUE);
ut_a(err == (int) DB_SUCCESS);
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
index 32a1db48488..675cdf1b7e4 100644
--- a/innobase/trx/trx0sys.c
+++ b/innobase/trx/trx0sys.c
@@ -26,6 +26,14 @@ Created 3/26/1996 Heikki Tuuri
trx_sys_t* trx_sys = NULL;
trx_doublewrite_t* trx_doublewrite = NULL;
+/* In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. We have successfully got the updates to InnoDB
+up to this position. If trx_sys_mysql_master_log_pos is -1, it means no crash
+recovery was needed, or there was no master log position info inside InnoDB. */
+
+char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+ib_longlong trx_sys_mysql_master_log_pos = -1;
+
/********************************************************************
Determines if a page number is located inside the doublewrite buffer. */
@@ -427,75 +435,62 @@ trx_sys_flush_max_trx_id(void)
/*********************************************************************
Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. */
+which corresponds to the transaction just being committed. In a MySQL
+replication slave updates the latest master binlog position up to which
+replication has proceeded. */
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- trx_t* trx, /* in: transaction being committed */
- mtr_t* mtr) /* in: mtr */
+ char* file_name,/* in: MySQL log file name */
+ ib_longlong offset, /* in: position in that log file */
+ ulint field, /* in: offset of the MySQL log info field in
+ the trx sys header */
+ mtr_t* mtr) /* in: mtr */
{
trx_sysf_t* sys_header;
- char namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN];
-
- ut_ad(trx->mysql_log_file_name);
- memset(namebuf, ' ', TRX_SYS_MYSQL_LOG_NAME_LEN - 1);
- namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN - 1] = '\0';
+ if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
- /* Copy the whole MySQL log file name to the buffer, or only the
- last characters, if it does not fit */
+ /* We cannot fit the name to the 512 bytes we have reserved */
- if (ut_strlen(trx->mysql_log_file_name)
- > TRX_SYS_MYSQL_LOG_NAME_LEN - 1) {
- ut_memcpy(namebuf, trx->mysql_log_file_name
- + ut_strlen(trx->mysql_log_file_name)
- - (TRX_SYS_MYSQL_LOG_NAME_LEN - 1),
- TRX_SYS_MYSQL_LOG_NAME_LEN - 1);
- } else {
- ut_memcpy(namebuf, trx->mysql_log_file_name,
- 1 + ut_strlen(trx->mysql_log_file_name));
+ return;
}
- namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN - 1] = '\0';
-
sys_header = trx_sysf_get(mtr);
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ if (mach_read_from_4(sys_header + field
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
TRX_SYS_MYSQL_LOG_MAGIC_N,
MLOG_4BYTES, mtr);
}
- if (0 != ut_memcmp(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME,
- namebuf, TRX_SYS_MYSQL_LOG_NAME_LEN)) {
+ if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
+ file_name, 1 + ut_strlen(file_name))) {
- mlog_write_string(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_string(sys_header + field
+ TRX_SYS_MYSQL_LOG_NAME,
- namebuf, TRX_SYS_MYSQL_LOG_NAME_LEN, mtr);
+ file_name, 1 + ut_strlen(file_name), mtr);
}
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ if (mach_read_from_4(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (trx->mysql_log_offset >> 32) > 0) {
+ || (offset >> 32) > 0) {
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(trx->mysql_log_offset >> 32),
+ (ulint)(offset >> 32),
MLOG_4BYTES, mtr);
}
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(trx->mysql_log_offset & 0xFFFFFFFF),
+ (ulint)(offset & 0xFFFFFFFF),
MLOG_4BYTES, mtr);
-
- trx->mysql_log_file_name = NULL;
}
/*********************************************************************
@@ -533,6 +528,58 @@ trx_sys_print_mysql_binlog_offset(void)
mtr_commit(&mtr);
}
+/*********************************************************************
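+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it valid. In that case also copies the master log file
+name and position to the global variables trx_sys_mysql_master_log_name and
+trx_sys_mysql_master_log_pos. */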
+
+void
+trx_sys_print_mysql_master_log_pos(void)
+/*====================================*/
+{
+ trx_sysf_t* sys_header;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ fprintf(stderr,
+"InnoDB: In a MySQL replication slave the last master binlog file\n"
+"InnoDB: position %lu %lu, file name %s\n",
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME);
+ /* Copy the master log position info to global variables we can
+ use in ha_innobase.cc to initialize glob_mi to the right values */
+
+ ut_memcpy(trx_sys_mysql_master_log_name,
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME,
+ TRX_SYS_MYSQL_LOG_NAME_LEN);
+
+ trx_sys_mysql_master_log_pos =
+ (((ib_longlong)mach_read_from_4(
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH))
+ << 32)
+ + (ib_longlong)
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
+ mtr_commit(&mtr);
+}
+
/********************************************************************
Looks for a free slot for a rollback segment in the trx system file copy. */
@@ -660,7 +707,7 @@ trx_sys_init_at_db_start(void)
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
fprintf(stderr,
- "InnoDB: %lu uncommitted transaction(s) which must be rolled back\n",
+ "InnoDB: %lu transaction(s) which must be rolled back or cleaned up\n",
UT_LIST_GET_LEN(trx_sys->trx_list));
fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index 87b82cbee3a..8d84967a49d 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -26,9 +26,11 @@ Created 3/26/1996 Heikki Tuuri
/* Copy of the prototype for innobase_mysql_print_thd: this
-copy must be equal to the one in mysql/sql/ha_innobase.cc ! */
+copy MUST be equal to the one in mysql/sql/ha_innobase.cc ! */
-void innobase_mysql_print_thd(void* thd);
+void innobase_mysql_print_thd(
+ char* buf,
+ void* thd);
/* Dummy session used currently in MySQL interface */
sess_t* trx_dummy_sess = NULL;
@@ -83,6 +85,8 @@ trx_create(
trx->mysql_log_file_name = NULL;
trx->mysql_log_offset = 0;
+ trx->mysql_master_log_file_name = "";
+ trx->mysql_master_log_pos = 0;
trx->ignore_duplicates_in_insert = FALSE;
@@ -363,16 +367,31 @@ trx_lists_init_at_db_start(void)
trx = trx_create(NULL);
+ trx->id = undo->trx_id;
+
+ trx->insert_undo = undo;
+ trx->rseg = rseg;
+
if (undo->state != TRX_UNDO_ACTIVE) {
trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+
+ /* We give a dummy value for the trx no;
+ this should have no relevance since purge
+ is not interested in committed transaction
+ numbers, unless they are in the history
+ list, in which case it looks up the number
+ from the disk-based undo log structure */
+
+ trx->no = trx->id;
} else {
trx->conc_state = TRX_ACTIVE;
- }
- trx->id = undo->trx_id;
- trx->insert_undo = undo;
- trx->rseg = rseg;
+ /* A running transaction always has the number
+ field inited to ut_dulint_max */
+
+ trx->no = ut_dulint_max;
+ }
if (undo->dict_operation) {
trx->dict_operation = undo->dict_operation;
@@ -397,14 +416,25 @@ trx_lists_init_at_db_start(void)
if (NULL == trx) {
trx = trx_create(NULL);
+ trx->id = undo->trx_id;
+
if (undo->state != TRX_UNDO_ACTIVE) {
trx->conc_state =
TRX_COMMITTED_IN_MEMORY;
+ /* We give a dummy value for the trx
+ number */
+
+ trx->no = trx->id;
} else {
trx->conc_state = TRX_ACTIVE;
+
+ /* A running transaction always has
+ the number field inited to
+ ut_dulint_max */
+
+ trx->no = ut_dulint_max;
}
- trx->id = undo->trx_id;
trx->rseg = rseg;
trx_list_insert_ordered(trx);
@@ -583,7 +613,7 @@ trx_commit_off_kernel(
if (undo) {
mutex_enter(&kernel_mutex);
#ifdef notdefined
- /* ########## There is a bug here: purge and rollback
+ /* !!!!!!!!! There is a bug here: purge and rollback
need the whole stack of old record versions even if no
consistent read would need them!! This is because they
decide on the basis of the old versions when we can
@@ -627,12 +657,25 @@ trx_commit_off_kernel(
mutex_exit(&(rseg->mutex));
/* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on */
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
if (trx->mysql_log_file_name) {
- trx_sys_update_mysql_binlog_offset(trx, &mtr);
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+ trx->mysql_log_file_name = NULL;
}
-
+
+ if (trx->mysql_master_log_file_name[0] != '\0') {
+ /* This database server is a MySQL replication slave */
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_master_log_file_name,
+ trx->mysql_master_log_pos,
+ TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
+ }
+
/* If we did not take the shortcut, the following call
commits the mini-transaction, making the whole transaction
committed in the file-based world at this log sequence number;
@@ -707,12 +750,12 @@ trx_commit_off_kernel(
/*-------------------------------------*/
- /* Most MySQL users run with srv_flush.. set to FALSE: */
+ /* Most MySQL users run with srv_flush_log_at_trx_commit set to FALSE: */
if (srv_flush_log_at_trx_commit) {
log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
- }
+ }
/*-------------------------------------*/
@@ -730,6 +773,29 @@ trx_commit_off_kernel(
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
+/********************************************************************
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction already got to the middle of a commit when the database
+crashed, and we cannot roll it back. */
+
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+ trx_t* trx) /* in: transaction */
+{
+ if (trx->insert_undo != NULL) {
+
+ trx_undo_insert_cleanup(trx);
+ }
+
+ trx->conc_state = TRX_NOT_STARTED;
+ trx->rseg = NULL;
+ trx->undo_no = ut_dulint_zero;
+ trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+
+ UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+}
+
/************************************************************************
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
@@ -1395,54 +1461,63 @@ own the kernel mutex. */
void
trx_print(
/*======*/
+ char* buf, /* in/out: buffer where to print, must be at least
+ 500 bytes */
trx_t* trx) /* in: transaction */
{
- printf("TRANSACTION %lu %lu, OS thread id %lu",
+ buf += sprintf(buf, "TRANSACTION %lu %lu, OS thread id %lu",
ut_dulint_get_high(trx->id),
ut_dulint_get_low(trx->id),
(ulint)trx->mysql_thread_id);
if (ut_strlen(trx->op_info) > 0) {
- printf(" %s", trx->op_info);
+ buf += sprintf(buf, " %s", trx->op_info);
}
if (trx->type != TRX_USER) {
- printf(" purge trx");
+ buf += sprintf(buf, " purge trx");
}
switch (trx->conc_state) {
- case TRX_NOT_STARTED: printf(", not started"); break;
- case TRX_ACTIVE: printf(", active"); break;
- case TRX_COMMITTED_IN_MEMORY: printf(", committed in memory");
+ case TRX_NOT_STARTED: buf += sprintf(buf,
+ ", not started"); break;
+ case TRX_ACTIVE: buf += sprintf(buf,
+ ", active"); break;
+ case TRX_COMMITTED_IN_MEMORY: buf += sprintf(buf,
+ ", committed in memory");
break;
- default: printf(" state %lu", trx->conc_state);
+ default: buf += sprintf(buf, " state %lu", trx->conc_state);
}
switch (trx->que_state) {
- case TRX_QUE_RUNNING: printf(", runs or sleeps"); break;
- case TRX_QUE_LOCK_WAIT: printf(", lock wait"); break;
- case TRX_QUE_ROLLING_BACK: printf(", rolling back"); break;
- case TRX_QUE_COMMITTING: printf(", committing"); break;
- default: printf(" que state %lu", trx->que_state);
+ case TRX_QUE_RUNNING: buf += sprintf(buf,
+ ", runs or sleeps"); break;
+ case TRX_QUE_LOCK_WAIT: buf += sprintf(buf,
+ ", lock wait"); break;
+ case TRX_QUE_ROLLING_BACK: buf += sprintf(buf,
+ ", rolling back"); break;
+ case TRX_QUE_COMMITTING: buf += sprintf(buf,
+ ", committing"); break;
+ default: buf += sprintf(buf, " que state %lu", trx->que_state);
}
if (0 < UT_LIST_GET_LEN(trx->trx_locks)) {
- printf(", has %lu lock struct(s)",
+ buf += sprintf(buf, ", has %lu lock struct(s)",
UT_LIST_GET_LEN(trx->trx_locks));
}
if (trx->has_search_latch) {
- printf(", holds adaptive hash latch");
+ buf += sprintf(buf, ", holds adaptive hash latch");
}
if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
- printf(", undo log entries %lu",
+ buf += sprintf(buf, ", undo log entries %lu",
ut_dulint_get_low(trx->undo_no));
}
- printf("\n");
+ buf += sprintf(buf, "\n");
if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(trx->mysql_thd);
+ innobase_mysql_print_thd(buf, trx->mysql_thd);
}
}
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
index aae31f3726b..6303c5bbcdd 100644
--- a/innobase/trx/trx0undo.c
+++ b/innobase/trx/trx0undo.c
@@ -1147,7 +1147,7 @@ trx_undo_mem_create_at_db_start(
/* If the log segment is being freed, the page list is inconsistent! */
if (state == TRX_UNDO_TO_FREE) {
- return(undo);
+ goto add_to_list;
}
last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
@@ -1166,7 +1166,7 @@ trx_undo_mem_create_at_db_start(
undo->top_offset = rec - last_page;
undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
}
-
+add_to_list:
if (type == TRX_UNDO_INSERT) {
if (state != TRX_UNDO_CACHED) {
UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
index a1320e8b5bc..2a7643551ad 100644
--- a/innobase/ut/ut0mem.c
+++ b/innobase/ut/ut0mem.c
@@ -38,6 +38,8 @@ os_fast_mutex_t ut_list_mutex; /* this protects the list */
ibool ut_mem_block_list_inited = FALSE;
+ulint* ut_mem_null_ptr = NULL;
+
/**************************************************************************
Initializes the mem block list at database startup. */
static
@@ -83,12 +85,16 @@ ut_malloc_low(
"InnoDB: Check if you should increase the swap file or\n"
"InnoDB: ulimits of your operating system.\n"
"InnoDB: On FreeBSD check you have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n",
+ "InnoDB: a big enough maximum process size.\n"
+ "InnoDB: We now intentionally generate a seg fault so that\n"
+ "InnoDB: on Linux we get a stack trace.\n",
n, ut_total_allocated_memory, errno);
os_fast_mutex_unlock(&ut_list_mutex);
- exit(1);
+ /* Make an intentional seg fault so that we get a stack
+ trace */
+ printf("%lu\n", *ut_mem_null_ptr);
}
if (set_to_zero) {
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
index cd21491dcf1..c164afa0573 100644
--- a/innobase/ut/ut0ut.c
+++ b/innobase/ut/ut0ut.c
@@ -111,6 +111,49 @@ ut_print_timestamp(
}
/**************************************************************
+Sprintfs a timestamp to a buffer. */
+
+void
+ut_sprintf_timestamp(
+/*=================*/
+ char* buf) /* in: buffer where to sprintf */
+{
+#ifdef __WIN__
+ SYSTEMTIME cal_tm;
+
+ GetLocalTime(&cal_tm);
+
+ sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
+ (int)cal_tm.wYear % 100,
+ (int)cal_tm.wMonth,
+ (int)cal_tm.wDay,
+ (int)cal_tm.wHour,
+ (int)cal_tm.wMinute,
+ (int)cal_tm.wSecond);
+#else
+ struct tm cal_tm;
+ struct tm* cal_tm_ptr;
+ time_t tm;
+
+ time(&tm);
+
+#ifdef HAVE_LOCALTIME_R
+ localtime_r(&tm, &cal_tm);
+ cal_tm_ptr = &cal_tm;
+#else
+ cal_tm_ptr = localtime(&tm);
+#endif
+ sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
+ cal_tm_ptr->tm_year % 100,
+ cal_tm_ptr->tm_mon + 1,
+ cal_tm_ptr->tm_mday,
+ cal_tm_ptr->tm_hour,
+ cal_tm_ptr->tm_min,
+ cal_tm_ptr->tm_sec);
+#endif
+}
+
+/**************************************************************
Returns current year, month, day. */
void
@@ -258,3 +301,26 @@ ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
ut_ulint_cmp);
}
+
+/*****************************************************************
+Quickly calculates the number rounded up to the nearest power of 2. */
+
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 1;
+
+ ut_ad(n > 0);
+
+ while (res < n) {
+ res = res * 2;
+ }
+
+ return(res);
+}
+