author     unknown <heikki@hundin.mysql.fi>  2002-06-22 20:41:14 +0300
committer  unknown <heikki@hundin.mysql.fi>  2002-06-22 20:41:14 +0300
commit     1081513a12851d432103eaabcb6533c84062196c (patch)
tree       cd94be28879d52101bd7438dc97dc04856d81739
parent     b7b988b3c36fa7a1f0a284d6478bbccbe706c09f (diff)
download   mariadb-git-1081513a12851d432103eaabcb6533c84062196c.tar.gz
Many files:
  Merge 3.23.52

innobase/btr/btr0btr.c: Merge 3.23.52
innobase/btr/btr0cur.c: Merge 3.23.52
innobase/btr/btr0sea.c: Merge 3.23.52
innobase/include/btr0btr.h: Merge 3.23.52
innobase/include/btr0cur.h: Merge 3.23.52
innobase/include/btr0sea.h: Merge 3.23.52
innobase/include/buf0buf.h: Merge 3.23.52
innobase/include/buf0rea.h: Merge 3.23.52
innobase/include/data0data.h: Merge 3.23.52
innobase/include/data0data.ic: Merge 3.23.52
innobase/include/log0log.h: Merge 3.23.52
innobase/include/log0log.ic: Merge 3.23.52
innobase/include/os0file.h: Merge 3.23.52
innobase/include/page0page.h: Merge 3.23.52
innobase/include/page0page.ic: Merge 3.23.52
innobase/include/row0mysql.h: Merge 3.23.52
innobase/include/trx0roll.h: Merge 3.23.52
innobase/include/trx0sys.h: Merge 3.23.52
innobase/include/trx0trx.h: Merge 3.23.52
innobase/include/ut0ut.h: Merge 3.23.52
innobase/include/univ.i: Merge 3.23.52
innobase/include/ut0ut.ic: Merge 3.23.52
innobase/buf/buf0buf.c: Merge 3.23.52
innobase/buf/buf0rea.c: Merge 3.23.52
innobase/data/data0data.c: Merge 3.23.52
innobase/dict/dict0crea.c: Merge 3.23.52
innobase/dict/dict0dict.c: Merge 3.23.52
innobase/dict/dict0load.c: Merge 3.23.52
innobase/dict/dict0mem.c: Merge 3.23.52
innobase/fsp/fsp0fsp.c: Merge 3.23.52
innobase/ibuf/ibuf0ibuf.c: Merge 3.23.52
innobase/lock/lock0lock.c: Merge 3.23.52
innobase/log/log0log.c: Merge 3.23.52
innobase/log/log0recv.c: Merge 3.23.52
innobase/mtr/mtr0log.c: Merge 3.23.52
innobase/mtr/mtr0mtr.c: Merge 3.23.52
innobase/os/os0file.c: Merge 3.23.52
innobase/page/page0cur.c: Merge 3.23.52
innobase/page/page0page.c: Merge 3.23.52
innobase/rem/rem0cmp.c: Merge 3.23.52
innobase/row/row0ins.c: Merge 3.23.52
innobase/row/row0mysql.c: Merge 3.23.52
innobase/row/row0purge.c: Merge 3.23.52
innobase/row/row0upd.c: Merge 3.23.52
innobase/srv/srv0srv.c: Merge 3.23.52
innobase/srv/srv0start.c: Merge 3.23.52
innobase/trx/trx0roll.c: Merge 3.23.52
innobase/trx/trx0sys.c: Merge 3.23.52
innobase/trx/trx0trx.c: Merge 3.23.52
innobase/trx/trx0undo.c: Merge 3.23.52
innobase/ut/ut0mem.c: Merge 3.23.52
innobase/ut/ut0ut.c: Merge 3.23.52
-rw-r--r--  innobase/btr/btr0btr.c  27
-rw-r--r--  innobase/btr/btr0cur.c  85
-rw-r--r--  innobase/btr/btr0sea.c  79
-rw-r--r--  innobase/buf/buf0buf.c  42
-rw-r--r--  innobase/buf/buf0rea.c  29
-rw-r--r--  innobase/data/data0data.c  115
-rw-r--r--  innobase/dict/dict0crea.c  1
-rw-r--r--  innobase/dict/dict0dict.c  14
-rw-r--r--  innobase/dict/dict0load.c  701
-rw-r--r--  innobase/dict/dict0mem.c  3
-rw-r--r--  innobase/fsp/fsp0fsp.c  22
-rw-r--r--  innobase/ibuf/ibuf0ibuf.c  47
-rw-r--r--  innobase/include/btr0btr.h  10
-rw-r--r--  innobase/include/btr0cur.h  1
-rw-r--r--  innobase/include/btr0sea.h  3
-rw-r--r--  innobase/include/buf0buf.h  13
-rw-r--r--  innobase/include/buf0rea.h  2
-rw-r--r--  innobase/include/data0data.h  10
-rw-r--r--  innobase/include/data0data.ic  24
-rw-r--r--  innobase/include/log0log.h  50
-rw-r--r--  innobase/include/log0log.ic  29
-rw-r--r--  innobase/include/os0file.h  12
-rw-r--r--  innobase/include/page0page.h  2
-rw-r--r--  innobase/include/page0page.ic  42
-rw-r--r--  innobase/include/row0mysql.h  13
-rw-r--r--  innobase/include/trx0roll.h  8
-rw-r--r--  innobase/include/trx0sys.h  52
-rw-r--r--  innobase/include/trx0trx.h  24
-rw-r--r--  innobase/include/univ.i  40
-rw-r--r--  innobase/include/ut0ut.h  9
-rw-r--r--  innobase/include/ut0ut.ic  22
-rw-r--r--  innobase/lock/lock0lock.c  20
-rw-r--r--  innobase/log/log0log.c  207
-rw-r--r--  innobase/log/log0recv.c  147
-rw-r--r--  innobase/mtr/mtr0log.c  6
-rw-r--r--  innobase/mtr/mtr0mtr.c  2
-rw-r--r--  innobase/os/os0file.c  110
-rw-r--r--  innobase/page/page0cur.c  9
-rw-r--r--  innobase/page/page0page.c  60
-rw-r--r--  innobase/rem/rem0cmp.c  13
-rw-r--r--  innobase/row/row0ins.c  24
-rw-r--r--  innobase/row/row0mysql.c  70
-rw-r--r--  innobase/row/row0purge.c  8
-rw-r--r--  innobase/row/row0upd.c  23
-rw-r--r--  innobase/srv/srv0srv.c  72
-rw-r--r--  innobase/srv/srv0start.c  22
-rw-r--r--  innobase/trx/trx0roll.c  29
-rw-r--r--  innobase/trx/trx0sys.c  121
-rw-r--r--  innobase/trx/trx0trx.c  86
-rw-r--r--  innobase/trx/trx0undo.c  4
-rw-r--r--  innobase/ut/ut0mem.c  10
-rw-r--r--  innobase/ut/ut0ut.c  66
52 files changed, 1879 insertions, 761 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
index b9253562fe6..88472d6dbe0 100644
--- a/innobase/btr/btr0btr.c
+++ b/innobase/btr/btr0btr.c
@@ -572,6 +572,13 @@ btr_page_get_father_for_rec(
if (btr_node_ptr_get_child_page_no(node_ptr) !=
buf_frame_get_page_no(page)) {
+ fprintf(stderr,
+"InnoDB: Dump of the child page:\n");
+ buf_page_print(buf_frame_align(page));
+ fprintf(stderr,
+"InnoDB: Dump of the parent page:\n");
+ buf_page_print(buf_frame_align(node_ptr));
+
fprintf(stderr,
"InnoDB: Corruption of an index tree: table %s, index %s,\n"
"InnoDB: father ptr page no %lu, child page no %lu\n",
@@ -581,6 +588,12 @@ btr_page_get_father_for_rec(
buf_frame_get_page_no(page));
page_rec_print(page_rec_get_next(page_get_infimum_rec(page)));
page_rec_print(node_ptr);
+
+ fprintf(stderr,
+"InnoDB: You should dump + drop + reimport the table to fix the\n"
+"InnoDB: corruption. If the crash happens at the database startup, see\n"
+"InnoDB: section 6.1 of http://www.innodb.com/ibman.html about forcing\n"
+"InnoDB: recovery. Then dump + drop + reimport.\n");
}
ut_a(btr_node_ptr_get_child_page_no(node_ptr) ==
@@ -780,12 +793,14 @@ top_loop:
/*****************************************************************
Reorganizes an index page. */
-
+static
void
btr_page_reorganize_low(
/*====================*/
- ibool low, /* in: TRUE if locks should not be updated, i.e.,
- there cannot exist locks on the page */
+ ibool recovery,/* in: TRUE if called in recovery: locks should not
+ be updated, i.e., there cannot exist locks on the
+ page, and a hash index should not be dropped: it
+ cannot exist */
page_t* page, /* in: page to be reorganized */
mtr_t* mtr) /* in: mtr */
{
@@ -805,7 +820,9 @@ btr_page_reorganize_low(
/* Copy the old page to temporary space */
buf_frame_copy(new_page, page);
- btr_search_drop_page_hash_index(page);
+ if (!recovery) {
+ btr_search_drop_page_hash_index(page);
+ }
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
@@ -820,7 +837,7 @@ btr_page_reorganize_low(
/* Copy max trx id to recreated page */
page_set_max_trx_id(page, page_get_max_trx_id(new_page));
- if (!low) {
+ if (!recovery) {
/* Update the record lock bitmaps */
lock_move_reorganize_page(page, new_page);
}
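
The hunk above now dumps both the child and the parent page when a node pointer does not agree with the child's page number, and btr_page_reorganize_low() becomes static with a recovery flag so that lock bitmaps and the adaptive hash index are left alone when a page is rebuilt during crash recovery. A minimal standalone sketch of the parent/child consistency check, using made-up struct names rather than the real InnoDB types:

#include <stdio.h>

/* Hypothetical stand-ins for a page frame and a node pointer record. */
struct page { unsigned long page_no; };
struct nptr { unsigned long child_page_no; };

/* Returns 0 if the parent's node pointer really points to 'child';
   otherwise prints a diagnostic (the real code also dumps both pages
   and advises dump + drop + reimport of the table). */
static int
check_father_ptr(const struct nptr *node_ptr, const struct page *child)
{
	if (node_ptr->child_page_no != child->page_no) {
		fprintf(stderr,
			"Corruption of an index tree: father ptr page no %lu,"
			" child page no %lu\n",
			node_ptr->child_page_no, child->page_no);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct page child    = { 42 };
	struct nptr node_ptr = { 43 };	/* mismatch on purpose */

	return check_father_ptr(&node_ptr, &child) ? 1 : 0;
}
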
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index daebb8dbbaf..67b74967e8d 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -36,9 +36,14 @@ Created 10/16/1994 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "lock0lock.h"
+/* If the following is set to TRUE, this module prints a lot of
+trace information of individual record operations */
+ibool btr_cur_print_record_ops = FALSE;
+
ulint btr_cur_rnd = 0;
ulint btr_cur_n_non_sea = 0;
+ulint btr_cur_n_sea = 0;
/* In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
@@ -187,11 +192,7 @@ btr_cur_search_to_nth_level(
tuple must be set so that it cannot get
compared to the node ptr page number field! */
ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page relative to the
- record! Inserts should always be made using
+ Inserts should always be made using
PAGE_CUR_LE to search the position! */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
@@ -268,7 +269,7 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
#endif
- if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED
+ if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
&& btr_search_guess_on_hash(index, info, tuple, mode,
@@ -283,14 +284,14 @@ btr_cur_search_to_nth_level(
|| mode != PAGE_CUR_LE);
ut_ad(cursor->low_match != ULINT_UNDEFINED
|| mode != PAGE_CUR_LE);
+ btr_cur_n_sea++;
+
return;
}
#endif
#endif
-
-#ifdef UNIV_SEARCH_PERF_STAT
btr_cur_n_non_sea++;
-#endif
+
/* If the hash search did not succeed, do binary search down the
tree */
@@ -796,15 +797,28 @@ btr_cur_optimistic_insert(
ulint data_size;
ulint extra_size;
ulint type;
- ulint err;
-
- ut_ad(dtuple_check_typed(entry));
+ ulint err;
*big_rec = NULL;
page = btr_cur_get_page(cursor);
index = cursor->index;
+ if (!dtuple_check_typed_no_assert(entry)) {
+ fprintf(stderr,
+"InnoDB: Error in a tuple to insert into table %lu index %lu\n",
+ index->table_name, index->name);
+ }
+
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to insert to table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ dtuple_print(entry);
+ }
+
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
max_size = page_get_max_insert_size_after_reorganize(page, 1);
@@ -928,7 +942,7 @@ calculate_sizes_again:
buf_frame_get_page_no(page), max_size,
rec_size + PAGE_DIR_SLOT_SIZE, type);
*/
- if (!(type & (DICT_CLUSTERED | DICT_UNIQUE))) {
+ if (!(type & DICT_CLUSTERED)) {
/* We have added a record to page: update its free bits */
ibuf_update_free_bits_if_full(cursor->index, page, max_size,
rec_size + PAGE_DIR_SLOT_SIZE);
@@ -1258,6 +1272,15 @@ btr_cur_update_sec_rec_in_place(
rec = btr_cur_get_rec(cursor);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr);
if (err != DB_SUCCESS) {
@@ -1312,6 +1335,15 @@ btr_cur_update_in_place(
index = cursor->index;
trx = thr_get_trx(thr);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
thr, &roll_ptr);
@@ -1398,6 +1430,15 @@ btr_cur_optimistic_update(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to update table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
if (!row_upd_changes_field_size(rec, index, update)) {
@@ -1973,6 +2014,15 @@ btr_cur_del_mark_set_clust_rec(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to del mark table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ index->table_name, index->name);
+ rec_print(rec);
+ }
+
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(rec_get_deleted_flag(rec) == FALSE);
@@ -2102,6 +2152,15 @@ btr_cur_del_mark_set_sec_rec(
rec = btr_cur_get_rec(cursor);
+ if (btr_cur_print_record_ops && thr) {
+ printf(
+ "Trx with id %lu %lu going to del mark table %s index %s\n",
+ ut_dulint_get_high(thr_get_trx(thr)->id),
+ ut_dulint_get_low(thr_get_trx(thr)->id),
+ cursor->index->table_name, cursor->index->name);
+ rec_print(rec);
+ }
+
err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
thr);
if (err != DB_SUCCESS) {
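
The cursor layer now counts adaptive-hash-assisted searches (btr_cur_n_sea) separately from searches that fall through to a tree descent (btr_cur_n_non_sea), and btr_cur_print_record_ops turns on per-record tracing of inserts, updates and delete marks. A rough sketch of the two-counter pattern, with a stub standing in for btr_search_guess_on_hash():

#include <stdio.h>

static unsigned long n_hash_succ;	/* like btr_cur_n_sea */
static unsigned long n_hash_fail;	/* like btr_cur_n_non_sea */

/* Stub: pretend the hash guess succeeds for even keys only. */
static int hash_guess(unsigned long key) { return (key % 2) == 0; }

static void search(unsigned long key)
{
	if (hash_guess(key)) {
		n_hash_succ++;		/* position found via the hash index */
		return;
	}
	n_hash_fail++;			/* fall back to a tree descent */
	/* ... binary search down the B-tree would go here ... */
}

int main(void)
{
	unsigned long k;

	for (k = 0; k < 1000; k++) {
		search(k);
	}
	printf("hash-assisted searches %lu, tree searches %lu\n",
	       n_hash_succ, n_hash_fail);
	return 0;
}
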
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
index aac86f45ec9..5e1c8401e28 100644
--- a/innobase/btr/btr0sea.c
+++ b/innobase/btr/btr0sea.c
@@ -15,6 +15,7 @@ Created 2/17/1996 Heikki Tuuri
#include "page0page.h"
#include "page0cur.h"
#include "btr0cur.h"
+#include "btr0pcur.h"
#include "btr0btr.h"
ulint btr_search_n_succ = 0;
@@ -145,6 +146,8 @@ btr_search_info_create(
info = mem_heap_alloc(heap, sizeof(btr_search_t));
+ info->magic_n = BTR_SEARCH_MAGIC_N;
+
info->last_search = NULL;
info->n_direction = 0;
info->root_guess = NULL;
@@ -159,6 +162,12 @@ btr_search_info_create(
info->n_patt_succ = 0;
info->n_searches = 0;
+ /* Set some sensible values */
+ info->n_fields = 1;
+ info->n_bytes = 0;
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+
return(info);
}
@@ -197,7 +206,7 @@ btr_search_info_update_hash(
/* Test if the search would have succeeded using the recommended
hash prefix */
- if ((info->n_fields >= n_unique) && (cursor->up_match >= n_unique)) {
+ if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
info->n_hash_potential++;
@@ -207,8 +216,8 @@ btr_search_info_update_hash(
cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->low_match, cursor->low_bytes);
- if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp <= 0))
- || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp > 0))) {
+ if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp <= 0)
+ || (info->side == BTR_SEARCH_RIGHT_SIDE && cmp > 0)) {
goto set_new_recomm;
}
@@ -216,8 +225,8 @@ btr_search_info_update_hash(
cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->up_match, cursor->up_bytes);
- if (((info->side == BTR_SEARCH_LEFT_SIDE) && (cmp > 0))
- || ((info->side == BTR_SEARCH_RIGHT_SIDE) && (cmp <= 0))) {
+ if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp > 0)
+ || (info->side == BTR_SEARCH_RIGHT_SIDE && cmp <= 0)) {
goto set_new_recomm;
}
@@ -233,19 +242,18 @@ set_new_recomm:
info->hash_analysis = 0;
- if ((cursor->up_match >= n_unique)
- || (cursor->low_match >= n_unique)) {
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- info->side = BTR_SEARCH_LEFT_SIDE;
- }
-
cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
cursor->low_match, cursor->low_bytes);
if (cmp == 0) {
info->n_hash_potential = 0;
+ /* For extra safety, we set some sensible values here */
+
+ info->n_fields = 1;
+ info->n_bytes = 0;
+
+ info->side = BTR_SEARCH_LEFT_SIDE;
+
} else if (cmp > 0) {
info->n_hash_potential = 1;
@@ -305,6 +313,9 @@ btr_search_update_block_hash_info(
info->last_hash_succ = FALSE;
+ ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
+ ut_a(info->magic_n == BTR_SEARCH_MAGIC_N);
+
if ((block->n_hash_helps > 0)
&& (info->n_hash_potential > 0)
&& (block->n_fields == info->n_fields)
@@ -622,6 +633,7 @@ btr_search_guess_on_hash(
dulint tree_id;
#ifdef notdefined
btr_cur_t cursor2;
+ btr_pcur_t pcur;
#endif
ut_ad(index && info && tuple && cursor && mtr);
ut_ad((latch_mode == BTR_SEARCH_LEAF)
@@ -754,7 +766,26 @@ btr_search_guess_on_hash(
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
&cursor2, 0, mtr);
- ut_a(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
+ if (mode == PAGE_CUR_GE
+ && btr_cur_get_rec(&cursor2) == page_get_supremum_rec(
+ buf_frame_align(btr_cur_get_rec(&cursor2)))) {
+
+ /* If mode is PAGE_CUR_GE, then the binary search
+ in the index tree may actually take us to the supremum
+ of the previous page */
+
+ info->last_hash_succ = FALSE;
+
+ btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
+ &pcur, mtr);
+ ut_a(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
+ } else {
+ ut_a(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
+ }
+
+ /* NOTE that it is theoretically possible that the above assertions
+ fail if the page of the cursor gets removed from the buffer pool
+ meanwhile! Thus it might not be a bug. */
info->last_hash_succ = TRUE;
#endif
@@ -835,6 +866,8 @@ btr_search_drop_page_hash_index(
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
+ ut_a(n_fields + n_bytes > 0);
+
rw_lock_s_unlock(&btr_search_latch);
n_recs = page_get_n_recs(page);
@@ -851,6 +884,14 @@ btr_search_drop_page_hash_index(
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
+ if (rec != sup) {
+ ut_a(n_fields <= rec_get_n_fields(rec));
+
+ if (n_bytes > 0) {
+ ut_a(n_fields < rec_get_n_fields(rec));
+ }
+ }
+
tree_id = btr_page_get_index_id(page);
prev_fold = 0;
@@ -980,6 +1021,8 @@ btr_search_build_page_hash_index(
return;
}
+ ut_a(n_fields + n_bytes > 0);
+
/* Calculate and cache fold values and corresponding records into
an array for fast insertion to the hash index */
@@ -995,6 +1038,14 @@ btr_search_build_page_hash_index(
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
+ if (rec != sup) {
+ ut_a(n_fields <= rec_get_n_fields(rec));
+
+ if (n_bytes > 0) {
+ ut_a(n_fields < rec_get_n_fields(rec));
+ }
+ }
+
/* FIXME: in a mixed tree, all records may not have enough ordering
fields: */
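
btr_search_info_update_hash() steers the recommended hash prefix by comparing (n_fields, n_bytes) pairs, and the consistency check after a successful hash guess now reopens a persistent cursor when a PAGE_CUR_GE search can legitimately stop on the supremum of the previous page. A sketch of the pair comparison as it appears to be used here; treating ut_pair_cmp() as a lexicographic compare is an assumption, not something shown in this diff:

#include <stdio.h>

/* Lexicographic comparison of two (n_fields, n_bytes) pairs:
   returns 1 if a > b, 0 if equal, -1 if a < b. */
static int pair_cmp(unsigned long a1, unsigned long a2,
		    unsigned long b1, unsigned long b2)
{
	if (a1 > b1) return 1;
	if (a1 < b1) return -1;
	if (a2 > b2) return 1;
	if (a2 < b2) return -1;
	return 0;
}

int main(void)
{
	/* recommendation (2 fields, 0 bytes) vs. cursor low match (2, 3) */
	printf("%d\n", pair_cmp(2, 0, 2, 3));	/* -1: recommendation is shorter */
	printf("%d\n", pair_cmp(3, 0, 2, 3));	/*  1: recommendation is longer  */
	return 0;
}
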
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index f1a2d915d46..7d001a6953d 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -1126,12 +1126,50 @@ buf_page_get_known_nowait(
}
/************************************************************************
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_block_t* block) /* in: block to init */
+{
+ /* Set the state of the block */
+ block->magic_n = BUF_BLOCK_MAGIC_N;
+
+ block->state = BUF_BLOCK_FILE_PAGE;
+ block->space = space;
+ block->offset = offset;
+
+ block->lock_hash_val = 0;
+ block->lock_mutex = NULL;
+
+ block->freed_page_clock = 0;
+
+ block->newest_modification = ut_dulint_zero;
+ block->oldest_modification = ut_dulint_zero;
+
+ block->accessed = FALSE;
+ block->buf_fix_count = 0;
+ block->io_fix = 0;
+
+ block->n_hash_helps = 0;
+ block->is_hashed = FALSE;
+ block->n_fields = 1;
+ block->n_bytes = 0;
+ block->side = BTR_SEARCH_LEFT_SIDE;
+
+ block->file_page_was_freed = FALSE;
+}
+
+/************************************************************************
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
- /* out: pointer to the block */
ulint space, /* in: space id */
ulint offset, /* in: offset of the page within space
in units of a page */
@@ -1141,6 +1179,8 @@ buf_page_init(
ut_ad(block->state == BUF_BLOCK_READY_FOR_USE);
/* Set the state of the block */
+ block->magic_n = BUF_BLOCK_MAGIC_N;
+
block->state = BUF_BLOCK_FILE_PAGE;
block->space = space;
block->offset = offset;
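
Buffer blocks (and, in btr0sea.h below, the index search info) are now stamped with a magic number at initialization, including in the new buf_page_init_for_backup_restore() path used by ibbackup --restore, so later code can assert that the structure was really set up. A minimal sketch of that pattern with invented names:

#include <assert.h>
#include <string.h>

#define BLOCK_MAGIC_N 41526563UL	/* same idea as BUF_BLOCK_MAGIC_N */

struct block {
	unsigned long magic_n;		/* stamped at init, checked later */
	unsigned long space;
	unsigned long offset;
};

static void block_init(struct block *b, unsigned long space, unsigned long offset)
{
	memset(b, 0, sizeof *b);
	b->magic_n = BLOCK_MAGIC_N;	/* mark the block as properly set up */
	b->space = space;
	b->offset = offset;
}

static void block_use(const struct block *b)
{
	/* Fails loudly if the block was never initialized or was overwritten. */
	assert(b->magic_n == BLOCK_MAGIC_N);
}

int main(void)
{
	struct block b;

	block_init(&b, 0, 5);
	block_use(&b);
	return 0;
}
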
diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
index db187cdd896..475a5bd9cbd 100644
--- a/innobase/buf/buf0rea.c
+++ b/innobase/buf/buf0rea.c
@@ -100,6 +100,11 @@ buf_read_page_low(
block = buf_page_init_for_read(mode, space, offset);
if (block != NULL) {
+ if (buf_debug_prints) {
+ printf("Posting read request for page %lu, sync %lu\n",
+ offset, sync);
+ }
+
fil_io(OS_FILE_READ | wake_later,
sync, space, offset, 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
@@ -467,6 +472,12 @@ buf_read_ahead_linear(
count = 0;
+ /* Since Windows XP seems to schedule the i/o handler thread
+ very eagerly, and consequently it does not wait for the
+ full read batch to be posted, we use special heuristics here */
+
+ os_aio_simulated_put_read_threads_to_sleep();
+
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
@@ -556,16 +567,34 @@ buf_read_recv_pages(
highest page number the last in the array */
ulint n_stored) /* in: number of page numbers in the array */
{
+ ulint count;
ulint i;
for (i = 0; i < n_stored; i++) {
+ count = 0;
+
+ os_aio_print_debug = FALSE;
+
while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) {
os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000);
+
+ count++;
+
+ if (count > 100) {
+ fprintf(stderr,
+"InnoDB: Error: InnoDB has waited for 50 seconds for pending\n"
+"InnoDB: reads to the buffer pool to be finished.\n"
+"InnoDB: Number of pending reads %lu\n", buf_pool->n_pend_reads);
+
+ os_aio_print_debug = TRUE;
+ }
}
+ os_aio_print_debug = FALSE;
+
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
page_nos[i]);
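
The recovery reader now waits in half-second steps while too many reads are pending and complains after 100 iterations, i.e. roughly 50 seconds, switching on extra aio debug output. A self-contained sketch of that bounded wait loop; the threshold and sleep interval come from the hunk above, while the pending-read counter is only a stub:

#include <stdio.h>
#include <unistd.h>	/* usleep(); POSIX, used here only for the sketch */

static unsigned long pending_reads = 150;	/* stub: drains over time */
static unsigned long pending_limit = 128;	/* like RECV_POOL_N_FREE_BLOCKS / 2 */

int main(void)
{
	unsigned long count = 0;

	while (pending_reads >= pending_limit) {
		usleep(500000);			/* 0.5 s, as in the diff */
		pending_reads -= 10;		/* stub for completing i/o */
		count++;

		if (count > 100) {		/* ~50 seconds of waiting */
			fprintf(stderr,
				"Error: waited 50 seconds for pending reads"
				" (%lu still pending)\n", pending_reads);
			/* the real code also sets os_aio_print_debug = TRUE */
		}
	}
	return 0;
}
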
diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c
index 2254dcb6ae6..61a02f7efd0 100644
--- a/innobase/data/data0data.c
+++ b/innobase/data/data0data.c
@@ -64,6 +64,35 @@ dtuple_get_nth_field_noninline(
return(dtuple_get_nth_field(tuple, n));
}
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+ /* out: TRUE if equal */
+ dfield_t* field, /* in: field */
+ ulint len, /* in: data length or UNIV_SQL_NULL */
+ byte* data) /* in: data */
+{
+ if (len != field->len) {
+
+ return(FALSE);
+ }
+
+ if (len == UNIV_SQL_NULL) {
+
+ return(TRUE);
+ }
+
+ if (0 != ut_memcmp(field->data, data, len)) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
/****************************************************************
Returns TRUE if lengths of two dtuples are equal and respective data fields
in them are equal when compared with collation in char fields (not as binary
@@ -154,6 +183,69 @@ dtuple_set_n_fields(
}
/**************************************************************
+Checks that a data field is typed. */
+static
+ibool
+dfield_check_typed_no_assert(
+/*=========================*/
+ /* out: TRUE if ok */
+ dfield_t* field) /* in: data field */
+{
+ if (dfield_get_type(field)->mtype > DATA_MYSQL
+ || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+
+ fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+ dfield_get_type(field)->mtype, dfield_get_len(field));
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************
+Checks that a data tuple is typed. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple) /* in: tuple */
+{
+ dfield_t* field;
+ ulint i;
+ char err_buf[1000];
+
+ if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
+ fprintf(stderr,
+"InnoDB: Error: index entry has %lu fields\n",
+ dtuple_get_n_fields(tuple));
+
+ dtuple_sprintf(err_buf, 900, tuple);
+ fprintf(stderr,
+"InnoDB: Tuple contents: %s\n", err_buf);
+
+ return(FALSE);
+ }
+
+ for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ if (!dfield_check_typed_no_assert(field)) {
+
+ dtuple_sprintf(err_buf, 900, tuple);
+ fprintf(stderr,
+"InnoDB: Tuple contents: %s\n", err_buf);
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
ibool
@@ -162,8 +254,15 @@ dfield_check_typed(
/* out: TRUE if ok */
dfield_t* field) /* in: data field */
{
- ut_a(dfield_get_type(field)->mtype <= DATA_MYSQL);
- ut_a(dfield_get_type(field)->mtype >= DATA_VARCHAR);
+ if (dfield_get_type(field)->mtype > DATA_MYSQL
+ || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+
+ fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+ dfield_get_type(field)->mtype, dfield_get_len(field));
+
+ ut_a(0);
+ }
return(TRUE);
}
@@ -460,9 +559,21 @@ dtuple_convert_big_rec(
ibool is_externally_stored;
ulint i;
ulint j;
+ char err_buf[1000];
+ ut_a(dtuple_check_typed_no_assert(entry));
+
size = rec_get_converted_size(entry);
+ if (size > 1000000000) {
+ fprintf(stderr,
+"InnoDB: Warning: tuple size very big: %lu\n", size);
+
+ dtuple_sprintf(err_buf, 900, entry);
+ fprintf(stderr,
+"InnoDB: Tuple contents: %s\n", err_buf);
+ }
+
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
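
dfield_data_is_binary_equal() moved out of data0data.ic and now spells out its three cases: unequal length, SQL NULL (equal by length alone), and a byte-wise compare. A standalone sketch of the same logic with a sentinel standing in for UNIV_SQL_NULL:

#include <stdio.h>
#include <string.h>

#define SQL_NULL ((unsigned long) -1)	/* stand-in for UNIV_SQL_NULL */

struct field {
	unsigned long	len;		/* data length or SQL_NULL */
	const void*	data;
};

/* Returns 1 if the field holds exactly the given bytes. */
static int field_is_binary_equal(const struct field *f,
				 unsigned long len, const void *data)
{
	if (len != f->len) {
		return 0;		/* lengths differ */
	}
	if (len == SQL_NULL) {
		return 1;		/* both NULL: no data to compare */
	}
	return 0 == memcmp(f->data, data, len);
}

int main(void)
{
	struct field f = { 3, "abc" };

	printf("%d\n", field_is_binary_equal(&f, 3, "abc"));		/* 1 */
	printf("%d\n", field_is_binary_equal(&f, 3, "abd"));		/* 0 */
	printf("%d\n", field_is_binary_equal(&f, SQL_NULL, NULL));	/* 0 */
	return 0;
}
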
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 38c09c1011d..d981dc59036 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -153,6 +153,7 @@ dict_create_sys_tables_tuple(
if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
dfield_set_data(dfield, table->cluster_name,
ut_strlen(table->cluster_name));
+ ut_a(0); /* Oracle-style clusters are not supported yet */
} else {
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
}
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
index a6f268c2153..7c166ecd068 100644
--- a/innobase/dict/dict0dict.c
+++ b/innobase/dict/dict0dict.c
@@ -2805,6 +2805,12 @@ dict_update_statistics_low(
index = dict_table_get_first_index(table);
+ if (index == NULL) {
+ /* Table definition is corrupt */
+
+ return;
+ }
+
while (index) {
size = btr_get_size(index, BTR_TOTAL_SIZE);
@@ -3196,6 +3202,14 @@ dict_print_info_on_foreign_keys(
buf2 += sprintf(buf2, ")");
+ if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) {
+ buf2 += sprintf(buf2, " ON DELETE CASCADE");
+ }
+
+ if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) {
+ buf2 += sprintf(buf2, " ON DELETE SET NULL");
+ }
+
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
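
Foreign key printing now maps foreign->type back to the ON DELETE clause. A tiny sketch of that mapping; the flag values below are invented for illustration, since the real DICT_FOREIGN_ON_DELETE_* constants are not shown in this diff:

#include <stdio.h>

#define FK_ON_DELETE_CASCADE	1	/* illustrative values only */
#define FK_ON_DELETE_SET_NULL	2

static const char *on_delete_clause(unsigned long type)
{
	if (type == FK_ON_DELETE_CASCADE)  return " ON DELETE CASCADE";
	if (type == FK_ON_DELETE_SET_NULL) return " ON DELETE SET NULL";
	return "";
}

int main(void)
{
	printf("FOREIGN KEY (...) REFERENCES t(...)%s\n",
	       on_delete_clause(FK_ON_DELETE_CASCADE));
	return 0;
}
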
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
index 4917359c748..6d48ddf4d95 100644
--- a/innobase/dict/dict0load.c
+++ b/innobase/dict/dict0load.c
@@ -21,33 +21,6 @@ Created 4/24/1996 Heikki Tuuri
#include "dict0boot.h"
/************************************************************************
-Loads definitions for table columns. */
-static
-void
-dict_load_columns(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-/************************************************************************
-Loads definitions for table indexes. */
-static
-void
-dict_load_indexes(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-/************************************************************************
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap); /* in: memory heap for temporary storage */
-
-
-/************************************************************************
Finds the first table name in the given database. */
char*
@@ -194,7 +167,12 @@ loop:
fprintf(stderr, "InnoDB: Failed to load table %s\n",
table_name);
} else {
- dict_update_statistics_low(table, TRUE);
+ /* The table definition was corrupt if there
+ is no index */
+
+ if (dict_table_get_first_index(table)) {
+ dict_update_statistics_low(table, TRUE);
+ }
dict_table_print_low(table);
}
@@ -208,266 +186,6 @@ loop:
}
/************************************************************************
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. Adds all these to the data
-dictionary cache. */
-
-dict_table_t*
-dict_load_table(
-/*============*/
- /* out: table, NULL if does not exist */
- char* name) /* in: table name */
-{
- dict_table_t* table;
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- char* buf;
- ulint space;
- ulint n_cols;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field(rec, 0, &len);
-
- /* Check if the table name in record is the searched one */
- if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- ut_a(0 == ut_strcmp("SPACE",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_tables), 9))->name));
-
- field = rec_get_nth_field(rec, 9, &len);
- space = mach_read_from_4(field);
-
- ut_a(0 == ut_strcmp("N_COLS",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_tables), 4))->name));
-
- field = rec_get_nth_field(rec, 4, &len);
- n_cols = mach_read_from_4(field);
-
- table = dict_mem_table_create(name, space, n_cols);
-
- ut_a(0 == ut_strcmp("ID",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_tables), 3))->name));
-
- field = rec_get_nth_field(rec, 3, &len);
- table->id = mach_read_from_8(field);
-
- field = rec_get_nth_field(rec, 5, &len);
- table->type = mach_read_from_4(field);
-
- if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
- ut_a(0);
-
- field = rec_get_nth_field(rec, 6, &len);
- table->mix_id = mach_read_from_8(field);
-
- field = rec_get_nth_field(rec, 8, &len);
- buf = mem_heap_alloc(heap, len);
- ut_memcpy(buf, field, len);
-
- table->cluster_name = buf;
- }
-
- if ((table->type == DICT_TABLE_CLUSTER)
- || (table->type == DICT_TABLE_CLUSTER_MEMBER)) {
-
- field = rec_get_nth_field(rec, 7, &len);
- table->mix_len = mach_read_from_4(field);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
- /* Load the cluster table definition if not yet in
- memory cache */
- dict_table_get_low(table->cluster_name);
- }
-
- dict_load_columns(table, heap);
-
- dict_table_add_to_cache(table);
-
- dict_load_indexes(table, heap);
-
- ut_a(DB_SUCCESS == dict_load_foreigns(table->name));
-
- mem_heap_free(heap);
-
- return(table);
-}
-
-/***************************************************************************
-Loads a table object based on the table id. */
-
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id) /* in: table id */
-{
- byte id_buf[8];
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_table_ids;
- dict_table_t* sys_tables;
- rec_t* rec;
- byte* field;
- ulint len;
- dict_table_t* table;
- char* name;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
-
- mtr_start(&mtr);
- /*---------------------------------------------------*/
- /* Get the secondary index based on ID for table SYS_TABLES */
- sys_tables = dict_sys->sys_tables;
- sys_table_ids = dict_table_get_next_index(
- dict_table_get_first_index(sys_tables));
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- /* Write the table id in byte format to id_buf */
- mach_write_to_8(id_buf, table_id);
-
- dfield_set_data(dfield, id_buf, 8);
- dict_index_copy_types(tuple, sys_table_ids, 1);
-
- btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /*---------------------------------------------------*/
- /* Now we have the record in the secondary index containing the
- table ID and NAME */
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
-
- /* Check if the table id in record is the one searched for */
- if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Now we get the table name from the record */
- field = rec_get_nth_field(rec, 1, &len);
-
- name = mem_heap_alloc(heap, len + 1);
- ut_memcpy(name, field, len);
- name[len] = '\0';
-
- /* Load the table definition to memory */
- table = dict_load_table(name);
-
- ut_a(table);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table);
-}
-
-/************************************************************************
-This function is called when the database is booted. Loads system table
-index definitions except for the clustered index which is added to the
-dictionary cache at booting before calling this function. */
-
-void
-dict_load_sys_table(
-/*================*/
- dict_table_t* table) /* in: system table */
-{
- mem_heap_t* heap;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- dict_load_indexes(table, heap);
-
- mem_heap_free(heap);
-}
-
-/************************************************************************
Loads definitions for table columns. */
static
void
@@ -568,12 +286,97 @@ dict_load_columns(
}
/************************************************************************
-Loads definitions for table indexes. Adds them to the data dictionary cache.
-*/
+Loads definitions for index fields. */
static
void
+dict_load_fields(
+/*=============*/
+ dict_table_t* table, /* in: table */
+ dict_index_t* index, /* in: index whose fields to load */
+ mem_heap_t* heap) /* in: memory heap for temporary storage */
+{
+ dict_table_t* sys_fields;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ char* col_name;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ byte* buf;
+ ulint i;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ UT_NOT_USED(table);
+
+ mtr_start(&mtr);
+
+ sys_fields = dict_table_get_low("SYS_FIELDS");
+ sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = mem_heap_alloc(heap, 8);
+ mach_write_to_8(buf, index->id);
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < index->n_fields; i++) {
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ if (rec_get_deleted_flag(rec)) {
+ fprintf(stderr,
+"InnoDB: Error: data dictionary entry for table %s is corrupt!\n",
+"InnoDB: An index field is delete marked.\n",
+ table->name);
+ }
+
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+ ut_a(ut_memcmp(buf, field, len) == 0);
+
+ field = rec_get_nth_field(rec, 1, &len);
+ ut_ad(len == 4);
+ ut_a(i == mach_read_from_4(field));
+
+ ut_a(0 == ut_strcmp("COL_NAME",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_fields), 4))->name));
+
+ field = rec_get_nth_field(rec, 4, &len);
+
+ col_name = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(col_name, field, len);
+ col_name[len] = '\0';
+
+ dict_mem_index_add_field(index, col_name, 0);
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/************************************************************************
+Loads definitions for table indexes. Adds them to the data dictionary
+cache. */
+static
+ibool
dict_load_indexes(
/*==============*/
+ /* out: TRUE if ok, FALSE if corruption
+ of dictionary table */
dict_table_t* table, /* in: table */
mem_heap_t* heap) /* in: memory heap for temporary storage */
{
@@ -637,7 +440,17 @@ dict_load_indexes(
break;
}
- ut_a(!rec_get_deleted_flag(rec));
+ if (rec_get_deleted_flag(rec)) {
+ fprintf(stderr,
+"InnoDB: Error: data dictionary entry for table %s is corrupt!\n"
+"InnoDB: An index is delete marked.\n",
+ table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
field = rec_get_nth_field(rec, 1, &len);
ut_ad(len == 8);
@@ -671,6 +484,33 @@ dict_load_indexes(
field = rec_get_nth_field(rec, 8, &len);
page_no = mach_read_from_4(field);
+ if (page_no == FIL_NULL) {
+
+ fprintf(stderr,
+ "InnoDB: Error: trying to load index %s for table %s\n"
+ "InnoDB: but the index tree has been freed!\n",
+ name_buf, table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
+ if ((type & DICT_CLUSTERED) == 0
+ && NULL == dict_table_get_first_index(table)) {
+
+ fprintf(stderr,
+ "InnoDB: Error: trying to load index %s for table %s\n"
+ "InnoDB: but the first index was not clustered!\n",
+ name_buf, table->name);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
if (is_sys_table
&& ((type & DICT_CLUSTERED)
|| ((table == dict_sys->sys_tables)
@@ -688,16 +528,7 @@ dict_load_indexes(
dict_load_fields(table, index, heap);
- if (index->type & DICT_CLUSTERED == 0
- && NULL == dict_table_get_first_index(table)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s for table %s\n"
- "InnoDB: but the first index was not clustered\n",
- index->name, table->name);
- } else {
- dict_index_add_to_cache(table, index);
- }
+ dict_index_add_to_cache(table, index);
}
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
@@ -705,84 +536,266 @@ dict_load_indexes(
btr_pcur_close(&pcur);
mtr_commit(&mtr);
+
+ return(TRUE);
}
/************************************************************************
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
+Loads a table definition and also all its index definitions, and also
+the cluster definition if the table is a member in a cluster. Also loads
+all foreign key constraints where the foreign key is in the table or where
+a foreign key references columns in this table. Adds all these to the data
+dictionary cache. */
+
+dict_table_t*
+dict_load_table(
+/*============*/
+ /* out: table, NULL if does not exist */
+ char* name) /* in: table name */
{
- dict_table_t* sys_fields;
- dict_index_t* sys_index;
+ dict_table_t* table;
+ dict_table_t* sys_tables;
btr_pcur_t pcur;
+ dict_index_t* sys_index;
dtuple_t* tuple;
+ mem_heap_t* heap;
dfield_t* dfield;
- char* col_name;
rec_t* rec;
byte* field;
ulint len;
- byte* buf;
- ulint i;
+ char* buf;
+ ulint space;
+ ulint n_cols;
mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
- UT_NOT_USED(table);
-
+ heap = mem_heap_create(1000);
+
mtr_start(&mtr);
- sys_fields = dict_table_get_low("SYS_FIELDS");
- sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
+ sys_tables = dict_table_get_low("SYS_TABLES");
+ sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
tuple = dtuple_create(heap, 1);
dfield = dtuple_get_nth_field(tuple, 0);
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, index->id);
-
- dfield_set_data(dfield, buf, 8);
+ dfield_set_data(dfield, name, ut_strlen(name));
dict_index_copy_types(tuple, sys_index, 1);
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < index->n_fields; i++) {
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
- rec = btr_pcur_get_rec(&pcur);
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ || rec_get_deleted_flag(rec)) {
+ /* Not found */
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
- ut_a(!rec_get_deleted_flag(rec));
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
- field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_memcmp(buf, field, len) == 0);
+ return(NULL);
+ }
- field = rec_get_nth_field(rec, 1, &len);
- ut_ad(len == 4);
- ut_a(i == mach_read_from_4(field));
+ field = rec_get_nth_field(rec, 0, &len);
- ut_a(0 == ut_strcmp("COL_NAME",
- dict_field_get_col(
- dict_index_get_nth_field(
- dict_table_get_first_index(sys_fields), 4))->name));
+ /* Check if the table name in record is the searched one */
+ if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
- field = rec_get_nth_field(rec, 4, &len);
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
- col_name = mem_heap_alloc(heap, len + 1);
- ut_memcpy(col_name, field, len);
- col_name[len] = '\0';
+ ut_a(0 == ut_strcmp("SPACE",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_tables), 9))->name));
+
+ field = rec_get_nth_field(rec, 9, &len);
+ space = mach_read_from_4(field);
+
+ ut_a(0 == ut_strcmp("N_COLS",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_tables), 4))->name));
+
+ field = rec_get_nth_field(rec, 4, &len);
+ n_cols = mach_read_from_4(field);
+
+ table = dict_mem_table_create(name, space, n_cols);
+
+ ut_a(0 == ut_strcmp("ID",
+ dict_field_get_col(
+ dict_index_get_nth_field(
+ dict_table_get_first_index(sys_tables), 3))->name));
+
+ field = rec_get_nth_field(rec, 3, &len);
+ table->id = mach_read_from_8(field);
+
+ field = rec_get_nth_field(rec, 5, &len);
+ table->type = mach_read_from_4(field);
+
+ if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+ ut_a(0);
+
+ field = rec_get_nth_field(rec, 6, &len);
+ table->mix_id = mach_read_from_8(field);
+
+ field = rec_get_nth_field(rec, 8, &len);
+ buf = mem_heap_alloc(heap, len);
+ ut_memcpy(buf, field, len);
+
+ table->cluster_name = buf;
+ }
+
+ if ((table->type == DICT_TABLE_CLUSTER)
+ || (table->type == DICT_TABLE_CLUSTER_MEMBER)) {
- dict_mem_index_add_field(index, col_name, 0);
+ field = rec_get_nth_field(rec, 7, &len);
+ table->mix_len = mach_read_from_4(field);
+ }
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
+ /* Load the cluster table definition if not yet in
+ memory cache */
+ dict_table_get_low(table->cluster_name);
+ }
+
+ dict_load_columns(table, heap);
+
+ dict_table_add_to_cache(table);
+
+ dict_load_indexes(table, heap);
+
+ ut_a(DB_SUCCESS == dict_load_foreigns(table->name));
+
+ mem_heap_free(heap);
+
+ return(table);
+}
+
+/***************************************************************************
+Loads a table object based on the table id. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+ /* out: table; NULL if table does not exist */
+ dulint table_id) /* in: table id */
+{
+ byte id_buf[8];
+ btr_pcur_t pcur;
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ dict_index_t* sys_table_ids;
+ dict_table_t* sys_tables;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ dict_table_t* table;
+ char* name;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+
+ mtr_start(&mtr);
+ /*---------------------------------------------------*/
+ /* Get the secondary index based on ID for table SYS_TABLES */
+ sys_tables = dict_sys->sys_tables;
+ sys_table_ids = dict_table_get_next_index(
+ dict_table_get_first_index(sys_tables));
+ heap = mem_heap_create(256);
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ /* Write the table id in byte format to id_buf */
+ mach_write_to_8(id_buf, table_id);
+
+ dfield_set_data(dfield, id_buf, 8);
+ dict_index_copy_types(tuple, sys_table_ids, 1);
+
+ btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ || rec_get_deleted_flag(rec)) {
+ /* Not found */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ /*---------------------------------------------------*/
+ /* Now we have the record in the secondary index containing the
+ table ID and NAME */
+
+ rec = btr_pcur_get_rec(&pcur);
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+
+ /* Check if the table id in record is the one searched for */
+ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ /* Now we get the table name from the record */
+ field = rec_get_nth_field(rec, 1, &len);
+
+ name = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(name, field, len);
+ name[len] = '\0';
+
+ /* Load the table definition to memory */
+ table = dict_load_table(name);
+
btr_pcur_close(&pcur);
mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(table);
+}
+
+/************************************************************************
+This function is called when the database is booted. Loads system table
+index definitions except for the clustered index which is added to the
+dictionary cache at booting before calling this function. */
+
+void
+dict_load_sys_table(
+/*================*/
+ dict_table_t* table) /* in: system table */
+{
+ mem_heap_t* heap;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ heap = mem_heap_create(1000);
+
+ dict_load_indexes(table, heap);
+
+ mem_heap_free(heap);
}
/************************************************************************
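
dict_load_indexes() now returns an ibool instead of asserting: a delete-marked SYS_INDEXES record, a root page number of FIL_NULL, or a first index that is not clustered each produce an error message and a FALSE return, and dict_load_table()'s caller only gathers statistics when an index was actually loaded. A compact sketch of that validate-report-bail shape; the record fields are invented for the sketch:

#include <stdio.h>

#define PAGE_NULL ((unsigned long) -1)	/* stand-in for FIL_NULL */

struct index_rec {
	int		delete_marked;
	int		clustered;
	unsigned long	root_page_no;
};

/* Returns 1 if all index records are usable, 0 on corruption. */
static int load_indexes(const struct index_rec *recs, int n, const char *table)
{
	int i;

	for (i = 0; i < n; i++) {
		if (recs[i].delete_marked) {
			fprintf(stderr,
				"dictionary entry for table %s is corrupt:"
				" an index is delete marked\n", table);
			return 0;
		}
		if (recs[i].root_page_no == PAGE_NULL) {
			fprintf(stderr,
				"index tree of table %s has been freed\n",
				table);
			return 0;
		}
		if (i == 0 && !recs[i].clustered) {
			fprintf(stderr,
				"first index of table %s is not clustered\n",
				table);
			return 0;
		}
	}
	return 1;
}

int main(void)
{
	struct index_rec recs[] = { { 0, 1, 3 }, { 0, 0, 4 } };

	return load_indexes(recs, 2, "test/t1") ? 0 : 1;
}
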
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
index 52f46062065..9a4c94de885 100644
--- a/innobase/dict/dict0mem.c
+++ b/innobase/dict/dict0mem.c
@@ -65,6 +65,9 @@ dict_mem_table_create(
table->cached = FALSE;
+ table->mix_id = ut_dulint_zero;
+ table->mix_len = 0;
+
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
UT_LIST_INIT(table->indexes);
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
index e823fe62259..d78db2a5ea8 100644
--- a/innobase/fsp/fsp0fsp.c
+++ b/innobase/fsp/fsp0fsp.c
@@ -2608,6 +2608,7 @@ fseg_free_page_low(
ulint not_full_n_used;
ulint state;
ulint i;
+ char errbuf[200];
ut_ad(seg_inode && mtr);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
@@ -2621,8 +2622,25 @@ fseg_free_page_low(
descr = xdes_get_descriptor(space, page, mtr);
ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
- == FALSE);
+ if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+ != FALSE) {
+ ut_sprintf_buf(errbuf, descr, 40);
+ fprintf(stderr,
+"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf);
+
+ fprintf(stderr,
+"InnoDB: Serious error! InnoDB is trying to free page %lu\n",
+"InnoDB: though it is already marked as free in the tablespace!\n"
+"InnoDB: The tablespace free space info is corrupt.\n"
+"InnoDB: You may need to dump your InnoDB tables and recreate the whole\n"
+"InnoDB: database!\n", page);
+
+ fprintf(stderr,
+"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
+"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n");
+ ut_a(0);
+ }
+
state = xdes_get_state(descr, mtr);
if (state != XDES_FSEG) {
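
fseg_free_page_low() now detects a double free: if the page's XDES_FREE_BIT is already set in the extent descriptor, it dumps the descriptor and reports corrupt free-space info before stopping. A standalone sketch of a per-extent free bitmap with the same check; the bit layout here is a simplification, not the on-disk XDES format:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define EXTENT_SIZE 64				/* pages per extent */

struct extent_desc {
	unsigned char free_bits[EXTENT_SIZE / 8];  /* 1 bit per page: 1 = free */
};

static int page_is_free(const struct extent_desc *d, unsigned long page)
{
	unsigned long i = page % EXTENT_SIZE;

	return (d->free_bits[i / 8] >> (i % 8)) & 1;
}

static void mark_free(struct extent_desc *d, unsigned long page)
{
	unsigned long i = page % EXTENT_SIZE;

	if (page_is_free(d, page)) {
		fprintf(stderr,
			"Serious error: trying to free page %lu though it is"
			" already marked as free; free space info is corrupt\n",
			page);
		abort();			/* like ut_a(0) in the real code */
	}
	d->free_bits[i / 8] |= (unsigned char) (1u << (i % 8));
}

int main(void)
{
	struct extent_desc d;

	memset(&d, 0, sizeof d);
	mark_free(&d, 7);
	mark_free(&d, 7);			/* triggers the diagnostic */
	return 0;
}
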
diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
index bd1f0e6e1d8..f51a924c87f 100644
--- a/innobase/ibuf/ibuf0ibuf.c
+++ b/innobase/ibuf/ibuf0ibuf.c
@@ -685,21 +685,21 @@ ibuf_bitmap_get_map_page(
/****************************************************************************
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap pag
+ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
UNIV_INLINE
void
ibuf_set_free_bits_low(
/*===================*/
ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is reset if the index is
- a non-clustered non-unique, and page level is 0 */
+ page_t* page, /* in: index page; free bit is set if the index is
+ non-clustered and page level is 0 */
ulint val, /* in: value to set: < 4 */
mtr_t* mtr) /* in: mtr */
{
page_t* bitmap_page;
- if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+ if (type & DICT_CLUSTERED) {
return;
}
@@ -733,8 +733,8 @@ void
ibuf_set_free_bits(
/*===============*/
ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is reset if the index is
- a non-clustered non-unique, and page level is 0 */
+ page_t* page, /* in: index page; free bit is set if the index is
+ non-clustered and page level is 0 */
ulint val, /* in: value to set: < 4 */
ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
the bits must have before setting; this is for
@@ -743,7 +743,7 @@ ibuf_set_free_bits(
mtr_t mtr;
page_t* bitmap_page;
- if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
+ if (type & DICT_CLUSTERED) {
return;
}
@@ -2024,7 +2024,7 @@ ibuf_insert_low(
ulint n_stored;
ulint bits;
- ut_a(!(index->type & (DICT_UNIQUE | DICT_CLUSTERED)));
+ ut_a(!(index->type & DICT_CLUSTERED));
ut_ad(dtuple_check_typed(entry));
do_merge = FALSE;
@@ -2254,10 +2254,7 @@ ibuf_insert(
ut_ad(dtuple_check_typed(entry));
- if (index->type & DICT_CLUSTERED || index->type & DICT_UNIQUE) {
-
- return(FALSE);
- }
+ ut_a(!(index->type & DICT_CLUSTERED));
if (rec_get_converted_size(entry)
>= page_get_free_space_of_empty() / 2) {
@@ -2302,6 +2299,7 @@ ibuf_insert_to_index_page(
rec_t* rec;
page_t* bitmap_page;
ulint old_bits;
+ char errbuf[1000];
ut_ad(ibuf_inside());
ut_ad(dtuple_check_typed(entry));
@@ -2324,11 +2322,24 @@ ibuf_insert_to_index_page(
/* This time the record must fit */
if (!page_cur_tuple_insert(&page_cur, entry, mtr)) {
- printf(
- "Ibuf insert fails; page free %lu, dtuple size %lu\n",
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+"InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n",
page_get_max_insert_size(page, 1),
rec_get_converted_size(entry));
+ dtuple_sprintf(errbuf, 900, entry);
+
+ fprintf(stderr,
+"InnoDB: Cannot insert index record %s\n", errbuf);
+
+ fprintf(stderr,
+"InnoDB: The table where where this index record belongs\n"
+"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
+"InnoDB: that table.\n");
+
bitmap_page = ibuf_bitmap_get_map_page(
buf_frame_get_space_id(page),
buf_frame_get_page_no(page),
@@ -2339,9 +2350,11 @@ ibuf_insert_to_index_page(
buf_frame_get_page_no(page),
IBUF_BITMAP_FREE, mtr);
- printf("Bitmap bits %lu\n", old_bits);
-
- ut_error;
+ fprintf(stderr, "Bitmap bits %lu\n", old_bits);
+
+ fprintf(stderr,
+"InnoDB: Send a detailed bug report to mysql@lists.mysql.com!\n");
+
}
}
}
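
The insert buffer code now tests only DICT_CLUSTERED where it used to test DICT_CLUSTERED | DICT_UNIQUE, and ibuf_insert() asserts that it is never handed a clustered index instead of silently returning FALSE. A small sketch of just the bitmask test that changed; the bit values are illustrative only:

#include <stdio.h>

#define DICT_CLUSTERED 1	/* illustrative bit values */
#define DICT_UNIQUE    2

/* Old test: the index is excluded if it is clustered or unique.
   New test: the index is excluded only if it is clustered. */
static int old_test_excludes(unsigned long type)
{
	return (type & (DICT_CLUSTERED | DICT_UNIQUE)) != 0;
}

static int new_test_excludes(unsigned long type)
{
	return (type & DICT_CLUSTERED) != 0;
}

int main(void)
{
	unsigned long unique_secondary = DICT_UNIQUE;

	printf("unique secondary index: old test %s, new test %s\n",
	       old_test_excludes(unique_secondary) ? "excludes" : "passes",
	       new_test_excludes(unique_secondary) ? "excludes" : "passes");
	return 0;
}
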
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index d22f9d79c1c..bf433c0c264 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -204,16 +204,6 @@ btr_page_reorganize(
page_t* page, /* in: page to be reorganized */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize_low(
-/*====================*/
- ibool low, /* in: TRUE if locks should not be updated, i.e.,
- there cannot exist locks on the page */
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index bce1f0685cc..7af34deb30f 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -709,6 +709,7 @@ allowed to free an inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64
extern ulint btr_cur_n_non_sea;
+extern ulint btr_cur_n_sea;
#ifndef UNIV_NONINL
#include "btr0cur.ic"
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index fdf5cf375a3..14feca5d5c5 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -176,6 +176,7 @@ btr_search_validate(void);
/* The search info struct in an index */
struct btr_search_struct{
+ ulint magic_n; /* magic number */
/* The following 4 fields are currently not used: */
rec_t* last_search; /* pointer to the lower limit record of the
previous search; NULL if not known */
@@ -220,6 +221,8 @@ struct btr_search_struct{
ulint n_searches; /* number of searches */
};
+#define BTR_SEARCH_MAGIC_N 1112765
+
/* The hash index system */
typedef struct btr_search_sys_struct btr_search_sys_t;
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 5ddbf39335a..ca0692f1e17 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -219,6 +219,16 @@ buf_page_create(
a page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_block_t* block); /* in: block to init */
+/************************************************************************
Decrements the bufferfix count of a buffer control block and releases
a latch, if specified. */
UNIV_INLINE
@@ -605,6 +615,7 @@ struct buf_block_struct{
/* 1. General fields */
+ ulint magic_n; /* magic number to check */
ulint state; /* state of the control block:
BUF_BLOCK_NOT_USED, ... */
byte* frame; /* pointer to buffer frame which
@@ -729,6 +740,8 @@ struct buf_block_struct{
frees a page in buffer pool */
};
+#define BUF_BLOCK_MAGIC_N 41526563
+
/* The buffer pool structure. NOTE! The definition appears here only for
other modules of this directory (buf) to see it. Do not use from outside! */
diff --git a/innobase/include/buf0rea.h b/innobase/include/buf0rea.h
index 1efe67369ab..aed965a6b21 100644
--- a/innobase/include/buf0rea.h
+++ b/innobase/include/buf0rea.h
@@ -89,7 +89,7 @@ buf_read_recv_pages(
/* The size in pages of the area which the read-ahead algorithms read if
invoked */
-#define BUF_READ_AHEAD_AREA ut_min(32, buf_pool->curr_size / 16)
+#define BUF_READ_AHEAD_AREA ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
/* Modes used in read-ahead */
#define BUF_READ_IBUF_PAGES_ONLY 131
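
The read-ahead area changes from ut_min(32, curr_size / 16) to ut_min(64, ut_2_power_up(curr_size / 32)): it still scales with the buffer pool, but is rounded up to a power of two and capped at 64 pages. A worked sketch of the new formula; ut_2_power_up() is assumed here to return the smallest power of two not less than its argument:

#include <stdio.h>

/* Smallest power of two >= n (assumed behaviour of ut_2_power_up()). */
static unsigned long pow2_up(unsigned long n)
{
	unsigned long p = 1;

	while (p < n) {
		p <<= 1;
	}
	return p;
}

static unsigned long read_ahead_area(unsigned long pool_pages)
{
	unsigned long a = pow2_up(pool_pages / 32);

	return a < 64 ? a : 64;			/* ut_min(64, ...) */
}

int main(void)
{
	unsigned long sizes[] = { 64, 512, 1000, 8192 };
	int i;

	for (i = 0; i < 4; i++) {
		printf("pool %5lu pages -> read-ahead area %lu\n",
		       sizes[i], read_ahead_area(sizes[i]));
	}
	return 0;
}
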
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
index c19d7ea5552..e0fb06e5018 100644
--- a/innobase/include/data0data.h
+++ b/innobase/include/data0data.h
@@ -123,7 +123,7 @@ dfield_datas_are_binary_equal(
dfield_t* field2);/* in: field */
/*************************************************************************
Tests if dfield data length and content is equal to the given. */
-UNIV_INLINE
+
ibool
dfield_data_is_binary_equal(
/*========================*/
@@ -279,6 +279,14 @@ dtuple_check_typed(
/* out: TRUE if ok */
dtuple_t* tuple); /* in: tuple */
/**************************************************************
+Checks that a data tuple is typed. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set. */
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
index 0750a3894d1..d356664df21 100644
--- a/innobase/include/data0data.ic
+++ b/innobase/include/data0data.ic
@@ -154,30 +154,6 @@ dfield_datas_are_binary_equal(
}
/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-UNIV_INLINE
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data) /* in: data */
-{
- if (len != field->len) {
-
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL && 0 != ut_memcmp(field->data, data, len)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
Gets info bits in a data tuple. */
UNIV_INLINE
ulint
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index eeb4f2e45f1..d4bd0036c5a 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -157,6 +157,14 @@ log_io_complete(
/*============*/
log_group_t* group); /* in: log group */
/**********************************************************
+Flushes the log files to the disk, using, for example, the Unix fsync.
+This function does the flush even if the user has set
+srv_flush_log_at_trx_commit = FALSE. */
+
+void
+log_flush_to_disk(void);
+/*===================*/
+/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been flushed to disk up to the last log entry written by the
transaction. If there is a flush running, it waits and checks if the flush
@@ -260,7 +268,9 @@ log_reset_first_header_and_checkpoint(
/*==================================*/
byte* hdr_buf,/* in: buffer which will be written to the start
of the first log file */
- dulint lsn); /* in: lsn of the start of the first log file */
+ dulint start); /* in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
/************************************************************************
Starts an archiving operation. */
@@ -463,6 +473,15 @@ log_block_init(
byte* log_block, /* in: pointer to the log buffer */
dulint lsn); /* in: lsn within the log block */
/****************************************************************
+Initializes a log block in the log buffer in the old, < 3.23.52 format, where
+there was no checksum yet. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn); /* in: lsn within the log block */
+/****************************************************************
Converts a lsn to a log block number. */
UNIV_INLINE
ulint
@@ -523,7 +542,10 @@ extern log_t* log_sys;
bytes */
/* Offsets of a log block trailer from the end of the block */
-#define LOG_BLOCK_TRL_NO 4 /* log block number */
+#define LOG_BLOCK_TRL_CHECKSUM 4 /* 1 byte checksum of the log block
+ contents */
+#define LOG_BLOCK_TRL_NO 3 /* 3 lowest bytes of the log block
+ number */
#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
/* Offsets for a checkpoint field */
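[Editor's sketch] The two trailer defines above change the last four bytes of each log block: a 1-byte checksum now comes first, followed by the 3 low bytes of the block number (previously the full 4-byte block number sat there). A hedged sketch of filling the new trailer; the helper name is illustrative, and in the real patch this work is split between log_block_set_trl_no() in log0log.ic and log_block_store_checksum() in log0log.c. The mach_write_to_1()/mach_write_to_3() helpers and the constants are assumed from the surrounding InnoDB headers:

        /* illustrative only, not part of the patch */
        static void
        log_block_write_trailer(
                byte*   block,          /* in/out: log block */
                ulint   block_no,       /* in: log block number */
                ulint   checksum)       /* in: checksum value, low 8 bits used */
        {
                mach_write_to_1(block + OS_FILE_LOG_BLOCK_SIZE
                                - LOG_BLOCK_TRL_CHECKSUM, 0xFF & checksum);
                mach_write_to_3(block + OS_FILE_LOG_BLOCK_SIZE
                                - LOG_BLOCK_TRL_NO, block_no & 0xFFFFFF);
        }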
@@ -558,11 +580,22 @@ extern log_t* log_sys;
#define LOG_GROUP_ID 0 /* log group number */
#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number */
+#define LOG_FILE_NO 12 /* 4-byte archived log file number;
+ this field is only defined in an
+ archived log file */
+#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
+ /* a 32-byte field which contains
+ the string 'ibbackup' and the
+ creation time if the log file was
+ created by ibbackup --restore;
+ when mysqld is first time started
+ on the restored database, it can
+ print helpful info for the user */
#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
/* this 4-byte field is TRUE when
the writing of an archived log file
- has been completed */
+ has been completed; this field is
+ only defined in an archived log file */
#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
/* lsn where the archived log file
at least extends: actually the
@@ -572,7 +605,14 @@ extern log_t* log_sys;
is defined only when an archived log
file has been completely written */
#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
+ /* first checkpoint field in the log
+ header; we write alternately to the
+ checkpoint fields when we make new
+ checkpoints; this field is only defined
+ in the first log file of a log group */
#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
+ /* second checkpoint field in the log
+ header */
#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
#define LOG_GROUP_OK 301
@@ -678,7 +718,7 @@ struct log_struct{
write i/o has been completed for all
log groups */
dulint flush_lsn; /* end lsn for the current flush */
- ulint flush_end_offset;/* the data in buffer ha been flushed
+ ulint flush_end_offset;/* the data in buffer has been flushed
up to this offset when the current
flush ends: this field will then
be copied to buf_next_to_write */
diff --git a/innobase/include/log0log.ic b/innobase/include/log0log.ic
index e5c313d129b..36e65239374 100644
--- a/innobase/include/log0log.ic
+++ b/innobase/include/log0log.ic
@@ -179,7 +179,7 @@ log_block_get_trl_no(
trailer */
byte* log_block) /* in: log block */
{
- return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
+ return(mach_read_from_3(log_block + OS_FILE_LOG_BLOCK_SIZE
- LOG_BLOCK_TRL_NO));
}
@@ -192,8 +192,8 @@ log_block_set_trl_no(
byte* log_block, /* in: log block */
ulint n) /* in: log block number */
{
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO,
- n);
+ mach_write_to_3(log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO,
+ n & 0xFFFFFF);
}
/****************************************************************
@@ -237,6 +237,29 @@ log_block_init(
log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
log_block_set_first_rec_group(log_block, 0);
}
+
+/****************************************************************
+Initializes a log block in the log buffer in the old format, where there
+was no checksum yet. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn) /* in: lsn within the log block */
+{
+ ulint no;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ no = log_block_convert_lsn_to_no(lsn);
+
+ log_block_set_hdr_no(log_block, no);
+ mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_NO - 1, no);
+ log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+ log_block_set_first_rec_group(log_block, 0);
+}
/****************************************************************
Writes to the log the string given. The log must be released with
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index 01fa12955ff..d4d12e4a9d9 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -16,6 +16,7 @@ Created 10/21/1995 Heikki Tuuri
os_file_write */
extern ibool os_do_not_call_flush_at_each_write;
extern ibool os_has_said_disk_full;
+extern ibool os_aio_print_debug;
#ifdef __WIN__
@@ -33,6 +34,8 @@ extern ibool os_has_said_disk_full;
typedef int os_file_t;
#endif
+extern ulint os_innodb_umask;
+
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
@@ -309,6 +312,15 @@ Wakes up simulated aio i/o-handler threads if they have something to do. */
void
os_aio_simulated_wake_handler_threads(void);
/*=======================================*/
+/**************************************************************************
+This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+
+void
+os_aio_simulated_put_read_threads_to_sleep(void);
+/*============================================*/
#ifdef WIN_ASYNC_IO
/**************************************************************************
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 8e68381b868..2f77127466f 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -328,7 +328,7 @@ page_dir_calc_reserved_space(
ulint n_recs); /* in: number of records */
/*******************************************************************
Looks for the directory slot which owns the given record. */
-UNIV_INLINE
+
ulint
page_dir_find_owner_slot(
/*=====================*/
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
index f84fe5a5606..e7c0f8ee07c 100644
--- a/innobase/include/page0page.ic
+++ b/innobase/include/page0page.ic
@@ -479,6 +479,8 @@ page_rec_get_next(
offs = rec_get_next_offs(rec);
+ ut_a(offs < UNIV_PAGE_SIZE);
+
if (offs == 0) {
return(NULL);
@@ -487,40 +489,6 @@ page_rec_get_next(
return(page + offs);
}
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-UNIV_INLINE
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- /* out: the directory slot number */
- rec_t* rec) /* in: the physical record */
-{
- ulint i;
- page_t* page;
- page_dir_slot_t* slot;
-
- ut_ad(page_rec_check(rec));
-
- while (rec_get_n_owned(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
-
- page = buf_frame_align(rec);
-
- i = page_dir_get_n_slots(page) - 1;
- slot = page_dir_get_nth_slot(page, i);
-
- while (page_dir_slot_get_rec(slot) != rec) {
- ut_a(i > 0);
-
- i--;
- slot = page_dir_get_nth_slot(page, i);
- }
-
- return(i);
-}
-
/****************************************************************
Sets the pointer to the next record on the page. */
UNIV_INLINE
@@ -534,7 +502,7 @@ page_rec_set_next(
page_t* page;
ut_ad(page_rec_check(rec));
- ut_ad((next == NULL)
+ ut_a((next == NULL)
|| (buf_frame_align(rec) == buf_frame_align(next)));
page = buf_frame_align(rec);
@@ -573,7 +541,7 @@ page_rec_get_prev(
slot_no = page_dir_find_owner_slot(rec);
- ut_ad(slot_no != 0);
+ ut_a(slot_no != 0);
slot = page_dir_get_nth_slot(page, slot_no - 1);
@@ -584,7 +552,7 @@ page_rec_get_prev(
rec2 = page_rec_get_next(rec2);
}
- ut_ad(prev_rec);
+ ut_a(prev_rec);
return(prev_rec);
}
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 13b3dffd874..8152c534f48 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -230,6 +230,19 @@ row_update_cascade_for_mysql(
or set null operation */
dict_table_t* table); /* in: table where we do the operation */
/*************************************************************************
+Locks the data dictionary exclusively for performing a table create
+operation. */
+
+void
+row_mysql_lock_data_dictionary(void);
+/*================================*/
+/*************************************************************************
+Unlocks the data dictionary exclusive lock.
+
+void
+row_mysql_unlock_data_dictionary(void);
+/*==================================*/
+/*************************************************************************
Does a table creation operation for MySQL. If the name of the created
table ends in the characters INNODB_MONITOR, then this also starts
printing of monitor output by the master thread. */
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
index c456768e820..820af4cd014 100644
--- a/innobase/include/trx0roll.h
+++ b/innobase/include/trx0roll.h
@@ -102,11 +102,13 @@ trx_rollback(
calling function can start running
a new query thread */
/***********************************************************************
-Rollback uncommitted transactions which have no user session. */
+Rollback or clean up transactions which have no user session. If the
+transaction already was committed, then we clean up a possible insert
+undo log. If the transaction was not yet committed, then we roll it back. */
void
-trx_rollback_all_without_sess(void);
-/*===============================*/
+trx_rollback_or_clean_all_without_sess(void);
+/*========================================*/
/********************************************************************
Finishes a transaction rollback. */
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index 60d5adb72d1..b08df7f6901 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -24,6 +24,14 @@ Created 3/26/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "read0types.h"
+/* In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. We have successfully got the updates to InnoDB
+up to this position. If .._pos is -1, it means no crash recovery was needed,
+or there was no master log position info inside InnoDB. */
+
+extern char trx_sys_mysql_master_log_name[];
+extern ib_longlong trx_sys_mysql_master_log_pos;
+
/* The transaction system */
extern trx_sys_t* trx_sys;
@@ -229,13 +237,18 @@ trx_in_trx_list(
trx_t* in_trx);/* in: trx */
/*********************************************************************
Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. */
+which corresponds to the transaction just being committed. In a MySQL
+replication slave this also updates the latest master binlog position up to
+which replication has proceeded.
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- trx_t* trx, /* in: transaction being committed */
- mtr_t* mtr); /* in: mtr */
+ char* file_name,/* in: MySQL log file name */
+ ib_longlong offset, /* in: position in that log file */
+ ulint field, /* in: offset of the MySQL log info field in
+ the trx sys header */
+ mtr_t* mtr); /* in: mtr */
/*********************************************************************
Prints to stderr the MySQL binlog offset info in the trx system header if
the magic number shows it valid. */
@@ -243,15 +256,17 @@ the magic number shows it valid. */
void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
+/*********************************************************************
+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it valid. */
+
+void
+trx_sys_print_mysql_master_log_pos(void);
+/*====================================*/
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one byte,
-therefore 256 */
-#define TRX_SYS_N_RSEGS 256
-
/* Space id and page no where the trx system file copy resides */
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
@@ -277,22 +292,29 @@ therefore 256 */
segment specification slots */
/*-------------------------------------------------------------*/
-#define TRX_SYS_MYSQL_LOG_NAME_LEN 32
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one byte,
+therefore 256; each slot is currently 8 bytes in size */
+#define TRX_SYS_N_RSEGS 256
+
+#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
+/* The offset of the MySQL replication info on the trx system header page;
+this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
+
/* The offset of the MySQL binlog offset info on the trx system header page */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 300)
+#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows
if we have valid data in the
MySQL binlog info; the value
is ..._MAGIC_N if yes */
-#define TRX_SYS_MYSQL_LOG_NAME 4 /* MySQL log file name */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH (4 + TRX_SYS_MYSQL_LOG_NAME_LEN)
- /* high 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset
within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW (8 + TRX_SYS_MYSQL_LOG_NAME_LEN)
- /* low 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset
within that file */
+#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */
/* The offset of the doublewrite buffer header on the trx system header page */
#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
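[Editor's sketch] The hunk above moves the binlog info block to UNIV_PAGE_SIZE - 1000, adds a second block for the replication master position at UNIV_PAGE_SIZE - 2000, grows the name field to 512 bytes, and puts the two offset words before the name. A hedged sketch of decoding one of these blocks; the helper is illustrative only (the real readers and writers are in trx0sys.c, not shown in this diff) and assumes mach_read_from_4() and the types/constants from the surrounding headers:

        /* 'field' is TRX_SYS_MYSQL_LOG_INFO or TRX_SYS_MYSQL_MASTER_LOG_INFO */
        static ibool
        read_mysql_log_info(
                byte*   sys_header_page,/* in: trx system header page */
                ulint   field,          /* in: offset of the info block */
                char**  name,           /* out: log file name */
                ulint*  offset_high,    /* out: high 4 bytes of the offset */
                ulint*  offset_low)     /* out: low 4 bytes of the offset */
        {
                byte*   info = sys_header_page + field;

                if (mach_read_from_4(info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
                    != TRX_SYS_MYSQL_LOG_MAGIC_N) {

                        return(FALSE);  /* no valid data stored yet */
                }

                *offset_high = mach_read_from_4(
                                info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
                *offset_low = mach_read_from_4(
                                info + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
                *name = (char*)(info + TRX_SYS_MYSQL_LOG_NAME);

                return(TRUE);
        }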
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index 261f33d3dc3..090473f3a5a 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -124,6 +124,15 @@ void
trx_commit_off_kernel(
/*==================*/
trx_t* trx); /* in: transaction */
+/********************************************************************
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction already got to the middle of a commit when the database
+crashed, and we cannot roll it back. */
+
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+ trx_t* trx); /* in: transaction */
/**************************************************************************
Does the transaction commit for MySQL. */
@@ -322,13 +331,24 @@ struct trx_struct{
void* mysql_thd; /* MySQL thread handle corresponding
to this trx, or NULL */
char* mysql_log_file_name;
- /* If MySQL binlog is used, this field
+ /* if MySQL binlog is used, this field
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- ib_longlong mysql_log_offset;/* If MySQL binlog is used, this field
+ ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field
contains the end offset of the binlog
entry */
+ char* mysql_master_log_file_name;
+ /* if the database server is a MySQL
+ replication slave, we have here the
+ master binlog name up to which
+ replication has processed; otherwise
+ this is a pointer to a null character */
+ ib_longlong mysql_master_log_pos;
+ /* if the database server is a MySQL
+ replication slave, this is the
+ position in the log file up to which
+ replication has processed */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
with this transaction object */
/*------------------------------*/
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index b958475e683..e7d93a594c1 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -9,40 +9,26 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER)
+#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER)
#define __WIN__
#include <windows.h>
-/* When compiling for Itanium IA64, undefine the flag below to prevent use
-of 32-bit assembler */
-
-#ifndef WIN64
+#if !defined(WIN64) && !defined(_WIN64)
#define UNIV_CAN_USE_X86_ASSEMBLER
#endif
-/* If you want to check for errors with compiler level -W4,
-comment out the above include of windows.h and let the following defines
-be defined:
-#define HANDLE void*
-#define CRITICAL_SECTION ulint
-*/
-
#ifdef _NT_
#define __NT__
#endif
#else
-/* The Unix version */
-
-/* Most C compilers other than gcc do not know 'extern inline' */
-#if !defined(__GNUC__) && !defined(__WIN__)
-#define UNIV_MUST_NOT_INLINE
-#endif
+/* The defines used with MySQL */
/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. We assume that 'innobase' is a
-subdirectory of 'mysql'. */
+in compiling more Posix-compatible. These headers also define __WIN__
+if we are compiling on Windows. */
+
#include <global.h>
#include <my_pthread.h>
@@ -59,6 +45,20 @@ subdirectory of 'mysql'. */
#include <sched.h>
#endif
+/* When compiling for Itanium IA64, undefine the flag below to prevent use
+of the 32-bit x86 assembler in mutex operations. */
+
+#if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64)
+#define UNIV_CAN_USE_X86_ASSEMBLER
+#endif
+
+/* We only try to do explicit inlining of functions with gcc and
+Microsoft Visual C++ */
+
+#if !defined(__GNUC__) && !defined(__WIN__)
+#define UNIV_MUST_NOT_INLINE
+#endif
+
#ifdef HAVE_PREAD
#define HAVE_PWRITE
#endif
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index 338460d7de9..408788016c1 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -114,7 +114,7 @@ ut_2_exp(
ulint n); /* in: number */
/*****************************************************************
Calculates fast the number rounded up to the nearest power of 2. */
-UNIV_INLINE
+
ulint
ut_2_power_up(
/*==========*/
@@ -155,6 +155,13 @@ ut_print_timestamp(
/*===============*/
FILE* file); /* in: file where to print */
/**************************************************************
+Sprintfs a timestamp to a buffer. */
+
+void
+ut_sprintf_timestamp(
+/*=================*/
+ char* buf); /* in: buffer where to sprintf */
+/**************************************************************
Returns current year, month, day. */
void
diff --git a/innobase/include/ut0ut.ic b/innobase/include/ut0ut.ic
index 90f25d2b382..9d7dd283f29 100644
--- a/innobase/include/ut0ut.ic
+++ b/innobase/include/ut0ut.ic
@@ -172,25 +172,3 @@ ut_2_exp(
{
return(1 << n);
}
-
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-UNIV_INLINE
-ulint
-ut_2_power_up(
-/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
-{
- ulint res;
-
- res = 1;
-
- ut_ad(n > 0);
-
- while (res < n) {
- res = res * 2;
- }
-
- return(res);
-}
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
index 9fd6811d639..1bdbf72bda6 100644
--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -1541,6 +1541,15 @@ lock_rec_enqueue_waiting(
trx = thr_get_trx(thr);
+ if (trx->dict_operation) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: a record lock wait happens in a dictionary operation!\n"
+"InnoDB: Table name %s. Send a bug report to mysql@lists.mysql.com\n",
+index->table_name);
+ }
+
/* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
@@ -2914,7 +2923,7 @@ lock_table_enqueue_waiting(
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
-
+
/* Test if there already is some other reason to suspend thread:
we do not enqueue a lock request if the query thread should be
stopped anyway */
@@ -2926,6 +2935,15 @@ lock_table_enqueue_waiting(
}
trx = thr_get_trx(thr);
+
+ if (trx->dict_operation) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: a table lock wait happens in a dictionary operation!\n"
+"InnoDB: Table name %s. Send a bug report to mysql@lists.mysql.com\n",
+table->name);
+ }
/* Enqueue the lock request that will wait to be granted */
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index 2ba035d1eb2..d6e9deaa151 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -162,6 +162,8 @@ log_reserve_and_open(
ulint archived_lsn_age;
ulint count = 0;
ulint dummy;
+
+ ut_a(len < log->buf_size / 2);
loop:
mutex_enter(&(log->mutex));
@@ -663,6 +665,8 @@ log_init(void)
log_sys->buf_next_to_write = 0;
+ log_sys->flush_lsn = ut_dulint_zero;
+
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
@@ -777,9 +781,15 @@ log_group_init(
*(group->file_header_bufs + i) = ut_align(
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+
+ memset(*(group->file_header_bufs + i), '\0',
+ LOG_FILE_HDR_SIZE);
+
*(group->archive_file_header_bufs + i) = ut_align(
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+ memset(*(group->archive_file_header_bufs + i), '\0',
+ LOG_FILE_HDR_SIZE);
}
group->archive_space_id = archive_space_id;
@@ -791,6 +801,8 @@ log_group_init(
mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+ memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
+
UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
ut_a(log_calc_max_ages());
@@ -839,7 +851,7 @@ log_group_check_flush_completion(
{
ut_ad(mutex_own(&(log_sys->mutex)));
- if (!log_sys->one_flushed && (group->n_pending_writes == 0)) {
+ if (!log_sys->one_flushed && group->n_pending_writes == 0) {
if (log_debug_writes) {
printf("Log flushed first to group %lu\n", group->id);
@@ -933,16 +945,20 @@ log_io_complete(
return;
}
+ ut_a(0); /* We currently use synchronous writing of the
+ logs and cannot end up here! */
+
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2) {
fil_flush(group->space_id);
}
mutex_enter(&(log_sys->mutex));
- ut_ad(group->n_pending_writes > 0);
- ut_ad(log_sys->n_pending_writes > 0);
+ ut_a(group->n_pending_writes > 0);
+ ut_a(log_sys->n_pending_writes > 0);
group->n_pending_writes--;
log_sys->n_pending_writes--;
@@ -956,6 +972,57 @@ log_io_complete(
}
/**********************************************************
+Flushes the log files to the disk, using, for example, the Unix fsync.
+This function does the flush even if the user has set
+srv_flush_log_at_trx_commit = FALSE. */
+
+void
+log_flush_to_disk(void)
+/*===================*/
+{
+ log_group_t* group;
+loop:
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->n_pending_writes > 0) {
+ /* A log file write is running */
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Wait for the log file write to complete and try again */
+
+ os_event_wait(log_sys->no_flush_event);
+
+ goto loop;
+ }
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ log_sys->n_pending_writes++;
+ group->n_pending_writes++;
+
+ os_event_reset(log_sys->no_flush_event);
+ os_event_reset(log_sys->one_flushed_event);
+
+ mutex_exit(&(log_sys->mutex));
+
+ fil_flush(group->space_id);
+
+ mutex_enter(&(log_sys->mutex));
+
+ ut_a(group->n_pending_writes == 1);
+ ut_a(log_sys->n_pending_writes == 1);
+
+ group->n_pending_writes--;
+ log_sys->n_pending_writes--;
+
+ os_event_set(log_sys->no_flush_event);
+ os_event_set(log_sys->one_flushed_event);
+
+ mutex_exit(&(log_sys->mutex));
+}
+
+/**********************************************************
Writes a log file header to a log file space. */
static
void
@@ -970,7 +1037,6 @@ log_group_file_header_flush(
{
byte* buf;
ulint dest_offset;
- ibool sync;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -981,15 +1047,11 @@ log_group_file_header_flush(
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
- dest_offset = nth_file * group->file_size;
+ /* Wipe over possible label of ibbackup --restore */
+ memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
- sync = FALSE;
-
- if (type == LOG_RECOVER) {
+ dest_offset = nth_file * group->file_size;
- sync = TRUE;
- }
-
if (log_debug_writes) {
printf(
"Writing log file header to group %lu file %lu\n", group->id,
@@ -997,14 +1059,9 @@ log_group_file_header_flush(
}
if (log_do_write) {
- if (type == LOG_FLUSH) {
- log_sys->n_pending_writes++;
- group->n_pending_writes++;
- }
-
log_sys->n_log_ios++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -1013,6 +1070,31 @@ log_group_file_header_flush(
}
/**********************************************************
+Stores a 1-byte checksum to the trailer checksum field of a log block
+before writing it to a log file. This checksum is used in recovery to
+check the consistency of a log block. The checksum is simply the 8 low
+bits of 1 + the sum of the bytes in the log block except the trailer bytes. */
+static
+void
+log_block_store_checksum(
+/*=====================*/
+ byte* block) /* in/out: pointer to a log block */
+{
+ ulint i;
+ ulint sum;
+
+ sum = 1;
+
+ for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
+ sum += (ulint)(*(block + i));
+ }
+
+ mach_write_to_1(block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM,
+ 0xFF & sum);
+}
+
+/**********************************************************
Writes a buffer to a log file group. */
void
@@ -1032,20 +1114,13 @@ log_group_write_buf(
header */
{
ulint write_len;
- ibool sync;
ibool write_header;
ulint next_offset;
+ ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- sync = FALSE;
-
- if (type == LOG_RECOVER) {
-
- sync = TRUE;
- }
+ ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
if (new_data_offset == 0) {
write_header = TRUE;
@@ -1076,7 +1151,6 @@ loop:
}
if (log_debug_writes) {
- ulint i;
printf(
"Writing log file segment to group %lu offset %lu len %lu\n"
@@ -1100,15 +1174,17 @@ loop:
}
}
- if (log_do_write) {
- if (type == LOG_FLUSH) {
- log_sys->n_pending_writes++;
- group->n_pending_writes++;
- }
+ /* Calculate the checksums for each log block and write them to
+ the trailer fields of the log blocks */
+
+ for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
+ log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
+ }
+ if (log_do_write) {
log_sys->n_log_ios++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
next_offset / UNIV_PAGE_SIZE,
next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
}
@@ -1126,15 +1202,15 @@ loop:
/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been flushed to disk up to the last log entry written by the
-transaction. If there is a flush running, it waits and checks if the flush
-flushed enough. If not, starts a new flush. */
+that the log has been written to the log file up to the last log entry written
+by the transaction. If there is a flush running, it waits and checks if the
+flush flushed enough. If not, starts a new flush. */
void
log_flush_up_to(
/*============*/
dulint lsn, /* in: log sequence number up to which the log should
- be flushed, ut_dulint_max if not specified */
+ be written, ut_dulint_max if not specified */
ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
{
@@ -1144,6 +1220,7 @@ log_flush_up_to(
ulint area_start;
ulint area_end;
ulint loop_count;
+ ulint unlock;
if (recv_no_ibuf_operations) {
/* Recovery is running and no operations on the log files are
@@ -1209,6 +1286,12 @@ loop:
ut_dulint_get_low(log_sys->lsn));
}
+ log_sys->n_pending_writes++;
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group->n_pending_writes++; /* We assume here that we have only
+ one log group! */
+
os_event_reset(log_sys->no_flush_event);
os_event_reset(log_sys->one_flushed_event);
@@ -1254,6 +1337,36 @@ loop:
group = UT_LIST_GET_NEXT(log_groups, group);
}
+ mutex_exit(&(log_sys->mutex));
+
+ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2) {
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ fil_flush(group->space_id);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ ut_a(group->n_pending_writes == 1);
+ ut_a(log_sys->n_pending_writes == 1);
+
+ group->n_pending_writes--;
+ log_sys->n_pending_writes--;
+
+ unlock = log_group_check_flush_completion(group);
+ unlock = unlock | log_sys_check_flush_completion();
+
+ log_flush_do_unlocks(unlock);
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+
do_waits:
mutex_exit(&(log_sys->mutex));
@@ -1539,15 +1652,23 @@ log_reset_first_header_and_checkpoint(
/*==================================*/
byte* hdr_buf,/* in: buffer which will be written to the start
of the first log file */
- dulint lsn) /* in: lsn of the start of the first log file
- + LOG_BLOCK_HDR_SIZE */
+ dulint start) /* in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
{
ulint fold;
byte* buf;
-
+ dulint lsn;
+
mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, lsn);
+ mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
+
+ lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
+ /* Write the label of ibbackup --restore */
+ sprintf(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "ibbackup ");
+ ut_sprintf_timestamp(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
+ + strlen("ibbackup "));
buf = hdr_buf + LOG_CHECKPOINT_1;
mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index c31719f7bb0..53f75c176ea 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -568,6 +568,55 @@ recv_read_cp_info_for_backup(
return(TRUE);
}
+/**********************************************************
+Checks the 1-byte checksum in the trailer checksum field of a log block.
+We also accept a log block in the old format where the checksum field
+contained the highest byte of the log block number. */
+static
+ibool
+log_block_checksum_is_ok_or_old_format(
+/*===================================*/
+ /* out: TRUE if ok, or if the log block may be in the
+ format of InnoDB version < 3.23.52 */
+ byte* block) /* in: pointer to a log block */
+{
+ ulint i;
+ ulint sum;
+
+ sum = 1;
+
+ for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
+ sum += (ulint)(*(block + i));
+ }
+
+/* printf("Checksum %lu, byte %lu\n", 0xFF & sum,
+ mach_read_from_1(block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM));
+*/
+ if (mach_read_from_1(block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM)
+ == (0xFF & sum)) {
+
+ return(TRUE);
+ }
+
+ if (((0xFF000000 & log_block_get_hdr_no(block)) >> 24)
+ == mach_read_from_1(block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM)) {
+
+ /* We assume the log block is in the format of
+ InnoDB version < 3.23.52 and the block is ok */
+/*
+ fprintf(stderr,
+"InnoDB: Scanned old format < InnoDB-3.23.52 log block number %lu\n",
+ log_block_get_hdr_no(block));
+*/
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
/***********************************************************************
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
@@ -598,12 +647,13 @@ recv_scan_log_seg_for_backup(
no = log_block_get_hdr_no(log_block);
- /* fprintf(stderr, "Log block header no %lu\n", no); */
+/* fprintf(stderr, "Log block header no %lu\n", no); */
- if (no != log_block_get_trl_no(log_block)
- || no != log_block_convert_lsn_to_no(*scanned_lsn)) {
-
-/* printf(
+ if ((no & 0xFFFFFF) != log_block_get_trl_no(log_block)
+ || no != log_block_convert_lsn_to_no(*scanned_lsn)
+ || !log_block_checksum_is_ok_or_old_format(log_block)) {
+/*
+ printf(
"Log block n:o %lu, trailer n:o %lu, scanned lsn n:o %lu\n",
no, log_block_get_trl_no(log_block),
log_block_convert_lsn_to_no(*scanned_lsn));
@@ -611,8 +661,8 @@ recv_scan_log_seg_for_backup(
/* Garbage or an incompletely written log block */
log_block += OS_FILE_LOG_BLOCK_SIZE;
-
-/* printf(
+/*
+ printf(
"Next log block n:o %lu, trailer n:o %lu\n",
log_block_get_hdr_no(log_block),
log_block_get_trl_no(log_block));
@@ -629,11 +679,11 @@ recv_scan_log_seg_for_backup(
/* Garbage from a log buffer flush which was made
before the most recent database recovery */
-
+/*
printf("Scanned cp n:o %lu, block cp n:o %lu\n",
*scanned_checkpoint_no,
log_block_get_checkpoint_no(log_block));
-
+*/
break;
}
@@ -1011,7 +1061,7 @@ recv_recover_page(
page_lsn = page_newest_lsn;
}
} else {
- /* In recovery from a backup we do not use the buffer
+ /* In recovery from a backup we do not really use the buffer
pool */
page_newest_lsn = ut_dulint_zero;
@@ -1361,6 +1411,14 @@ recv_apply_log_recs_for_backup(
nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT),
UNIV_PAGE_SIZE);
+ /* We simulate a page read made by the buffer pool,
+ to make sure recovery works ok. We must init the
+ block corresponding to buf_pool->frame_zero
+ (== page) */
+
+ buf_page_init_for_backup_restore(0, i,
+ buf_block_align(page));
+
recv_recover_page(TRUE, FALSE, page, 0, i);
buf_flush_init_for_writing(page,
@@ -2037,8 +2095,33 @@ recv_scan_log_recs(
/* fprintf(stderr, "Log block header no %lu\n", no); */
- if (no != log_block_get_trl_no(log_block)
- || no != log_block_convert_lsn_to_no(scanned_lsn)) {
+ if ((no & 0xFFFFFF) != log_block_get_trl_no(log_block)
+ || no != log_block_convert_lsn_to_no(scanned_lsn)
+ || !log_block_checksum_is_ok_or_old_format(log_block)) {
+
+ if ((no & 0xFFFFFF) == log_block_get_trl_no(log_block)
+ && no == log_block_convert_lsn_to_no(scanned_lsn)
+ && !log_block_checksum_is_ok_or_old_format(
+ log_block)) {
+ fprintf(stderr,
+"InnoDB: Log block no %lu at lsn %lu %lu has\n"
+"InnoDB: ok header and trailer, but checksum field contains %lu\n",
+ no, ut_dulint_get_high(scanned_lsn),
+ ut_dulint_get_low(scanned_lsn),
+ mach_read_from_1(log_block
+ + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_CHECKSUM));
+ }
+
+ if ((no & 0xFFFFFF)
+ != log_block_get_trl_no(log_block)) {
+ fprintf(stderr,
+"InnoDB: Log block with header no %lu at lsn %lu %lu has\n"
+"InnoDB: trailer no %lu\n",
+ no, ut_dulint_get_high(scanned_lsn),
+ ut_dulint_get_low(scanned_lsn),
+ log_block_get_trl_no(log_block));
+ }
/* Garbage or an incompletely written log block */
@@ -2241,6 +2324,7 @@ recv_recovery_from_checkpoint_start(
dulint archived_lsn;
ulint capacity;
byte* buf;
+ byte log_hdr_buf[LOG_FILE_HDR_SIZE];
ulint err;
ut_ad((type != LOG_CHECKPOINT)
@@ -2288,6 +2372,33 @@ recv_recovery_from_checkpoint_start(
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
+ /* Read the first log file header to print a note if this is
+ a recovery from a restored InnoDB Hot Backup */
+
+ fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
+ 0, 0, LOG_FILE_HDR_SIZE,
+ log_hdr_buf, max_cp_group);
+
+ if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
+ "ibbackup", ut_strlen("ibbackup"))) {
+ /* This log file was created by ibbackup --restore: print
+ a note to the user about it */
+
+ fprintf(stderr,
+ "InnoDB: The log file was created by ibbackup --restore at\n"
+ "InnoDB: %s\n", log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
+
+ /* Wipe over the label now */
+
+ ut_memcpy(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
+ " ", 4);
+ /* Write to the log file to wipe over the label */
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
+ max_cp_group->space_id,
+ 0, 0, OS_FILE_LOG_BLOCK_SIZE,
+ log_hdr_buf, max_cp_group);
+ }
+
group = UT_LIST_GET_FIRST(log_sys->log_groups);
while (group) {
@@ -2471,7 +2582,7 @@ recv_recovery_from_checkpoint_finish(void)
/* Rollback the uncommitted transactions which have no user session */
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- trx_rollback_all_without_sess();
+ trx_rollback_or_clean_all_without_sess();
}
/* Apply the hashed log records to the respective file pages */
@@ -2487,6 +2598,7 @@ recv_recovery_from_checkpoint_finish(void)
}
if (recv_needed_recovery) {
+ trx_sys_print_mysql_master_log_pos();
trx_sys_print_mysql_binlog_offset();
}
@@ -2614,10 +2726,9 @@ recv_reset_log_files_for_backup(
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
- log_reset_first_header_and_checkpoint(buf,
- ut_dulint_add(lsn, LOG_BLOCK_HDR_SIZE));
+ log_reset_first_header_and_checkpoint(buf, lsn);
- log_block_init(buf + LOG_FILE_HDR_SIZE, lsn);
+ log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
LOG_BLOCK_HDR_SIZE);
sprintf(name, "%sib_logfile%lu", log_dir, 0);
@@ -2754,7 +2865,7 @@ ask_again:
if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
fprintf(stderr,
"InnoDB: Archive log file %s starts from too big a lsn\n",
- name);
+ name);
return(TRUE);
}
@@ -2765,7 +2876,7 @@ ask_again:
fprintf(stderr,
"InnoDB: Archive log file %s starts from a wrong lsn\n",
- name);
+ name);
return(TRUE);
}
diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c
index 26f5a5d1cb7..b582afc5710 100644
--- a/innobase/mtr/mtr0log.c
+++ b/innobase/mtr/mtr0log.c
@@ -290,7 +290,7 @@ mlog_write_string(
ut_a(0);
}
ut_ad(ptr && mtr);
- ut_ad(len < UNIV_PAGE_SIZE);
+ ut_a(len < UNIV_PAGE_SIZE);
ut_memcpy(ptr, str, len);
@@ -338,9 +338,13 @@ mlog_parse_string(
offset = mach_read_from_2(ptr);
ptr += 2;
+ ut_a(offset < UNIV_PAGE_SIZE);
+
len = mach_read_from_2(ptr);
ptr += 2;
+ ut_a(len + offset < UNIV_PAGE_SIZE);
+
if (end_ptr < ptr + len) {
return(NULL);
diff --git a/innobase/mtr/mtr0mtr.c b/innobase/mtr/mtr0mtr.c
index f38aa6793b9..565489613ae 100644
--- a/innobase/mtr/mtr0mtr.c
+++ b/innobase/mtr/mtr0mtr.c
@@ -315,7 +315,7 @@ mtr_log_reserve_and_write(
}
data_size = dyn_array_get_data_size(mlog);
-
+
/* Open the database log for log_write_low */
mtr->start_lsn = log_reserve_and_open(data_size);
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index dd51227bbf6..5806cb8f1f3 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -22,6 +22,16 @@ Created 10/21/1995 Heikki Tuuri
#endif
+/* This specifies the file permissions InnoDB uses when it creates files in
+Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
+my_umask */
+
+#ifndef __WIN__
+ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+#else
+ulint os_innodb_umask = 0;
+#endif
+
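[Editor's sketch] os_innodb_umask above replaces the hard-coded mode bits in the open() calls changed further down: the first open() hunk merely drops the world read/write bits, while the second switches to os_innodb_umask. A hedged sketch of both sides; the ha_innodb.cc assignment is paraphrased from the comment above, since that file is not part of this diff:

        /* in ha_innodb.cc (approximate, per the comment above): */
        os_innodb_umask = (ulint) my_umask;

        /* in os_file_create(), as in the second open() hunk below: */
        file = open(name, create_flag, os_innodb_umask);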
/* If the following is set to TRUE, we do not call os_file_flush in every
os_file_write. We can set this TRUE if the doublewrite buffer is used. */
ibool os_do_not_call_flush_at_each_write = FALSE;
@@ -32,7 +42,7 @@ OS does not provide an atomic pread or pwrite, or similar */
os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 32
+#define OS_AIO_MERGE_N_CONSECUTIVE 64
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -40,6 +50,8 @@ use simulated aio we build below with threads */
ibool os_aio_use_native_aio = FALSE;
+ibool os_aio_print_debug = FALSE;
+
/* The aio array slot structure */
typedef struct os_aio_slot_struct os_aio_slot_t;
@@ -115,7 +127,12 @@ os_aio_array_t* os_aio_sync_array = NULL;
ulint os_aio_n_segments = ULINT_UNDEFINED;
+/* If the following is TRUE, read i/o handler threads try to
+wait until a batch of new read requests have been posted */
+ibool os_aio_recommend_sleep_for_read_threads = FALSE;
+
ulint os_n_file_reads = 0;
+ulint os_bytes_read_since_printout = 0;
ulint os_n_file_writes = 0;
ulint os_n_fsyncs = 0;
ulint os_n_file_reads_old = 0;
@@ -412,8 +429,8 @@ try_again:
}
if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
- | S_IWGRP | S_IROTH | S_IWOTH);
+ file = open(name, create_flag, S_IRUSR | S_IWUSR
+ | S_IRGRP | S_IWGRP);
} else {
file = open(name, create_flag);
}
@@ -548,8 +565,7 @@ try_again:
}
#endif
if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
- | S_IWGRP | S_IROTH | S_IWOTH);
+ file = open(name, create_flag, os_innodb_umask);
} else {
file = open(name, create_flag);
}
@@ -735,6 +751,8 @@ os_file_flush(
ut_a(file);
+ os_n_fsyncs++;
+
ret = FlushFileBuffers(file);
if (ret) {
@@ -957,6 +975,7 @@ os_file_read(
ut_a((offset & 0xFFFFFFFF) == offset);
os_n_file_reads++;
+ os_bytes_read_since_printout += n;
try_again:
ut_ad(file);
@@ -1626,13 +1645,40 @@ os_aio_simulated_wake_handler_threads(void)
/* We do not use simulated aio: do nothing */
return;
- }
+ }
+
+ os_aio_recommend_sleep_for_read_threads = FALSE;
for (i = 0; i < os_aio_n_segments; i++) {
os_aio_simulated_wake_handler_thread(i);
}
}
+/**************************************************************************
+This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+
+void
+os_aio_simulated_put_read_threads_to_sleep(void)
+/*============================================*/
+{
+ os_aio_array_t* array;
+ ulint g;
+
+ os_aio_recommend_sleep_for_read_threads = TRUE;
+
+ for (g = 0; g < os_aio_n_segments; g++) {
+ os_aio_get_array_and_local_segment(&array, g);
+
+ if (array == os_aio_read_array) {
+
+ os_event_reset(os_aio_segment_wait_events[g]);
+ }
+ }
+}
+
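[Editor's sketch] The contract described above is easiest to see from the caller's side: the read handler threads are parked, a batch of reads is posted, and the handlers are then woken so the whole batch can be merged into fewer, larger i/os. A hedged sketch of that pattern; post_one_read_request() is a placeholder, not a function in this codebase:

        ulint   i;

        os_aio_simulated_put_read_threads_to_sleep();

        for (i = 0; i < n_pages; i++) {
                post_one_read_request(i);       /* placeholder for a read
                                                posted by the caller */
        }

        /* mandatory: otherwise the read handler threads stay asleep */
        os_aio_simulated_wake_handler_threads();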
/***********************************************************************
Requests an asynchronous i/o operation. */
@@ -2042,15 +2088,10 @@ os_aio_simulated_handle(
ibool ret;
ulint n;
ulint i;
-
+
segment = os_aio_get_array_and_local_segment(&array, global_segment);
restart:
- /* Give other threads chance to add several i/os to the array
- at once */
-
- os_thread_yield();
-
/* NOTE! We only access constant fields in os_aio_array. Therefore
we do not have to acquire the protecting mutex yet */
@@ -2061,6 +2102,15 @@ restart:
/* Look through n slots after the segment * n'th slot */
+ if (array == os_aio_read_array
+ && os_aio_recommend_sleep_for_read_threads) {
+
+ /* Give other threads chance to add several i/os to the array
+ at once. */
+
+ goto recommended_sleep;
+ }
+
os_mutex_enter(array->mutex);
/* Check if there is a slot for which the i/o has already been
@@ -2071,6 +2121,11 @@ restart:
if (slot->reserved && slot->io_already_done) {
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: i/o for slot %lu already done, returning\n", i);
+ }
+
ret = TRUE;
goto slot_io_done;
@@ -2177,6 +2232,13 @@ consecutive_loop:
srv_io_thread_op_info[global_segment] = (char*) "doing file i/o";
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: doing i/o of type %lu at offset %lu %lu, length %lu\n",
+ slot->type, slot->offset_high, slot->offset,
+ total_len);
+ }
+
/* Do the i/o with ordinary, synchronous i/o functions: */
if (slot->type == OS_FILE_WRITE) {
ret = os_file_write(slot->name, slot->file, combined_buf,
@@ -2244,10 +2306,18 @@ wait_for_io:
os_mutex_exit(array->mutex);
- srv_io_thread_op_info[global_segment] = (char*) "waiting for i/o request";
+recommended_sleep:
+ srv_io_thread_op_info[global_segment] =
+ (char*)"waiting for i/o request";
os_event_wait(os_aio_segment_wait_events[global_segment]);
+ if (os_aio_print_debug) {
+ fprintf(stderr,
+"InnoDB: i/o handler thread for i/o segment %lu wakes up\n",
+ global_segment);
+ }
+
goto restart;
}
@@ -2316,6 +2386,7 @@ os_aio_print(void)
ulint n_reserved;
time_t current_time;
double time_elapsed;
+ double avg_bytes_read;
ulint i;
for (i = 0; i < srv_n_file_io_threads; i++) {
@@ -2392,9 +2463,19 @@ loop:
fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes);
printf("%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
os_n_file_reads, os_n_file_writes, os_n_fsyncs);
- printf("%.2f reads/s, %.2f writes/s, %.2f fsyncs/s\n",
+
+ if (os_n_file_reads == os_n_file_reads_old) {
+ avg_bytes_read = 0.0;
+ } else {
+ avg_bytes_read = os_bytes_read_since_printout /
+ (os_n_file_reads - os_n_file_reads_old);
+ }
+
+ printf(
+"%.2f reads/s, %lu avg bytes/read, %.2f writes/s, %.2f fsyncs/s\n",
(os_n_file_reads - os_n_file_reads_old)
/ time_elapsed,
+ (ulint)avg_bytes_read,
(os_n_file_writes - os_n_file_writes_old)
/ time_elapsed,
(os_n_fsyncs - os_n_fsyncs_old)
@@ -2403,6 +2484,7 @@ loop:
os_n_file_reads_old = os_n_file_reads;
os_n_file_writes_old = os_n_file_writes;
os_n_fsyncs_old = os_n_fsyncs;
+ os_bytes_read_since_printout = 0;
os_last_printout = current_time;
}
diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c
index 0b233b4dd72..dfe28fd40c4 100644
--- a/innobase/page/page0cur.c
+++ b/innobase/page/page0cur.c
@@ -403,6 +403,8 @@ page_cur_insert_rec_write_log(
byte* log_ptr;
ulint i;
+ ut_a(rec_size < UNIV_PAGE_SIZE);
+
log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);
if (log_ptr == NULL) {
@@ -491,6 +493,8 @@ page_cur_insert_rec_write_log(
mlog_close(mtr, log_ptr);
+ ut_a(rec_size - i < UNIV_PAGE_SIZE);
+
if (rec_size - i >= MLOG_BUF_MARGIN) {
mlog_catenate_string(mtr, ins_ptr, rec_size - i);
}
@@ -602,6 +606,9 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
+ ut_a(mismatch_index < UNIV_PAGE_SIZE);
+ ut_a(end_seg_len < UNIV_PAGE_SIZE);
+
ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
@@ -937,6 +944,8 @@ page_copy_rec_list_end_to_created_page(
log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+ ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
+
mach_write_to_4(log_ptr, log_data_len);
rec_set_next_offs(insert_rec, PAGE_SUPREMUM);
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c
index a75a7279fb5..f3f6776bf13 100644
--- a/innobase/page/page0page.c
+++ b/innobase/page/page0page.c
@@ -17,6 +17,7 @@ Created 2/2/1994 Heikki Tuuri
#include "lock0lock.h"
#include "fut0lst.h"
#include "btr0sea.h"
+#include "buf0buf.h"
/* A cached template page used in page_create */
page_t* page_template = NULL;
@@ -63,6 +64,65 @@ Assuming a page size of 8 kB, a typical index page of a secondary
index contains 300 index entries, and the size of the page directory
is 50 x 4 bytes = 200 bytes. */
+/*******************************************************************
+Looks for the directory slot which owns the given record. */
+
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ /* out: the directory slot number */
+ rec_t* rec) /* in: the physical record */
+{
+ ulint i;
+ ulint steps = 0;
+ page_t* page;
+ page_dir_slot_t* slot;
+ rec_t* original_rec = rec;
+ char err_buf[1000];
+
+ ut_ad(page_rec_check(rec));
+
+ while (rec_get_n_owned(rec) == 0) {
+ steps++;
+ rec = page_rec_get_next(rec);
+ }
+
+ page = buf_frame_align(rec);
+
+ i = page_dir_get_n_slots(page) - 1;
+ slot = page_dir_get_nth_slot(page, i);
+
+ while (page_dir_slot_get_rec(slot) != rec) {
+
+ if (i == 0) {
+ fprintf(stderr,
+ "InnoDB: Probable data corruption on page %lu\n",
+ buf_frame_get_page_no(page));
+
+ rec_sprintf(err_buf, 900, original_rec);
+
+ fprintf(stderr,
+ "InnoDB: Original record %s\n"
+ "InnoDB: on that page. Steps %lu.\n", err_buf, steps);
+
+ rec_sprintf(err_buf, 900, rec);
+
+ fprintf(stderr,
+ "InnoDB: Cannot find the dir slot for record %s\n"
+ "InnoDB: on that page!\n", err_buf);
+
+ buf_page_print(page);
+
+ ut_a(0);
+ }
+
+ i--;
+ slot = page_dir_get_nth_slot(page, i);
+ }
+
+ return(i);
+}
+
/******************************************************************
Used to check the consistency of a directory slot. */
static
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index e4779b5f26b..31c76705c4b 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -104,7 +104,9 @@ cmp_types_are_equal(
if ((type1->mtype == DATA_VARCHAR && type2->mtype == DATA_CHAR)
|| (type1->mtype == DATA_CHAR && type2->mtype == DATA_VARCHAR)
|| (type1->mtype == DATA_FIXBINARY && type2->mtype == DATA_BINARY)
- || (type1->mtype == DATA_BINARY && type2->mtype == DATA_FIXBINARY)) {
+ || (type1->mtype == DATA_BINARY && type2->mtype == DATA_FIXBINARY)
+ || (type1->mtype == DATA_MYSQL && type2->mtype == DATA_VARMYSQL)
+ || (type1->mtype == DATA_VARMYSQL && type2->mtype == DATA_MYSQL)) {
return(TRUE);
}
@@ -124,14 +126,9 @@ cmp_types_are_equal(
return(FALSE);
}
- if (type1->mtype == DATA_MYSQL
- || type1->mtype == DATA_VARMYSQL) {
+ if (type1->mtype == DATA_INT && type1->len != type2->len) {
- if ((type1->prtype & ~DATA_NOT_NULL)
- != (type2->prtype & ~DATA_NOT_NULL)) {
-
- return(FALSE);
- }
+ return(FALSE);
}
return(TRUE);
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index b28be55347c..bbec004176b 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -609,7 +609,7 @@ the caller must have a shared latch on dict_foreign_key_check_lock. */
ulint
row_ins_check_foreign_constraint(
/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ /* out: DB_SUCCESS,
DB_NO_REFERENCED_ROW,
or DB_ROW_IS_REFERENCED */
ibool check_ref,/* in: TRUE if we want to check that
@@ -635,6 +635,7 @@ row_ins_check_foreign_constraint(
ulint i;
mtr_t mtr;
+run_again:
ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
if (thr_get_trx(thr)->check_foreigns == FALSE) {
@@ -682,7 +683,7 @@ row_ins_check_foreign_constraint(
if (err != DB_SUCCESS) {
- return(err);
+ goto do_possible_lock_wait;
}
}
@@ -727,6 +728,11 @@ row_ins_check_foreign_constraint(
if (!rec_get_deleted_flag(rec)) {
/* Found a matching record */
+/* printf(
+"FOREIGN: Found matching record from %s %s\n",
+ check_index->table_name, check_index->name);
+ rec_print(rec);
+*/
if (check_ref) {
err = DB_SUCCESS;
@@ -779,6 +785,17 @@ next_rec:
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
+do_possible_lock_wait:
+ if (err == DB_LOCK_WAIT) {
+ thr_get_trx(thr)->error_state = err;
+
+ que_thr_stop_for_mysql(thr);
+
+ row_mysql_handle_errors(&err, thr_get_trx(thr), thr, NULL);
+
+ goto run_again;
+ }
+
return(err);
}
@@ -792,8 +809,7 @@ static
ulint
row_ins_check_foreign_constraints(
/*==============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- code */
+ /* out: DB_SUCCESS or error code */
dict_table_t* table, /* in: table */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry for index */
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index 1dfb1c4ee77..b05476e0a3d 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -934,6 +934,7 @@ row_update_for_mysql(
ut_ad(!prebuilt->sql_stat_start);
que_thr_move_to_run_state_for_mysql(thr, trx);
+
run_again:
thr->run_node = node;
thr->prev_node = node;
@@ -998,7 +999,6 @@ row_update_cascade_for_mysql(
trx_t* trx;
trx = thr_get_trx(thr);
-
run_again:
thr->run_node = node;
thr->prev_node = node;
@@ -1131,6 +1131,35 @@ row_mysql_recover_tmp_table(
}
/*************************************************************************
+Locks the data dictionary exclusively for performing a table create
+operation. */
+
+void
+row_mysql_lock_data_dictionary(void)
+/*================================*/
+{
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks or lock waits can occur then in these operations */
+
+ rw_lock_x_lock(&(dict_foreign_key_check_lock));
+ mutex_enter(&(dict_sys->mutex));
+}
+
+/*************************************************************************
+Unlocks the data dictionary exclusive lock.
+
+void
+row_mysql_unlock_data_dictionary(void)
+/*==================================*/
+{
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks can occur then in these operations */
+
+ mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&(dict_foreign_key_check_lock));
+}
+
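[Editor's sketch] With the locking hoisted into these two functions, callers are now expected to take the dictionary lock themselves around the create/index/foreign-key calls, each of which asserts mutex_own(&(dict_sys->mutex)) in the hunks below. A hedged sketch of the calling pattern; the exact argument lists are not shown in this diff:

        row_mysql_lock_data_dictionary();

        err = row_create_table_for_mysql(table, trx);
        /* possibly row_create_index_for_mysql() and
        row_table_add_foreign_constraints() under the same lock */

        row_mysql_unlock_data_dictionary();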
+/*************************************************************************
Does a table creation operation for MySQL. If the name of the created
table ends in the characters INNODB_MONITOR, then this also starts
printing of monitor output by the master thread. */
@@ -1150,6 +1179,7 @@ row_create_table_for_mysql(
ulint err;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ ut_ad(mutex_own(&(dict_sys->mutex)));
if (srv_created_new_raw || srv_force_recovery) {
fprintf(stderr,
@@ -1263,19 +1293,13 @@ row_create_table_for_mysql(
"to use this feature you must compile InnoDB with\n"
"UNIV_MEM_DEBUG defined in univ.i and the server must be\n"
"quiet because allocation from a mem heap is not protected\n"
- "by any semaphore.\n");
+ "by any semaphore.\n");
ut_a(mem_validate());
printf("Memory validated\n");
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
heap = mem_heap_create(512);
trx->dict_operation = TRUE;
@@ -1325,9 +1349,6 @@ row_create_table_for_mysql(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
que_graph_free((que_t*) que_node_get_parent(thr));
trx->op_info = "";
@@ -1354,6 +1375,7 @@ row_create_index_for_mysql(
ulint keywordlen;
ulint err;
+ ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
trx->op_info = "creating index";
@@ -1372,12 +1394,6 @@ row_create_index_for_mysql(
return(DB_SUCCESS);
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
heap = mem_heap_create(512);
trx->dict_operation = TRUE;
@@ -1405,9 +1421,6 @@ row_create_index_for_mysql(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
que_graph_free((que_t*) que_node_get_parent(thr));
trx->op_info = "";
@@ -1441,6 +1454,7 @@ row_table_add_foreign_constraints(
ulint keywordlen;
ulint err;
+ ut_ad(mutex_own(&(dict_sys->mutex)));
ut_a(sql_string);
trx->op_info = "adding foreign keys";
@@ -1459,12 +1473,6 @@ row_table_add_foreign_constraints(
return(DB_SUCCESS);
}
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- rw_lock_x_lock(&(dict_foreign_key_check_lock));
- mutex_enter(&(dict_sys->mutex));
-
trx->dict_operation = TRUE;
err = dict_create_foreign_constraints(trx, sql_string, name);
@@ -1486,9 +1494,6 @@ row_table_add_foreign_constraints(
trx->error_state = DB_SUCCESS;
}
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
return((int) err);
}
@@ -1917,6 +1922,13 @@ row_drop_table_for_mysql(
ut_a(0);
} else {
dict_table_remove_from_cache(table);
+
+ if (dict_load_table(name) != NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: dropping of table %s failed!\n", name);
+
+ }
}
funct_exit:
rw_lock_s_unlock(&(purge_sys->purge_is_running));
diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c
index 390f1b59a4d..5da98943926 100644
--- a/innobase/row/row0purge.c
+++ b/innobase/row/row0purge.c
@@ -511,6 +511,14 @@ row_purge_parse_undo_rec(
clust_index = dict_table_get_first_index(node->table);
+ if (clust_index == NULL) {
+ /* The table was corrupt in the data dictionary */
+
+ rw_lock_x_unlock(&(purge_sys->purge_is_running));
+
+ return(FALSE);
+ }
+
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index 457cb72aaad..77af2390786 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -129,8 +129,7 @@ static
ulint
row_upd_check_references_constraints(
/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or an error
- code */
+ /* out: DB_SUCCESS or an error code */
btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the
cursor position is lost in this function! */
dict_table_t* table, /* in: table in question */
@@ -626,7 +625,7 @@ row_upd_index_parse(
/*******************************************************************
Returns TRUE if ext_vec contains i. */
-UNIV_INLINE
+static
ibool
upd_ext_vec_contains(
/*=================*/
@@ -738,6 +737,7 @@ row_upd_build_difference_binary(
ulint n_diff;
ulint roll_ptr_pos;
ulint trx_id_pos;
+ ibool extern_bit;
ulint i;
/* This function is used only for a clustered index */
@@ -763,9 +763,10 @@ row_upd_build_difference_binary(
goto skip_compare;
}
+
+ extern_bit = rec_get_nth_field_extern_bit(rec, i);
- if (rec_get_nth_field_extern_bit(rec, i)
- != upd_ext_vec_contains(ext_vec, n_ext_vec, i)
+ if (extern_bit != upd_ext_vec_contains(ext_vec, n_ext_vec, i)
|| !dfield_data_is_binary_equal(dfield, len, data)) {
upd_field = upd_get_nth_field(update, n_diff);
@@ -1362,7 +1363,7 @@ ulint
row_upd_del_mark_clust_rec(
/*=======================*/
/* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
+ completed, else error code */
upd_node_t* node, /* in: row update node */
dict_index_t* index, /* in: clustered index */
que_thr_t* thr, /* in: query thread */
@@ -1381,8 +1382,6 @@ row_upd_del_mark_clust_rec(
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
- ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
-
/* Store row because we also have to build the secondary index
entries */
@@ -1391,11 +1390,11 @@ row_upd_del_mark_clust_rec(
/* Mark the clustered index record deleted; we do not have to check
locks, because we assume that we have an x-lock on the record */
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
- TRUE, thr, mtr);
+ err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
+ btr_cur, TRUE, thr, mtr);
if (err == DB_SUCCESS && check_ref) {
- /* NOTE that the following call loses
- the position of pcur ! */
+ /* NOTE that the following call loses the position of pcur ! */
+
err = row_upd_check_references_constraints(pcur, index->table,
index, thr, mtr);
if (err != DB_SUCCESS) {
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index d4aa085479b..19e2b08d5a6 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -639,7 +639,7 @@ srv_release_threads(
slot = srv_table_get_nth_slot(i);
- if ((slot->type == type) && slot->suspended) {
+ if (slot->in_use && slot->type == type && slot->suspended) {
slot->suspended = FALSE;
@@ -1631,6 +1631,7 @@ srv_init(void)
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
slot->in_use = FALSE;
+ slot->type = 0;
slot->event = os_event_create(NULL);
ut_a(slot->event);
}
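
Together these two hunks make the slot scan safe: unused MySQL thread slots are now initialized with type 0, and srv_release_threads() only wakes slots that are actually occupied, of the requested type, and suspended. A minimal sketch of the corrected scan, assuming the slot type is srv_slot_t as elsewhere in srv0srv.c and the usual os_event_set() wake-up call; declarations are condensed for illustration:

	ulint		i;
	srv_slot_t*	slot;

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_table_get_nth_slot(i);

		if (slot->in_use && slot->type == type && slot->suspended) {

			slot->suspended = FALSE;

			os_event_set(slot->event);	/* wake the waiting thread */
		}
	}
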
@@ -1890,8 +1891,6 @@ srv_conc_exit_innodb(
trx_t* trx) /* in: transaction object associated with the
thread */
{
- srv_conc_slot_t* slot = NULL;
-
if (srv_thread_concurrency >= 500) {
return;
@@ -2200,10 +2199,12 @@ loop:
"FILE I/O\n"
"--------\n");
os_aio_print();
- printf("-------------\n"
- "INSERT BUFFER\n"
- "-------------\n");
+ printf("-------------------------------------\n"
+ "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
+ "-------------------------------------\n");
ibuf_print();
+ printf("Successful hash searches %lu, non-hash searches %lu\n",
+ btr_cur_n_sea, btr_cur_n_non_sea);
printf("---\n"
"LOG\n"
"---\n");
@@ -2498,18 +2499,19 @@ loop:
for (i = 0; i < 10; i++) {
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
- srv_main_thread_op_info = "sleeping";
+ srv_main_thread_op_info = (char*)"sleeping";
os_thread_sleep(1000000);
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there are no longer any SELECT
queries on them. */
- srv_main_thread_op_info = "doing background drop tables";
+ srv_main_thread_op_info =
+ (char*)"doing background drop tables";
row_drop_tables_for_mysql_in_background();
- srv_main_thread_op_info = "";
+ srv_main_thread_op_info = (char*)"";
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
@@ -2520,8 +2522,9 @@ loop:
is issued or we have specified in my.cnf no flush
at transaction commit */
- srv_main_thread_op_info = "flushing log";
+ srv_main_thread_op_info = (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
/* If there were fewer than 10 i/os during the
one second sleep, we assume that there is free
@@ -2533,11 +2536,14 @@ loop:
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
if (n_pend_ios < 3 && (n_ios - n_ios_old < 10)) {
- srv_main_thread_op_info = "doing insert buffer merge";
+ srv_main_thread_op_info =
+ (char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5);
- srv_main_thread_op_info = "flushing log";
+ srv_main_thread_op_info =
+ (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
}
if (srv_fast_shutdown && srv_shutdown_state > 0) {
@@ -2578,16 +2584,18 @@ loop:
srv_main_thread_op_info = "flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
}
/* We run a batch of insert buffer merge every 10 seconds,
even if the server has been active */
- srv_main_thread_op_info = "doing insert buffer merge";
+ srv_main_thread_op_info = (char*)"doing insert buffer merge";
ibuf_contract_for_n_pages(TRUE, 5);
- srv_main_thread_op_info = "flushing log";
+ srv_main_thread_op_info = (char*)"flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
/* We run a full purge every 10 seconds, even if the server
has been active */
@@ -2603,7 +2611,7 @@ loop:
goto background_loop;
}
- srv_main_thread_op_info = "purging";
+ srv_main_thread_op_info = (char*)"purging";
n_pages_purged = trx_purge();
current_time = time(NULL);
@@ -2612,6 +2620,7 @@ loop:
srv_main_thread_op_info = "flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_flush_to_disk();
last_flush_time = current_time;
}
}
@@ -2620,25 +2629,25 @@ background_loop:
/* In this loop we run background operations when the server
is quiet and we also come here about once in 10 seconds */
- srv_main_thread_op_info = "doing background drop tables";
+ srv_main_thread_op_info = (char*)"doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
- srv_main_thread_op_info = "";
+ srv_main_thread_op_info = (char*)"";
- srv_main_thread_op_info = "flushing buffer pool pages";
+ srv_main_thread_op_info = (char*)"flushing buffer pool pages";
/* Flush a few oldest pages to make the checkpoint younger */
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, ut_dulint_max);
- srv_main_thread_op_info = "making checkpoint";
+ srv_main_thread_op_info = (char*)"making checkpoint";
/* Make a new checkpoint about once in 10 seconds */
log_checkpoint(TRUE, FALSE);
- srv_main_thread_op_info = "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2651,11 +2660,11 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
- srv_main_thread_op_info = "purging";
+ srv_main_thread_op_info = (char*)"purging";
n_pages_purged = trx_purge();
- srv_main_thread_op_info = "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2664,10 +2673,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = "doing insert buffer merge";
+ srv_main_thread_op_info = (char*)"doing insert buffer merge";
n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
- srv_main_thread_op_info = "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2676,10 +2685,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = "flushing buffer pool pages";
+ srv_main_thread_op_info = (char*)"flushing buffer pool pages";
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
- srv_main_thread_op_info = "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2691,11 +2700,11 @@ background_loop:
srv_main_thread_op_info = "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
- srv_main_thread_op_info = "making checkpoint";
+ srv_main_thread_op_info = (char*)"making checkpoint";
log_checkpoint(TRUE, FALSE);
- srv_main_thread_op_info = "reserving kernel mutex";
+ srv_main_thread_op_info = (char*)"reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2704,7 +2713,8 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = "archiving log (if log archive is on)";
+ srv_main_thread_op_info =
+ (char*)"archiving log (if log archive is on)";
log_archive_do(FALSE, &n_bytes_archived);
@@ -2730,7 +2740,7 @@ background_loop:
master thread to wait for more server activity */
suspend_thread:
- srv_main_thread_op_info = "suspending";
+ srv_main_thread_op_info = (char*)"suspending";
mutex_enter(&kernel_mutex);
@@ -2744,7 +2754,7 @@ suspend_thread:
mutex_exit(&kernel_mutex);
- srv_main_thread_op_info = "waiting for server activity";
+ srv_main_thread_op_info = (char*)"waiting for server activity";
os_event_wait(event);
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index 1fcf8c76a5f..ba0ffbda851 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -932,6 +932,26 @@ innobase_start_or_create_for_mysql(void)
ulint k;
mtr_t mtr;
+#ifdef UNIV_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_SYNC_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_SEARCH_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_MEM_DEBUG
+ fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n");
+#endif
+
log_do_write = TRUE;
/* yydebug = TRUE; */
@@ -999,7 +1019,7 @@ innobase_start_or_create_for_mysql(void)
os_aio_use_native_aio = FALSE;
if (!os_aio_use_native_aio) {
- os_aio_init(4 * SRV_N_PENDING_IOS_PER_THREAD
+ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c
index 47fffea5e40..0a951484b59 100644
--- a/innobase/trx/trx0roll.c
+++ b/innobase/trx/trx0roll.c
@@ -160,11 +160,13 @@ trx_rollback_last_sql_stat_for_mysql(
}
/***********************************************************************
-Rollback uncommitted transactions which have no user session. */
+Rolls back or cleans up transactions which have no user session. If the
+transaction was already committed, then we clean up a possible insert
+undo log. If the transaction was not yet committed, then we roll it back. */
void
-trx_rollback_all_without_sess(void)
-/*===============================*/
+trx_rollback_or_clean_all_without_sess(void)
+/*========================================*/
{
mem_heap_t* heap;
que_fork_t* fork;
@@ -217,6 +219,19 @@ loop:
trx->sess = trx_dummy_sess;
+ if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
+
+ fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
+ ut_dulint_get_high(trx->id),
+ ut_dulint_get_low(trx->id));
+
+ trx_cleanup_at_db_startup(trx);
+
+ mem_heap_free(heap);
+
+ goto loop;
+ }
+
fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
fork->trx = trx;
@@ -264,9 +279,17 @@ loop:
/* If the transaction was for a dictionary operation, we
drop the relevant table, if it still exists */
+ fprintf(stderr,
+"InnoDB: Dropping table with id %lu %lu in recovery if it exists\n",
+ ut_dulint_get_high(trx->table_id),
+ ut_dulint_get_low(trx->table_id));
+
table = dict_table_get_on_id_low(trx->table_id, trx);
if (table) {
+ fprintf(stderr,
+"InnoDB: Table found: dropping table %s in recovery\n", table->name);
+
err = row_drop_table_for_mysql(table->name, trx,
TRUE);
ut_a(err == (int) DB_SUCCESS);
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
index 32a1db48488..675cdf1b7e4 100644
--- a/innobase/trx/trx0sys.c
+++ b/innobase/trx/trx0sys.c
@@ -26,6 +26,14 @@ Created 3/26/1996 Heikki Tuuri
trx_sys_t* trx_sys = NULL;
trx_doublewrite_t* trx_doublewrite = NULL;
+/* In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. We have successfully applied the updates to InnoDB
+up to this position. If .._pos is -1, it means no crash recovery was needed,
+or there was no master log position info inside InnoDB. */
+
+char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+ib_longlong trx_sys_mysql_master_log_pos = -1;
+
/********************************************************************
Determines if a page number is located inside the doublewrite buffer. */
@@ -427,75 +435,62 @@ trx_sys_flush_max_trx_id(void)
/*********************************************************************
Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. */
+which corresponds to the transaction just being committed. In a MySQL
+replication slave this also updates the latest master binlog position up to
+which replication has proceeded. */
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- trx_t* trx, /* in: transaction being committed */
- mtr_t* mtr) /* in: mtr */
+ char* file_name,/* in: MySQL log file name */
+ ib_longlong offset, /* in: position in that log file */
+ ulint field, /* in: offset of the MySQL log info field in
+ the trx sys header */
+ mtr_t* mtr) /* in: mtr */
{
trx_sysf_t* sys_header;
- char namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN];
-
- ut_ad(trx->mysql_log_file_name);
- memset(namebuf, ' ', TRX_SYS_MYSQL_LOG_NAME_LEN - 1);
- namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN - 1] = '\0';
+ if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
- /* Copy the whole MySQL log file name to the buffer, or only the
- last characters, if it does not fit */
+ /* We cannot fit the name into the 512 bytes we have reserved */
- if (ut_strlen(trx->mysql_log_file_name)
- > TRX_SYS_MYSQL_LOG_NAME_LEN - 1) {
- ut_memcpy(namebuf, trx->mysql_log_file_name
- + ut_strlen(trx->mysql_log_file_name)
- - (TRX_SYS_MYSQL_LOG_NAME_LEN - 1),
- TRX_SYS_MYSQL_LOG_NAME_LEN - 1);
- } else {
- ut_memcpy(namebuf, trx->mysql_log_file_name,
- 1 + ut_strlen(trx->mysql_log_file_name));
+ return;
}
- namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN - 1] = '\0';
-
sys_header = trx_sysf_get(mtr);
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ if (mach_read_from_4(sys_header + field
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
TRX_SYS_MYSQL_LOG_MAGIC_N,
MLOG_4BYTES, mtr);
}
- if (0 != ut_memcmp(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME,
- namebuf, TRX_SYS_MYSQL_LOG_NAME_LEN)) {
+ if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
+ file_name, 1 + ut_strlen(file_name))) {
- mlog_write_string(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_string(sys_header + field
+ TRX_SYS_MYSQL_LOG_NAME,
- namebuf, TRX_SYS_MYSQL_LOG_NAME_LEN, mtr);
+ file_name, 1 + ut_strlen(file_name), mtr);
}
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ if (mach_read_from_4(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (trx->mysql_log_offset >> 32) > 0) {
+ || (offset >> 32) > 0) {
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(trx->mysql_log_offset >> 32),
+ (ulint)(offset >> 32),
MLOG_4BYTES, mtr);
}
- mlog_write_ulint(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ mlog_write_ulint(sys_header + field
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(trx->mysql_log_offset & 0xFFFFFFFF),
+ (ulint)(offset & 0xFFFFFFFF),
MLOG_4BYTES, mtr);
-
- trx->mysql_log_file_name = NULL;
}
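
The new field argument lets one routine maintain two independent info slots in the trx system header: the server's own binlog position (TRX_SYS_MYSQL_LOG_INFO) and, on a replication slave, the master's position (TRX_SYS_MYSQL_MASTER_LOG_INFO). Condensed from the trx0trx.c hunk later in this patch, the two call sites at transaction commit look roughly like this:

	if (trx->mysql_log_file_name) {
		trx_sys_update_mysql_binlog_offset(
			trx->mysql_log_file_name,
			trx->mysql_log_offset,
			TRX_SYS_MYSQL_LOG_INFO, &mtr);
	}

	if (trx->mysql_master_log_file_name[0] != '\0') {
		/* this server is a MySQL replication slave */
		trx_sys_update_mysql_binlog_offset(
			trx->mysql_master_log_file_name,
			trx->mysql_master_log_pos,
			TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
	}
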
/*********************************************************************
@@ -533,6 +528,58 @@ trx_sys_print_mysql_binlog_offset(void)
mtr_commit(&mtr);
}
+/*********************************************************************
+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it is valid. */
+
+void
+trx_sys_print_mysql_master_log_pos(void)
+/*====================================*/
+{
+ trx_sysf_t* sys_header;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ fprintf(stderr,
+"InnoDB: In a MySQL replication slave the last master binlog file\n"
+"InnoDB: position %lu %lu, file name %s\n",
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME);
+ /* Copy the master log position info to global variables we can
+ use in ha_innobase.cc to initialize glob_mi to the right values */
+
+ ut_memcpy(trx_sys_mysql_master_log_name,
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME,
+ TRX_SYS_MYSQL_LOG_NAME_LEN);
+
+ trx_sys_mysql_master_log_pos =
+ (((ib_longlong)mach_read_from_4(
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH))
+ << 32)
+ + (ib_longlong)
+ mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
+ mtr_commit(&mtr);
+}
+
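
The position is stored in the header as two 32-bit words, so the function above rebuilds the 64-bit value by shifting the high word. A standalone illustration of the same composition, using a hypothetical helper name (the ib_longlong type and the word values read with mach_read_from_4() are as above):

	/* Hypothetical helper: rebuild a 64-bit log position from the two
	32-bit words stored in the trx system header. */

	ib_longlong
	log_pos_from_words(
		ulint	high,	/* in: ..._LOG_OFFSET_HIGH word */
		ulint	low)	/* in: ..._LOG_OFFSET_LOW word */
	{
		return((((ib_longlong) high) << 32) + (ib_longlong) low);
	}
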
/********************************************************************
Looks for a free slot for a rollback segment in the trx system file copy. */
@@ -660,7 +707,7 @@ trx_sys_init_at_db_start(void)
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
fprintf(stderr,
- "InnoDB: %lu uncommitted transaction(s) which must be rolled back\n",
+ "InnoDB: %lu transaction(s) which must be rolled back or cleaned up\n",
UT_LIST_GET_LEN(trx_sys->trx_list));
fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index ae607c33fbe..9c49abbd287 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -83,6 +83,8 @@ trx_create(
trx->mysql_log_file_name = NULL;
trx->mysql_log_offset = 0;
+ trx->mysql_master_log_file_name = "";
+ trx->mysql_master_log_pos = 0;
trx->ignore_duplicates_in_insert = FALSE;
@@ -363,16 +365,31 @@ trx_lists_init_at_db_start(void)
trx = trx_create(NULL);
+ trx->id = undo->trx_id;
+
+ trx->insert_undo = undo;
+ trx->rseg = rseg;
+
if (undo->state != TRX_UNDO_ACTIVE) {
trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+
+ /* We give a dummy value for the trx no;
+ this should have no relevance since purge
+ is not interested in committed transaction
+ numbers, unless they are in the history
+ list, in which case it looks up the number
+ from the disk-based undo log structure */
+
+ trx->no = trx->id;
} else {
trx->conc_state = TRX_ACTIVE;
- }
- trx->id = undo->trx_id;
- trx->insert_undo = undo;
- trx->rseg = rseg;
+ /* A running transaction always has the number
+ field inited to ut_dulint_max */
+
+ trx->no = ut_dulint_max;
+ }
if (undo->dict_operation) {
trx->dict_operation = undo->dict_operation;
@@ -397,14 +414,25 @@ trx_lists_init_at_db_start(void)
if (NULL == trx) {
trx = trx_create(NULL);
+ trx->id = undo->trx_id;
+
if (undo->state != TRX_UNDO_ACTIVE) {
trx->conc_state =
TRX_COMMITTED_IN_MEMORY;
+ /* We give a dummy value for the trx
+ number */
+
+ trx->no = trx->id;
} else {
trx->conc_state = TRX_ACTIVE;
+
+ /* A running transaction always has
+ the number field inited to
+ ut_dulint_max */
+
+ trx->no = ut_dulint_max;
}
- trx->id = undo->trx_id;
trx->rseg = rseg;
trx_list_insert_ordered(trx);
@@ -583,7 +611,7 @@ trx_commit_off_kernel(
if (undo) {
mutex_enter(&kernel_mutex);
#ifdef notdefined
- /* ########## There is a bug here: purge and rollback
+ /* !!!!!!!!! There is a bug here: purge and rollback
need the whole stack of old record versions even if no
consistent read would need them!! This is because they
decide on the basis of the old versions when we can
@@ -627,12 +655,25 @@ trx_commit_off_kernel(
mutex_exit(&(rseg->mutex));
/* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on */
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
if (trx->mysql_log_file_name) {
- trx_sys_update_mysql_binlog_offset(trx, &mtr);
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+ trx->mysql_log_file_name = NULL;
}
-
+
+ if (trx->mysql_master_log_file_name[0] != '\0') {
+ /* This database server is a MySQL replication slave */
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_master_log_file_name,
+ trx->mysql_master_log_pos,
+ TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
+ }
+
/* If we did not take the shortcut, the following call
commits the mini-transaction, making the whole transaction
committed in the file-based world at this log sequence number;
@@ -707,12 +748,12 @@ trx_commit_off_kernel(
/*-------------------------------------*/
- /* Most MySQL users run with srv_flush.. set to FALSE: */
+ /* Most MySQL users run with srv_flush_.. set to FALSE: */
if (srv_flush_log_at_trx_commit) {
log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
- }
+ }
/*-------------------------------------*/
@@ -730,6 +771,29 @@ trx_commit_off_kernel(
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
+/********************************************************************
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction already got to the middle of a commit when the database
+crashed, and we cannot roll it back. */
+
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+ trx_t* trx) /* in: transaction */
+{
+ if (trx->insert_undo != NULL) {
+
+ trx_undo_insert_cleanup(trx);
+ }
+
+ trx->conc_state = TRX_NOT_STARTED;
+ trx->rseg = NULL;
+ trx->undo_no = ut_dulint_zero;
+ trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+
+ UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+}
+
/************************************************************************
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
index b5341871228..dd249a7062a 100644
--- a/innobase/trx/trx0undo.c
+++ b/innobase/trx/trx0undo.c
@@ -1147,7 +1147,7 @@ trx_undo_mem_create_at_db_start(
/* If the log segment is being freed, the page list is inconsistent! */
if (state == TRX_UNDO_TO_FREE) {
- return(undo);
+ goto add_to_list;
}
last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
@@ -1166,7 +1166,7 @@ trx_undo_mem_create_at_db_start(
undo->top_offset = rec - last_page;
undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
}
-
+add_to_list:
if (type == TRX_UNDO_INSERT) {
if (state != TRX_UNDO_CACHED) {
UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
index a1320e8b5bc..2a7643551ad 100644
--- a/innobase/ut/ut0mem.c
+++ b/innobase/ut/ut0mem.c
@@ -38,6 +38,8 @@ os_fast_mutex_t ut_list_mutex; /* this protects the list */
ibool ut_mem_block_list_inited = FALSE;
+ulint* ut_mem_null_ptr = NULL;
+
/**************************************************************************
Initializes the mem block list at database startup. */
static
@@ -83,12 +85,16 @@ ut_malloc_low(
"InnoDB: Check if you should increase the swap file or\n"
"InnoDB: ulimits of your operating system.\n"
"InnoDB: On FreeBSD check you have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n",
+ "InnoDB: a big enough maximum process size.\n"
+ "InnoDB: We now intentionally generate a seg fault so that\n"
+ "InnoDB: on Linux we get a stack trace.\n",
n, ut_total_allocated_memory, errno);
os_fast_mutex_unlock(&ut_list_mutex);
- exit(1);
+ /* Make an intentional seg fault so that we get a stack
+ trace */
+ printf("%lu\n", *ut_mem_null_ptr);
}
if (set_to_zero) {
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
index 7ee32b9a8e2..6a5f273e731 100644
--- a/innobase/ut/ut0ut.c
+++ b/innobase/ut/ut0ut.c
@@ -111,6 +111,49 @@ ut_print_timestamp(
}
/**************************************************************
+Sprintfs a timestamp to a buffer. */
+
+void
+ut_sprintf_timestamp(
+/*=================*/
+ char* buf) /* in: buffer where to sprintf */
+{
+#ifdef __WIN__
+ SYSTEMTIME cal_tm;
+
+ GetLocalTime(&cal_tm);
+
+ sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
+ (int)cal_tm.wYear % 100,
+ (int)cal_tm.wMonth,
+ (int)cal_tm.wDay,
+ (int)cal_tm.wHour,
+ (int)cal_tm.wMinute,
+ (int)cal_tm.wSecond);
+#else
+ struct tm cal_tm;
+ struct tm* cal_tm_ptr;
+ time_t tm;
+
+ time(&tm);
+
+#ifdef HAVE_LOCALTIME_R
+ localtime_r(&tm, &cal_tm);
+ cal_tm_ptr = &cal_tm;
+#else
+ cal_tm_ptr = localtime(&tm);
+#endif
+ sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
+ cal_tm_ptr->tm_year % 100,
+ cal_tm_ptr->tm_mon + 1,
+ cal_tm_ptr->tm_mday,
+ cal_tm_ptr->tm_hour,
+ cal_tm_ptr->tm_min,
+ cal_tm_ptr->tm_sec);
+#endif
+}
+
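
The format string above yields at most 15 characters ("YYMMDD HH:MM:SS") plus the terminating NUL, so the caller must pass a buffer of at least 16 bytes. A minimal usage sketch; the buffer size and message text are illustrative:

	char	buf[64];	/* comfortably larger than the 16 bytes needed */

	ut_sprintf_timestamp(buf);

	fprintf(stderr, "%s  InnoDB: example message\n", buf);
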
+/**************************************************************
Returns current year, month, day. */
void
@@ -258,3 +301,26 @@ ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
ut_ulint_cmp);
}
+
+/*****************************************************************
+Quickly calculates the number rounded up to the nearest power of 2. */
+
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 1;
+
+ ut_ad(n > 0);
+
+ while (res < n) {
+ res = res * 2;
+ }
+
+ return(res);
+}
+
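
The loop doubles res until it reaches n, so the result is the smallest power of two that is >= n; an n that is already a power of two is returned unchanged. A small usage sketch with illustrative values:

	ut_a(ut_2_power_up(1) == 1);
	ut_a(ut_2_power_up(1000) == 1024);
	ut_a(ut_2_power_up(4096) == 4096);	/* powers of two map to themselves */
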