summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r-- storage/archive/ha_archive.cc 7
-rw-r--r-- storage/heap/ha_heap.cc 3
-rw-r--r-- storage/heap/ha_heap.h 3
-rw-r--r-- storage/innobase/btr/btr0btr.c 52
-rw-r--r-- storage/innobase/btr/btr0cur.c 90
-rw-r--r-- storage/innobase/buf/buf0buddy.c 409
-rw-r--r-- storage/innobase/buf/buf0buf.c 498
-rw-r--r-- storage/innobase/buf/buf0flu.c 4
-rw-r--r-- storage/innobase/buf/buf0lru.c 217
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 1
-rw-r--r-- storage/innobase/include/btr0btr.h 15
-rw-r--r-- storage/innobase/include/btr0cur.h 26
-rw-r--r-- storage/innobase/include/btr0cur.ic 4
-rw-r--r-- storage/innobase/include/buf0buddy.h 46
-rw-r--r-- storage/innobase/include/buf0buddy.ic 68
-rw-r--r-- storage/innobase/include/buf0buf.h 54
-rw-r--r-- storage/innobase/include/buf0buf.ic 46
-rw-r--r-- storage/innobase/include/buf0lru.h 23
-rw-r--r-- storage/innobase/include/buf0types.h 17
-rw-r--r-- storage/innobase/include/db0err.h 2
-rw-r--r-- storage/innobase/include/page0cur.ic 5
-rw-r--r-- storage/innobase/include/page0page.h 39
-rw-r--r-- storage/innobase/include/page0page.ic 32
-rw-r--r-- storage/innobase/include/row0row.h 28
-rw-r--r-- storage/innobase/include/row0row.ic 25
-rw-r--r-- storage/innobase/include/row0upd.ic 4
-rw-r--r-- storage/innobase/include/sync0rw.ic 10
-rw-r--r-- storage/innobase/include/sync0sync.h 6
-rw-r--r-- storage/innobase/include/trx0sys.h 13
-rw-r--r-- storage/innobase/include/trx0sys.ic 24
-rw-r--r-- storage/innobase/include/univ.i 3
-rw-r--r-- storage/innobase/page/page0cur.c 17
-rw-r--r-- storage/innobase/page/page0page.c 52
-rw-r--r-- storage/innobase/page/page0zip.c 186
-rw-r--r-- storage/innobase/rem/rem0rec.c 10
-rw-r--r-- storage/innobase/row/row0ins.c 48
-rw-r--r-- storage/innobase/row/row0mysql.c 37
-rw-r--r-- storage/innobase/row/row0row.c 63
-rw-r--r-- storage/innobase/row/row0upd.c 35
-rw-r--r-- storage/innobase/sync/sync0rw.c 4
-rw-r--r-- storage/innobase/sync/sync0sync.c 12
-rw-r--r-- storage/innobase/ut/ut0ut.c 2
-rw-r--r-- storage/myisam/ha_myisam.cc 3
-rw-r--r-- storage/myisam/ha_myisam.h 3
-rw-r--r-- storage/myisam/mi_check.c 3
-rw-r--r-- storage/myisam/mi_create.c 3
-rw-r--r-- storage/myisam/mi_range.c 3
-rw-r--r-- storage/myisam/mi_test1.c 3
-rw-r--r-- storage/myisam/mi_update.c 3
-rw-r--r-- storage/myisam/mi_write.c 3
-rw-r--r-- storage/myisam/myisamdef.h 3
-rw-r--r-- storage/myisammrg/ha_myisammrg.h 3
-rw-r--r-- storage/ndb/src/kernel/blocks/lgman.cpp 7
53 files changed, 945 insertions, 1332 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 4f2294b69f6..aec13a5fe6c 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@@ -12,8 +13,8 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- 02110-1301 USA */
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation // gcc: Class implementation
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 7378416dbe3..218aa35700f 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 3577aeb08f4..f48235ba737 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index fb2509a62ff..3d8fadeaf92 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2275,7 +2275,7 @@ btr_attach_half_pages(
/*==================*/
dict_index_t* index, /*!< in: the index tree */
buf_block_t* block, /*!< in/out: page to be split */
- rec_t* split_rec, /*!< in: first record on upper
+ const rec_t* split_rec, /*!< in: first record on upper
half page */
buf_block_t* new_block, /*!< in/out: the new half page */
ulint direction, /*!< in: FSP_UP or FSP_DOWN */
@@ -2967,15 +2967,16 @@ btr_node_ptr_delete(
ut_a(err == DB_SUCCESS);
if (!compressed) {
- btr_cur_compress_if_useful(&cursor, mtr);
+ btr_cur_compress_if_useful(&cursor, FALSE, mtr);
}
}
/*************************************************************//**
If the page is the only one on its level, this function moves its records to the
-father page, thus reducing the tree height. */
+father page, thus reducing the tree height.
+@return father block */
static
-void
+buf_block_t*
btr_lift_page_up(
/*=============*/
dict_index_t* index, /*!< in: index tree */
@@ -3092,6 +3093,8 @@ btr_lift_page_up(
}
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
+
+ return(father_block);
}
/*************************************************************//**
@@ -3108,11 +3111,13 @@ UNIV_INTERN
ibool
btr_compress(
/*=========*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr) /*!< in: mtr */
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to merge
+ or lift; the page must not be empty:
+ when deleting records, use btr_discard_page()
+ if the page would become empty */
+ ibool adjust, /*!< in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
dict_index_t* index;
ulint space;
@@ -3130,12 +3135,14 @@ btr_compress(
ulint* offsets;
ulint data_size;
ulint n_recs;
+ ulint nth_rec = 0; /* remove bogus warning */
ulint max_ins_size;
ulint max_ins_size_reorg;
block = btr_cur_get_block(cursor);
page = btr_cur_get_page(cursor);
index = btr_cur_get_index(cursor);
+
ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
@@ -3156,6 +3163,10 @@ btr_compress(
offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
&father_cursor);
+ if (adjust) {
+ nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ }
+
/* Decide the page to which we try to merge and which will inherit
the locks */
@@ -3182,9 +3193,9 @@ btr_compress(
} else {
/* The page is the only one on the level, lift the records
to the father */
- btr_lift_page_up(index, block, mtr);
- mem_heap_free(heap);
- return(TRUE);
+
+ merge_block = btr_lift_page_up(index, block, mtr);
+ goto func_exit;
}
n_recs = page_get_n_recs(page);
@@ -3266,6 +3277,10 @@ err_exit:
btr_node_ptr_delete(index, block, mtr);
lock_update_merge_left(merge_block, orig_pred, block);
+
+ if (adjust) {
+ nth_rec += page_rec_get_n_recs_before(orig_pred);
+ }
} else {
rec_t* orig_succ;
#ifdef UNIV_BTR_DEBUG
@@ -3330,7 +3345,6 @@ err_exit:
}
btr_blob_dbg_remove(page, index, "btr_compress");
- mem_heap_free(heap);
if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
/* Update the free bits of the B-tree page in the
@@ -3382,6 +3396,16 @@ err_exit:
btr_page_free(index, block, mtr);
ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+func_exit:
+ mem_heap_free(heap);
+
+ if (adjust) {
+ btr_cur_position(
+ index,
+ page_rec_get_nth(merge_block->frame, nth_rec),
+ merge_block, cursor);
+ }
+
return(TRUE);
}
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index b93dc1b9238..e1060af525c 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -1972,7 +1972,6 @@ btr_cur_optimistic_update(
ulint old_rec_size;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
- trx_t* trx;
mem_heap_t* heap;
ulint i;
ulint n_ext;
@@ -1990,7 +1989,8 @@ btr_cur_optimistic_update(
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets));
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
#ifdef UNIV_DEBUG
@@ -2114,13 +2114,11 @@ any_extern:
page_cur_move_to_prev(page_cursor);
- trx = thr_get_trx(thr);
-
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
+ thr_get_trx(thr)->id);
}
/* There are no externally stored columns in new_entry */
@@ -2206,7 +2204,9 @@ btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update */
+ btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
+ cursor may become invalid if *big_rec == NULL
+ || !(flags & BTR_KEEP_POS_FLAG) */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
@@ -2345,7 +2345,7 @@ btr_cur_pessimistic_update(
record to be inserted: we have to remember which fields were such */
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+ ut_ad(rec_offs_validate(rec, index, offsets));
n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
if (UNIV_LIKELY_NULL(page_zip)) {
@@ -2368,6 +2368,10 @@ make_external:
err = DB_TOO_BIG_RECORD;
goto return_after_reservations;
}
+
+ ut_ad(page_is_leaf(page));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
}
/* Store state of explicit locks on rec on the page infimum record,
@@ -2395,6 +2399,8 @@ make_external:
rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
if (rec) {
+ page_cursor->rec = rec;
+
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
rec, block);
@@ -2408,7 +2414,10 @@ make_external:
rec, index, offsets, mtr);
}
- btr_cur_compress_if_useful(cursor, mtr);
+ btr_cur_compress_if_useful(
+ cursor,
+ big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
+ mtr);
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -2428,6 +2437,21 @@ make_external:
}
}
+ if (big_rec_vec) {
+ ut_ad(page_is_leaf(page));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
+
+ /* btr_page_split_and_insert() in
+ btr_cur_pessimistic_insert() invokes
+ mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+ We must keep the index->lock when we created a
+ big_rec, so that row_upd_clust_rec() can store the
+ big_rec in the same mini-transaction. */
+
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ }
+
/* Was the record to be updated positioned as the first user
record on its page? */
was_first = page_cur_is_before_first(page_cursor);
@@ -2443,6 +2467,7 @@ make_external:
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ page_cursor->rec = rec;
if (dict_index_is_sec_or_ibuf(index)) {
/* Update PAGE_MAX_TRX_ID in the index page header.
@@ -2501,6 +2526,39 @@ return_after_reservations:
return(err);
}
+/**************************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page.
+On entry, mtr must already hold the x-lock on the index lock and the
+x-fix on the cursor block (both are checked with ut_ad()). On return,
+mtr is a freshly started mini-transaction whose memo contains those
+same two latches again. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /*!< in: cursor whose index lock and page block stay latched */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ buf_block_t* block;
+
+ block = btr_cur_get_block(cursor);
+
+ /* Precondition: the caller's mtr owns both latches that we are
+ about to carry over. */
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* Keep the locks across the mtr_commit(mtr): acquire the index
+ lock and the block lock once more, and buffer-fix the block while
+ holding block->mutex, before the commit releases what mtr holds.
+ NOTE(review): this relies on the x-locks being re-acquirable by
+ the thread that already owns them -- confirm against sync0rw. */
+ rw_lock_x_lock(dict_index_get_lock(cursor->index));
+ rw_lock_x_lock(&block->lock);
+ mutex_enter(&block->mutex);
+ buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+ mutex_exit(&block->mutex);
+ /* Write out the redo log. */
+ mtr_commit(mtr);
+ mtr_start(mtr);
+ /* Reassociate the locks with the mini-transaction.
+ They will be released on mtr_commit(mtr). The pushes hand the
+ extra acquisitions taken above over to the restarted mtr, so no
+ explicit unlock is done here. */
+ mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK);
+ mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
+
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/****************************************************************//**
@@ -2881,10 +2939,12 @@ UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr) /*!< in: mtr */
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
+ cursor does not stay valid if !adjust and
+ compression occurs */
+ ibool adjust, /*!< in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_ad(mtr_memo_contains(mtr,
dict_index_get_lock(btr_cur_get_index(cursor)),
@@ -2893,7 +2953,7 @@ btr_cur_compress_if_useful(
MTR_MEMO_PAGE_X_FIX));
return(btr_cur_compress_recommendation(cursor, mtr)
- && btr_compress(cursor, mtr));
+ && btr_compress(cursor, adjust, mtr));
}
/*******************************************************//**
@@ -3135,7 +3195,7 @@ return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
- ret = btr_cur_compress_if_useful(cursor, mtr);
+ ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
}
if (n_extents > 0) {
@@ -4095,7 +4155,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
- if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
+ if (!buf_LRU_free_block(&block->page, all)
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocated. */
diff --git a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
index 75ba832c7e5..b11bf02c747 100644
--- a/storage/innobase/buf/buf0buddy.c
+++ b/storage/innobase/buf/buf0buddy.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,6 +56,14 @@ buf_buddy_get(
}
}
+/** Validate a given zip_free list. */
+#define BUF_BUDDY_LIST_VALIDATE(b, i) \
+ UT_LIST_VALIDATE(list, buf_page_t, \
+ b->zip_free[i], \
+ ut_ad(buf_page_get_state( \
+ ut_list_node_313) \
+ == BUF_BLOCK_ZIP_FREE))
+
/**********************************************************************//**
Add a block to the head of the appropriate buddy free list. */
UNIV_INLINE
@@ -67,21 +75,10 @@ buf_buddy_add_to_free(
ulint i) /*!< in: index of
buf_pool->zip_free[] */
{
-#ifdef UNIV_DEBUG_VALGRIND
- buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-
- if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
-
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
ut_ad(buf_pool->zip_free[i].start != bpage);
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
-
-#ifdef UNIV_DEBUG_VALGRIND
- if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
- UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
@@ -95,25 +92,17 @@ buf_buddy_remove_from_free(
ulint i) /*!< in: index of
buf_pool->zip_free[] */
{
-#ifdef UNIV_DEBUG_VALGRIND
+#ifdef UNIV_DEBUG
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
- if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
- if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
-
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
-#endif /* UNIV_DEBUG_VALGRIND */
+#endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
-
-#ifdef UNIV_DEBUG_VALGRIND
- if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
- if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
@@ -130,17 +119,13 @@ buf_buddy_alloc_zip(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_a(i < BUF_BUDDY_SIZES);
+ ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
+
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
-#ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
-#endif /* !UNIV_DEBUG_VALGRIND */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
- UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
buf_buddy_remove_from_free(buf_pool, bpage, i);
@@ -159,13 +144,10 @@ buf_buddy_alloc_zip(
}
}
-#ifdef UNIV_DEBUG
if (bpage) {
- memset(bpage, ~i, BUF_BUDDY_LOW << i);
+ ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
+ UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
}
-#endif /* UNIV_DEBUG */
-
- UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
return(bpage);
}
@@ -253,6 +235,7 @@ buf_buddy_alloc_from(
{
ulint offs = BUF_BUDDY_LOW << j;
ut_ad(j <= BUF_BUDDY_SIZES);
+ ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(j >= i);
ut_ad(!ut_align_offset(buf, offs));
@@ -266,13 +249,7 @@ buf_buddy_alloc_from(
bpage = (buf_page_t*) ((byte*) buf + offs);
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
bpage->state = BUF_BLOCK_ZIP_FREE;
-#ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(
- ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
-#endif /* !UNIV_DEBUG_VALGRIND */
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
buf_buddy_add_to_free(buf_pool, bpage, j);
}
@@ -282,26 +259,27 @@ buf_buddy_alloc_from(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
-The buf_pool->mutex may only be released and reacquired if lru != NULL.
-@return allocated block, possibly NULL if lru==NULL */
+The buf_pool->mutex may be released and reacquired.
+@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that
will be assigned TRUE if storage was
allocated from the LRU list and
buf_pool->mutex was temporarily
- released, or NULL if the LRU list
- should not be used */
+ released */
{
buf_block_t* block;
+ ut_ad(lru);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
if (i < BUF_BUDDY_SIZES) {
/* Try to allocate from the buddy system. */
@@ -320,11 +298,6 @@ buf_buddy_alloc_low(
goto alloc_big;
}
- if (!lru) {
-
- return(NULL);
- }
-
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit(buf_pool);
block = buf_LRU_get_free_block(buf_pool);
@@ -343,63 +316,6 @@ func_exit:
}
/**********************************************************************//**
-Try to relocate the control block of a compressed page.
-@return TRUE if relocated */
-static
-ibool
-buf_buddy_relocate_block(
-/*=====================*/
- buf_page_t* bpage, /*!< in: block to relocate */
- buf_page_t* dpage) /*!< in: free block to relocate to */
-{
- buf_page_t* b;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(buf_pool_mutex_own(buf_pool));
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- case BUF_BLOCK_ZIP_DIRTY:
- /* Cannot relocate dirty pages. */
- return(FALSE);
-
- case BUF_BLOCK_ZIP_PAGE:
- break;
- }
-
- mutex_enter(&buf_pool->zip_mutex);
-
- if (!buf_page_can_relocate(bpage)) {
- mutex_exit(&buf_pool->zip_mutex);
- return(FALSE);
- }
-
- buf_relocate(bpage, dpage);
- ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
-
- /* relocate buf_pool->zip_clean */
- b = UT_LIST_GET_PREV(list, dpage);
- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
-
- if (b) {
- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
- } else {
- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
- }
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
-
- mutex_exit(&buf_pool->zip_mutex);
- return(TRUE);
-}
-
-/**********************************************************************//**
Try to relocate a block.
@return TRUE if relocated */
static
@@ -415,108 +331,88 @@ buf_buddy_relocate(
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
ullint usec = ut_time_us(NULL);
+ mutex_t* mutex;
+ ulint space;
+ ulint page_no;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size));
+ ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
UNIV_MEM_ASSERT_W(dst, size);
/* We assume that all memory from buf_buddy_alloc()
- is used for either compressed pages or buf_page_t
- objects covering compressed pages. */
+ is used for compressed page frames. */
/* We look inside the allocated objects returned by
- buf_buddy_alloc() and assume that anything of
- PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
- a valid space_id and page_no in the page header. Should the
- fields be invalid, we will be unable to relocate the block.
- We also assume that anything that fits sizeof(buf_page_t)
- actually is a properly initialized buf_page_t object. */
-
- if (size >= PAGE_ZIP_MIN_SIZE) {
- /* This is a compressed page. */
- mutex_t* mutex;
-
- /* The src block may be split into smaller blocks,
- some of which may be free. Thus, the
- mach_read_from_4() calls below may attempt to read
- from free memory. The memory is "owned" by the buddy
- allocator (and it has been allocated from the buffer
- pool), so there is nothing wrong about this. The
- mach_read_from_4() calls here will only trigger bogus
- Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
- ulint space = mach_read_from_4(
- (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint page_no = mach_read_from_4(
- (const byte*) src + FIL_PAGE_OFFSET);
- /* Suppress Valgrind warnings about conditional jump
- on uninitialized value. */
- UNIV_MEM_VALID(&space, sizeof space);
- UNIV_MEM_VALID(&page_no, sizeof page_no);
- bpage = buf_page_hash_get(buf_pool, space, page_no);
-
- if (!bpage || bpage->zip.data != src) {
- /* The block has probably been freshly
- allocated by buf_LRU_get_free_block() but not
- added to buf_pool->page_hash yet. Obviously,
- it cannot be relocated. */
-
- return(FALSE);
- }
+ buf_buddy_alloc() and assume that each block is a compressed
+ page that contains a valid space_id and page_no in the page
+ header. Should the fields be invalid, we will be unable to
+ relocate the block. */
+
+ /* The src block may be split into smaller blocks,
+ some of which may be free. Thus, the
+ mach_read_from_4() calls below may attempt to read
+ from free memory. The memory is "owned" by the buddy
+ allocator (and it has been allocated from the buffer
+ pool), so there is nothing wrong about this. The
+ mach_read_from_4() calls here will only trigger bogus
+ Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
+ space = mach_read_from_4((const byte *) src
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ page_no = mach_read_from_4((const byte *) src
+ + FIL_PAGE_OFFSET);
+ /* Suppress Valgrind warnings about conditional jump
+ on uninitialized value. */
+ UNIV_MEM_VALID(&space, sizeof space);
+ UNIV_MEM_VALID(&page_no, sizeof page_no);
+ bpage = buf_page_hash_get(buf_pool, space, page_no);
+
+ if (!bpage || bpage->zip.data != src) {
+ /* The block has probably been freshly
+ allocated by buf_LRU_get_free_block() but not
+ added to buf_pool->page_hash yet. Obviously,
+ it cannot be relocated. */
- ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+ return(FALSE);
+ }
- if (page_zip_get_size(&bpage->zip) != size) {
- /* The block is of different size. We would
- have to relocate all blocks covered by src.
- For the sake of simplicity, give up. */
- ut_ad(page_zip_get_size(&bpage->zip) < size);
+ if (page_zip_get_size(&bpage->zip) != size) {
+ /* The block is of different size. We would
+ have to relocate all blocks covered by src.
+ For the sake of simplicity, give up. */
+ ut_ad(page_zip_get_size(&bpage->zip) < size);
- return(FALSE);
- }
+ return(FALSE);
+ }
- /* The block must have been allocated, but it may
- contain uninitialized data. */
- UNIV_MEM_ASSERT_W(src, size);
-
- mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(mutex);
-
- if (buf_page_can_relocate(bpage)) {
- /* Relocate the compressed page. */
- ut_a(bpage->zip.data == src);
- memcpy(dst, src, size);
- bpage->zip.data = dst;
- mutex_exit(mutex);
-success:
- UNIV_MEM_INVALID(src, size);
- {
- buf_buddy_stat_t* buddy_stat
- = &buf_pool->buddy_stat[i];
- buddy_stat->relocated++;
- buddy_stat->relocated_usec
- += ut_time_us(NULL) - usec;
- }
- return(TRUE);
- }
+ /* The block must have been allocated, but it may
+ contain uninitialized data. */
+ UNIV_MEM_ASSERT_W(src, size);
- mutex_exit(mutex);
- } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
- /* This must be a buf_page_t object. */
-#if UNIV_WORD_SIZE == 4
- /* On 32-bit systems, there is no padding in
- buf_page_t. On other systems, Valgrind could complain
- about uninitialized pad bytes. */
- UNIV_MEM_ASSERT_RW(src, size);
-#endif
- if (buf_buddy_relocate_block(src, dst)) {
+ mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(mutex);
- goto success;
+ if (buf_page_can_relocate(bpage)) {
+ /* Relocate the compressed page. */
+ ut_a(bpage->zip.data == src);
+ memcpy(dst, src, size);
+ bpage->zip.data = dst;
+ mutex_exit(mutex);
+ UNIV_MEM_INVALID(src, size);
+ {
+ buf_buddy_stat_t* buddy_stat
+ = &buf_pool->buddy_stat[i];
+ buddy_stat->relocated++;
+ buddy_stat->relocated_usec
+ += ut_time_us(NULL) - usec;
}
+ return(TRUE);
}
+ mutex_exit(mutex);
return(FALSE);
}
@@ -538,12 +434,13 @@ buf_buddy_free_low(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(buf_pool->buddy_stat[i].used > 0);
buf_pool->buddy_stat[i].used--;
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
- ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+ ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
if (i == BUF_BUDDY_SIZES) {
buf_buddy_block_free(buf_pool, buf);
@@ -554,32 +451,36 @@ recombine:
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf_pool, buf));
- /* Try to combine adjacent blocks. */
+ /* Do not recombine blocks if there are few free blocks.
+ We may waste up to 15360*max_len bytes to free blocks
+ (1024 + 2048 + 4096 + 8192 = 15360) */
+ if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
+ goto func_exit;
+ }
+ /* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
+ /* When Valgrind instrumentation is not enabled, we can read
+ buddy->state to quickly determine that a block is not free.
+ When the block is not free, buddy->state belongs to a compressed
+ page frame that may be flagged uninitialized in our Valgrind
+ instrumentation. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree;
}
-
- /* The field buddy->state can only be trusted for free blocks.
- If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
- it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
- UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) {
-buddy_free:
/* The buddy is free: recombine */
buf_buddy_remove_from_free(buf_pool, bpage, i);
-buddy_free2:
+buddy_is_free:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
i++;
@@ -589,122 +490,42 @@ buddy_free2:
}
ut_a(bpage != buf);
-
- {
- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
- UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
- bpage = next;
- }
+ UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
+ bpage = UT_LIST_GET_NEXT(list, bpage);
}
#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
- /* Valgrind would complain about accessing free memory. */
- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
-#endif /* UNIV_DEBUG_VALGRIND */
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
/* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
+
/* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */
-
- UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(buf_pool, bpage, i);
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
- ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
- goto buddy_free2;
+ buddy->state = BUF_BLOCK_ZIP_FREE;
+ goto buddy_is_free;
}
buf_buddy_add_to_free(buf_pool, bpage, i);
-
- /* Try to relocate the buddy of the free block to buf. */
- buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
- BUF_BUDDY_LOW << i);
-
-#ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
-
- /* The buddy must not be (completely) free, because we
- always recombine adjacent free blocks.
-
- (Parts of the buddy can be free in
- buf_pool->zip_free[j] with j < i.) */
- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(
- ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE
- && ut_list_node_313 != buddy)));
-#endif /* !UNIV_DEBUG_VALGRIND */
-
- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
-
- buf = bpage;
- UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
- ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
- goto buddy_free;
- }
}
+func_exit:
/* Free the block to the buddy list. */
bpage = buf;
-#ifdef UNIV_DEBUG
- if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
- /* This area has most likely been allocated for at
- least one compressed-only block descriptor. Check
- that there are no live objects in the area. This is
- not a complete check: it may yield false positives as
- well as false negatives. Also, due to buddy blocks
- being recombined, it is possible (although unlikely)
- that this branch is never reached. */
-
- char* c;
-
-# ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing
- uninitialized memory. Besides, Valgrind performs a
- more exhaustive check, at every memory access. */
- const buf_page_t* b = buf;
- const buf_page_t* const b_end = (buf_page_t*)
- ((char*) b + (BUF_BUDDY_LOW << i));
-
- for (; b < b_end; b++) {
- /* Avoid false positives (and cause false
- negatives) by checking for b->space < 1000. */
-
- if ((b->state == BUF_BLOCK_ZIP_PAGE
- || b->state == BUF_BLOCK_ZIP_DIRTY)
- && b->space > 0 && b->space < 1000) {
- fprintf(stderr,
- "buddy dirty %p %u (%u,%u) %p,%lu\n",
- (void*) b,
- b->state, b->space, b->offset,
- buf, i);
- }
- }
-# endif /* !UNIV_DEBUG_VALGRIND */
-
- /* Scramble the block. This should make any pointers
- invalid and trigger a segmentation violation. Because
- the scrambling can be reversed, it may be possible to
- track down the object pointing to the freed data by
- dereferencing the unscrambled bpage->LRU or
- bpage->list pointers. */
- for (c = (char*) buf + (BUF_BUDDY_LOW << i);
- c-- > (char*) buf; ) {
- *c = ~*c ^ i;
- }
- } else {
- /* Fill large blocks with a constant pattern. */
- memset(bpage, i, BUF_BUDDY_LOW << i);
- }
-#endif /* UNIV_DEBUG */
+
+ /* Fill the block with a constant pattern. */
+ ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
+ UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(buf_pool, bpage, i);
}
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 52358c52853..fe311ebfd8d 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -1099,70 +1099,6 @@ buf_chunk_not_freed(
return(NULL);
}
-/*********************************************************************//**
-Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
-@return TRUE if all freed */
-static
-ibool
-buf_chunk_all_free(
-/*===============*/
- const buf_chunk_t* chunk) /*!< in: chunk being checked */
-{
- const buf_block_t* block;
- ulint i;
-
- block = chunk->blocks;
-
- for (i = chunk->size; i--; block++) {
-
- if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
-
- return(FALSE);
- }
- }
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Frees a chunk of buffer frames. */
-static
-void
-buf_chunk_free(
-/*===========*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_chunk_t* chunk) /*!< out: chunk of buffers */
-{
- buf_block_t* block;
- const buf_block_t* block_end;
-
- ut_ad(buf_pool_mutex_own(buf_pool));
-
- block_end = chunk->blocks + chunk->size;
-
- for (block = chunk->blocks; block < block_end; block++) {
- ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
- ut_a(!block->page.zip.data);
-
- ut_ad(!block->page.in_LRU_list);
- ut_ad(!block->in_unzip_LRU_list);
- ut_ad(!block->page.in_flush_list);
- /* Remove the block from the free list. */
- ut_ad(block->page.in_free_list);
- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-
- /* Free the latches. */
- mutex_free(&block->mutex);
- rw_lock_free(&block->lock);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_free(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
- UNIV_MEM_UNDESC(block);
- }
-
- os_mem_free_large(chunk->mem, chunk->mem_size);
-}
-
/********************************************************************//**
Set buffer pool size variables after resizing it */
static
@@ -1272,8 +1208,6 @@ buf_pool_free_instance(
chunk = chunks + buf_pool->n_chunks;
while (--chunk >= chunks) {
- /* Bypass the checks of buf_chunk_free(), since they
- would fail at shutdown. */
os_mem_free_large(chunk->mem, chunk->mem_size);
}
@@ -1533,281 +1467,6 @@ buf_relocate(
}
/********************************************************************//**
-Shrinks a buffer pool instance. */
-static
-void
-buf_pool_shrink_instance(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint chunk_size) /*!< in: number of pages to remove */
-{
- buf_chunk_t* chunks;
- buf_chunk_t* chunk;
- ulint max_size;
- ulint max_free_size;
- buf_chunk_t* max_chunk;
- buf_chunk_t* max_free_chunk;
-
- ut_ad(!buf_pool_mutex_own(buf_pool));
-
-try_again:
- btr_search_disable(); /* Empty the adaptive hash index again */
- buf_pool_mutex_enter(buf_pool);
-
-shrink_again:
- if (buf_pool->n_chunks <= 1) {
-
- /* Cannot shrink if there is only one chunk */
- goto func_done;
- }
-
- /* Search for the largest free chunk
- not larger than the size difference */
- chunks = buf_pool->chunks;
- chunk = chunks + buf_pool->n_chunks;
- max_size = max_free_size = 0;
- max_chunk = max_free_chunk = NULL;
-
- while (--chunk >= chunks) {
- if (chunk->size <= chunk_size
- && chunk->size > max_free_size) {
- if (chunk->size > max_size) {
- max_size = chunk->size;
- max_chunk = chunk;
- }
-
- if (buf_chunk_all_free(chunk)) {
- max_free_size = chunk->size;
- max_free_chunk = chunk;
- }
- }
- }
-
- if (!max_free_size) {
-
- ulint dirty = 0;
- ulint nonfree = 0;
- buf_block_t* block;
- buf_block_t* bend;
-
- /* Cannot shrink: try again later
- (do not assign srv_buf_pool_old_size) */
- if (!max_chunk) {
-
- goto func_exit;
- }
-
- block = max_chunk->blocks;
- bend = block + max_chunk->size;
-
- /* Move the blocks of chunk to the end of the
- LRU list and try to flush them. */
- for (; block < bend; block++) {
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_NOT_USED:
- continue;
- case BUF_BLOCK_FILE_PAGE:
- break;
- default:
- nonfree++;
- continue;
- }
-
- mutex_enter(&block->mutex);
- /* The following calls will temporarily
- release block->mutex and buf_pool->mutex.
- Therefore, we have to always retry,
- even if !dirty && !nonfree. */
-
- if (!buf_flush_ready_for_replace(&block->page)) {
-
- buf_LRU_make_block_old(&block->page);
- dirty++;
- } else if (buf_LRU_free_block(&block->page, TRUE)
- != BUF_LRU_FREED) {
- nonfree++;
- }
-
- mutex_exit(&block->mutex);
- }
-
- buf_pool_mutex_exit(buf_pool);
-
- /* Request for a flush of the chunk if it helps.
- Do not flush if there are non-free blocks, since
- flushing will not make the chunk freeable. */
- if (nonfree) {
- /* Avoid busy-waiting. */
- os_thread_sleep(100000);
- } else if (dirty
- && buf_flush_LRU(buf_pool, dirty)
- == ULINT_UNDEFINED) {
-
- buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
- }
-
- goto try_again;
- }
-
- max_size = max_free_size;
- max_chunk = max_free_chunk;
-
- buf_pool->old_pool_size = buf_pool->curr_pool_size;
-
- /* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
- chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
- memcpy(chunks, buf_pool->chunks,
- (max_chunk - buf_pool->chunks) * sizeof *chunks);
- memcpy(chunks + (max_chunk - buf_pool->chunks),
- max_chunk + 1,
- buf_pool->chunks + buf_pool->n_chunks
- - (max_chunk + 1));
- ut_a(buf_pool->curr_size > max_chunk->size);
- buf_pool->curr_size -= max_chunk->size;
- buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
- chunk_size -= max_chunk->size;
- buf_chunk_free(buf_pool, max_chunk);
- mem_free(buf_pool->chunks);
- buf_pool->chunks = chunks;
- buf_pool->n_chunks--;
-
- /* Allow a slack of one megabyte. */
- if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
-
- goto shrink_again;
- }
- goto func_exit;
-
-func_done:
- buf_pool->old_pool_size = buf_pool->curr_pool_size;
-func_exit:
- buf_pool_mutex_exit(buf_pool);
- btr_search_enable();
-}
-
-/********************************************************************//**
-Shrinks the buffer pool. */
-static
-void
-buf_pool_shrink(
-/*============*/
- ulint chunk_size) /*!< in: number of pages to remove */
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
- ulint instance_chunk_size;
-
- instance_chunk_size = chunk_size / srv_buf_pool_instances;
- buf_pool = buf_pool_from_array(i);
- buf_pool_shrink_instance(buf_pool, instance_chunk_size);
- }
-
- buf_pool_set_sizes();
-}
-
-/********************************************************************//**
-Rebuild buf_pool->page_hash for a buffer pool instance. */
-static
-void
-buf_pool_page_hash_rebuild_instance(
-/*================================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ulint i;
- buf_page_t* b;
- buf_chunk_t* chunk;
- ulint n_chunks;
- hash_table_t* zip_hash;
- hash_table_t* page_hash;
-
- buf_pool_mutex_enter(buf_pool);
-
- /* Free, create, and populate the hash table. */
- hash_table_free(buf_pool->page_hash);
- buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
- zip_hash = hash_create(2 * buf_pool->curr_size);
-
- HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
- BUF_POOL_ZIP_FOLD_BPAGE);
-
- hash_table_free(buf_pool->zip_hash);
- buf_pool->zip_hash = zip_hash;
-
- /* Insert the uncompressed file pages to buf_pool->page_hash. */
-
- chunk = buf_pool->chunks;
- n_chunks = buf_pool->n_chunks;
-
- for (i = 0; i < n_chunks; i++, chunk++) {
- ulint j;
- buf_block_t* block = chunk->blocks;
-
- for (j = 0; j < chunk->size; j++, block++) {
- if (buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE) {
- ut_ad(!block->page.in_zip_hash);
- ut_ad(block->page.in_page_hash);
-
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(
- block->page.space,
- block->page.offset),
- &block->page);
- }
- }
- }
-
- /* Insert the compressed-only pages to buf_pool->page_hash.
- All such blocks are either in buf_pool->zip_clean or
- in buf_pool->flush_list. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- ut_ad(!b->in_flush_list);
- ut_ad(b->in_LRU_list);
- ut_ad(b->in_page_hash);
- ut_ad(!b->in_zip_hash);
-
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(b->space, b->offset), b);
- }
-
- buf_flush_list_mutex_enter(buf_pool);
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_ad(b->in_flush_list);
- ut_ad(b->in_LRU_list);
- ut_ad(b->in_page_hash);
- ut_ad(!b->in_zip_hash);
-
- switch (buf_page_get_state(b)) {
- case BUF_BLOCK_ZIP_DIRTY:
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(b->space,
- b->offset), b);
- break;
- case BUF_BLOCK_FILE_PAGE:
- /* uncompressed page */
- break;
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
- }
-
- buf_flush_list_mutex_exit(buf_pool);
- buf_pool_mutex_exit(buf_pool);
-}
-
-/********************************************************************
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
@@ -1913,123 +1572,6 @@ buf_pool_watch_set(
return(NULL);
}
-/********************************************************************//**
-Rebuild buf_pool->page_hash. */
-static
-void
-buf_pool_page_hash_rebuild(void)
-/*============================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
- }
-}
-
-/********************************************************************//**
-Increase the buffer pool size of one buffer pool instance. */
-static
-void
-buf_pool_increase_instance(
-/*=======================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instane */
- ulint change_size) /*!< in: new size of the pool */
-{
- buf_chunk_t* chunks;
- buf_chunk_t* chunk;
-
- buf_pool_mutex_enter(buf_pool);
- chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
-
- memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
-
- chunk = &chunks[buf_pool->n_chunks];
-
- if (!buf_chunk_init(buf_pool, chunk, change_size)) {
- mem_free(chunks);
- } else {
- buf_pool->old_pool_size = buf_pool->curr_pool_size;
- buf_pool->curr_size += chunk->size;
- buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
- mem_free(buf_pool->chunks);
- buf_pool->chunks = chunks;
- buf_pool->n_chunks++;
- }
-
- buf_pool_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Increase the buffer pool size. */
-static
-void
-buf_pool_increase(
-/*==============*/
- ulint change_size)
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_increase_instance(
- buf_pool_from_array(i),
- change_size / srv_buf_pool_instances);
- }
-
- buf_pool_set_sizes();
-}
-
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void)
-/*=================*/
-{
- ulint change_size;
- ulint min_change_size = 1048576 * srv_buf_pool_instances;
-
- buf_pool_mutex_enter_all();
-
- if (srv_buf_pool_old_size == srv_buf_pool_size) {
-
- buf_pool_mutex_exit_all();
-
- return;
-
- } else if (srv_buf_pool_curr_size + min_change_size
- > srv_buf_pool_size) {
-
- change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
- / UNIV_PAGE_SIZE;
-
- buf_pool_mutex_exit_all();
-
- /* Disable adaptive hash indexes and empty the index
- in order to free up memory in the buffer pool chunks. */
- buf_pool_shrink(change_size);
-
- } else if (srv_buf_pool_curr_size + min_change_size
- < srv_buf_pool_size) {
-
- /* Enlarge the buffer pool by at least one megabyte */
-
- change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
-
- buf_pool_mutex_exit_all();
-
- buf_pool_increase(change_size);
- } else {
- srv_buf_pool_size = srv_buf_pool_old_size;
-
- buf_pool_mutex_exit_all();
-
- return;
- }
-
- buf_pool_page_hash_rebuild();
-}
-
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
@@ -2365,7 +1907,7 @@ err_exit:
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
- if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
+ if (buf_LRU_free_block(bpage, FALSE)) {
mutex_exit(block_mutex);
goto lookup;
@@ -2768,12 +2310,8 @@ loop:
if (block) {
/* If the guess is a compressed page descriptor that
- has been allocated by buf_buddy_alloc(), it may have
- been invalidated by buf_buddy_relocate(). In that
- case, block could point to something that happens to
- contain the expected bits in block->page. Similarly,
- the guess may be pointing to a buffer pool chunk that
- has been released when resizing the buffer pool. */
+ has been allocated by buf_page_alloc_descriptor(),
+ it may have been freed by buf_relocate(). */
if (!buf_block_is_uncompressed(buf_pool, block)
|| offset != block->page.offset
@@ -2951,8 +2489,10 @@ wait_until_unfixed:
if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
UT_LIST_REMOVE(list, buf_pool->zip_clean,
&block->page);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
@@ -2978,11 +2518,10 @@ wait_until_unfixed:
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_unzip++;
- bpage->state = BUF_BLOCK_ZIP_FREE;
- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
-
buf_pool_mutex_exit(buf_pool);
+ buf_page_free_descriptor(bpage);
+
/* Decompress the page and apply buffered operations
while not holding buf_pool->mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
@@ -3028,7 +2567,7 @@ wait_until_unfixed:
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
- if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
+ if (buf_LRU_free_block(&block->page, TRUE)) {
mutex_exit(&block->mutex);
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
@@ -3687,20 +3226,11 @@ err_exit:
mutex_exit(&block->mutex);
} else {
- /* Defer buf_buddy_alloc() until after the block has
- been found not to exist. The buf_buddy_alloc() and
- buf_buddy_free() calls may be expensive because of
- buf_buddy_relocate(). */
-
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
-
- /* Initialize the buf_pool pointer. */
- bpage->buf_pool_index = buf_pool_index(buf_pool);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool->mutex. Thus, we must
@@ -3716,8 +3246,6 @@ err_exit:
/* The block was added by some other thread. */
watch_page = NULL;
- bpage->state = BUF_BLOCK_ZIP_FREE;
- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_buddy_free(buf_pool, data, zip_size);
bpage = NULL;
@@ -3725,6 +3253,11 @@ err_exit:
}
}
+ bpage = buf_page_alloc_descriptor();
+
+ /* Initialize the buf_pool pointer. */
+ bpage->buf_pool_index = buf_pool_index(buf_pool);
+
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data;
@@ -3739,7 +3272,6 @@ err_exit:
bpage->space = space;
bpage->offset = offset;
-
#ifdef UNIV_DEBUG
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
@@ -3764,7 +3296,9 @@ err_exit:
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_page_set_io_fix(bpage, BUF_IO_READ);
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
index ebe96a82a10..06b3ec393c1 100644
--- a/storage/innobase/buf/buf0flu.c
+++ b/storage/innobase/buf/buf0flu.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -524,7 +524,9 @@ buf_flush_remove(
case BUF_BLOCK_ZIP_DIRTY:
buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
break;
case BUF_BLOCK_FILE_PAGE:
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index 64afcecfe3c..93c98719e29 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -353,31 +353,34 @@ scan_again:
while (bpage != NULL) {
buf_page_t* prev_bpage;
- ibool prev_bpage_buf_fix = FALSE;
+ mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* bpage->space and bpage->io_fix are protected by
- buf_pool->mutex and block_mutex. It is safe to check
- them while holding buf_pool->mutex only. */
+ buf_pool_mutex and block_mutex. It is safe to check
+ them while holding buf_pool_mutex only. */
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
+ goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
+ goto next_page;
} else {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
+ mutex_exit(block_mutex);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
@@ -387,106 +390,59 @@ scan_again:
goto next_page;
}
+ }
+
+ ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Dropping space %lu page %lu\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Dropping space %lu page %lu\n",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
+ }
#endif
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
- /* This is a compressed-only block
- descriptor. Ensure that prev_bpage
- cannot be relocated when bpage is freed. */
- if (UNIV_LIKELY(prev_bpage != NULL)) {
- switch (buf_page_get_state(
- prev_bpage)) {
- case BUF_BLOCK_FILE_PAGE:
- /* Descriptors of uncompressed
- blocks will not be relocated,
- because we are holding the
- buf_pool->mutex. */
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* Descriptors of compressed-
- only blocks can be relocated,
- unless they are buffer-fixed.
- Because both bpage and
- prev_bpage are protected by
- buf_pool_zip_mutex, it is
- not necessary to acquire
- further mutexes. */
- ut_ad(&buf_pool->zip_mutex
- == block_mutex);
- ut_ad(mutex_own(block_mutex));
- prev_bpage_buf_fix = TRUE;
- prev_bpage->buf_fix_count++;
- break;
- default:
- ut_error;
- }
- }
- } else if (((buf_block_t*) bpage)->is_hashed) {
- ulint page_no;
- ulint zip_size;
-
- buf_pool_mutex_exit(buf_pool);
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ /* This is a compressed-only block
+ descriptor. Do nothing. */
+ } else if (((buf_block_t*) bpage)->is_hashed) {
+ ulint page_no;
+ ulint zip_size;
- zip_size = buf_page_get_zip_size(bpage);
- page_no = buf_page_get_page_no(bpage);
+ buf_pool_mutex_exit(buf_pool);
- mutex_exit(block_mutex);
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
- /* Note that the following call will acquire
- an S-latch on the page */
+ mutex_exit(block_mutex);
- btr_search_drop_page_hash_when_freed(
- id, zip_size, page_no);
- goto scan_again;
- }
+ /* Note that the following call will acquire
+ an S-latch on the page */
- if (bpage->oldest_modification != 0) {
+ btr_search_drop_page_hash_when_freed(
+ id, zip_size, page_no);
+ goto scan_again;
+ }
- buf_flush_remove(bpage);
- }
+ if (bpage->oldest_modification != 0) {
- /* Remove from the LRU list. */
+ buf_flush_remove(bpage);
+ }
- if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
- != BUF_BLOCK_ZIP_FREE) {
- buf_LRU_block_free_hashed_page((buf_block_t*)
- bpage);
- } else {
- /* The block_mutex should have been
- released by buf_LRU_block_remove_hashed_page()
- when it returns BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool->zip_mutex);
- ut_ad(!mutex_own(block_mutex));
-
- if (prev_bpage_buf_fix) {
- /* We temporarily buffer-fixed
- prev_bpage, so that
- buf_buddy_free() could not
- relocate it, in case it was a
- compressed-only block
- descriptor. */
-
- mutex_enter(block_mutex);
- ut_ad(prev_bpage->buf_fix_count > 0);
- prev_bpage->buf_fix_count--;
- mutex_exit(block_mutex);
- }
+ /* Remove from the LRU list. */
- goto next_page_no_mutex;
- }
-next_page:
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex);
+ } else {
+ /* The block_mutex should have been released
+ by buf_LRU_block_remove_hashed_page() when it
+ returns BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ ut_ad(!mutex_own(block_mutex));
}
-
-next_page_no_mutex:
+next_page:
bpage = prev_bpage;
}
@@ -525,6 +481,7 @@ buf_LRU_invalidate_tablespace(
}
}
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
@@ -557,6 +514,7 @@ buf_LRU_insert_zip_clean(
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
}
}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Try to free an uncompressed page of a compressed block from the unzip
@@ -600,7 +558,7 @@ buf_LRU_free_from_unzip_LRU_list(
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
- enum buf_lru_free_block_status freed;
+ ibool freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
@@ -610,24 +568,9 @@ buf_LRU_free_from_unzip_LRU_list(
freed = buf_LRU_free_block(&block->page, FALSE);
mutex_exit(&block->mutex);
- switch (freed) {
- case BUF_LRU_FREED:
+ if (freed) {
return(TRUE);
-
- case BUF_LRU_CANNOT_RELOCATE:
- /* If we failed to relocate, try
- regular LRU eviction. */
- return(FALSE);
-
- case BUF_LRU_NOT_FREED:
- /* The block was buffer-fixed or I/O-fixed.
- Keep looking. */
- continue;
}
-
- /* inappropriate return value from
- buf_LRU_free_block() */
- ut_error;
}
return(FALSE);
@@ -660,10 +603,9 @@ buf_LRU_free_from_common_LRU_list(
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
- enum buf_lru_free_block_status freed;
- unsigned accessed;
- mutex_t* block_mutex
- = buf_page_get_mutex(bpage);
+ ibool freed;
+ unsigned accessed;
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
@@ -673,8 +615,7 @@ buf_LRU_free_from_common_LRU_list(
freed = buf_LRU_free_block(bpage, TRUE);
mutex_exit(block_mutex);
- switch (freed) {
- case BUF_LRU_FREED:
+ if (freed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
the effectiveness of readahead */
@@ -682,21 +623,7 @@ buf_LRU_free_from_common_LRU_list(
++buf_pool->stat.n_ra_pages_evicted;
}
return(TRUE);
-
- case BUF_LRU_NOT_FREED:
- /* The block was dirty, buffer-fixed, or I/O-fixed.
- Keep looking. */
- continue;
-
- case BUF_LRU_CANNOT_RELOCATE:
- /* This should never occur, because we
- want to discard the compressed page too. */
- break;
}
-
- /* inappropriate return value from
- buf_LRU_free_block() */
- ut_error;
}
return(FALSE);
@@ -1422,17 +1349,16 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-NOTE: If this function returns BUF_LRU_FREED, it will temporarily
+NOTE: If this function returns TRUE, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool->mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
+@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
-enum buf_lru_free_block_status
+ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
@@ -1458,7 +1384,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */
- return(BUF_LRU_NOT_FREED);
+ return(FALSE);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
@@ -1470,7 +1396,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
- return(BUF_LRU_NOT_FREED);
+ return(FALSE);
}
} else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */
@@ -1478,7 +1404,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY);
- return(BUF_LRU_NOT_FREED);
+ return(FALSE);
}
goto alloc;
@@ -1487,14 +1413,8 @@ buf_LRU_free_block(
If it cannot be allocated (without freeing a block
from the LRU list), refuse to free bpage. */
alloc:
- buf_pool_mutex_exit_forbid(buf_pool);
- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
- buf_pool_mutex_exit_allow(buf_pool);
-
- if (UNIV_UNLIKELY(!b)) {
- return(BUF_LRU_CANNOT_RELOCATE);
- }
-
+ b = buf_page_alloc_descriptor();
+ ut_a(b);
memcpy(b, bpage, sizeof *b);
}
@@ -1598,7 +1518,9 @@ alloc:
}
if (b->state == BUF_BLOCK_ZIP_PAGE) {
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(b);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
} else {
/* Relocate on buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage, b);
@@ -1665,7 +1587,7 @@ alloc:
mutex_enter(block_mutex);
}
- return(BUF_LRU_FREED);
+ return(TRUE);
}
/******************************************************************//**
@@ -1884,7 +1806,9 @@ buf_LRU_block_remove_hashed_page(
ut_a(bpage->zip.data);
ut_a(buf_page_get_zip_size(bpage));
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
mutex_exit(&buf_pool->zip_mutex);
buf_pool_mutex_exit_forbid(buf_pool);
@@ -1893,11 +1817,8 @@ buf_LRU_block_remove_hashed_page(
buf_pool, bpage->zip.data,
page_zip_get_size(&bpage->zip));
- bpage->state = BUF_BLOCK_ZIP_FREE;
- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow(buf_pool);
-
- UNIV_MEM_UNDESC(bpage);
+ buf_page_free_descriptor(bpage);
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE:
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 11640acbce4..c4d2226227e 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -990,7 +990,6 @@ convert_error_code_to_mysql(
misleading, a new MySQL error
code should be introduced */
- case DB_COL_APPEARS_TWICE_IN_INDEX:
case DB_CORRUPTION:
return(HA_ERR_CRASHED);
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 5b3e166371d..24f3801c7f8 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -481,11 +481,14 @@ UNIV_INTERN
ibool
btr_compress(
/*=========*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr); /*!< in: mtr */
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to merge
+ or lift; the page must not be empty:
+ when deleting records, use btr_discard_page()
+ if the page would become empty */
+ ibool adjust, /*!< in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*************************************************************//**
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index be918439f59..e0907ac7bd0 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,6 +36,9 @@ Created 10/16/1994 Heikki Tuuri
#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
+#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update()
+ must keep cursor position when
+ moving columns to big_rec */
#ifndef UNIV_HOTBACKUP
#include "que0types.h"
@@ -310,7 +313,9 @@ btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update */
+ btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
+ cursor may become invalid if *big_rec == NULL
+ || !(flags & BTR_KEEP_POS_FLAG) */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
@@ -322,6 +327,16 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
+/*************************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ UNIV_COLD __attribute__((nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -364,10 +379,13 @@ UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to compress;
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
- mtr_t* mtr); /*!< in: mtr */
+ ibool adjust, /*!< in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*******************************************************//**
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index 280583f6ccf..c833b3e8572 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -139,7 +139,7 @@ btr_cur_compress_recommendation(
btr_cur_t* cursor, /*!< in: btr cursor */
mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
+ const page_t* page;
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index b255d8c9351..2d7d6146092 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,37 +37,37 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->mutex may only be released and reacquired
-if lru != NULL. This function should only be used for allocating
-compressed page frames or control blocks (buf_page_t). Allocated
-control blocks must be properly initialized immediately after
-buf_buddy_alloc() has returned the memory, before releasing
-buf_pool->mutex.
-@return allocated block, possibly NULL if lru == NULL */
+block->mutex. The buf_pool->mutex may be released and reacquired.
+This function should only be used for allocating compressed page frames.
+@return allocated block, never NULL */
UNIV_INLINE
-void*
+byte*
buf_buddy_alloc(
/*============*/
- buf_pool_t* buf_pool,
- /*!< buffer pool in which the block resides */
- ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
- ibool* lru) /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool->mutex was temporarily released,
- or NULL if the LRU list should not be used */
- __attribute__((malloc));
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
+ the page resides */
+ ulint size, /*!< in: compressed page size
+ (between PAGE_ZIP_MIN_SIZE and
+ UNIV_PAGE_SIZE) */
+ ibool* lru) /*!< in: pointer to a variable
+ that will be assigned TRUE if
+ storage was allocated from the
+ LRU list and buf_pool->mutex was
+ temporarily released */
+ __attribute__((malloc, nonnull));
/**********************************************************************//**
-Release a block. */
+Deallocate a block. */
UNIV_INLINE
void
buf_buddy_free(
/*===========*/
- buf_pool_t* buf_pool,
- /*!< buffer pool in which the block resides */
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
+ the block resides */
+ void* buf, /*!< in: block to be freed, must not
+ be pointed to by the buffer pool */
+ ulint size) /*!< in: block size,
+ up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index e50c33ea15a..b8281f7341a 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,21 +36,21 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
-The buf_pool->mutex may only be released and reacquired if lru != NULL.
-@return allocated block, possibly NULL if lru==NULL */
+The buf_pool->mutex may be released and reacquired.
+@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
- buf_pool_t* buf_pool,
- /*!< in: buffer pool in which the page resides */
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool->mutex was temporarily released,
- or NULL if the LRU list should not be used */
- __attribute__((malloc));
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+ ibool* lru) /*!< in: pointer to a variable that
+ will be assigned TRUE if storage was
+ allocated from the LRU list and
+ buf_pool->mutex was temporarily
+ released */
+ __attribute__((malloc, nonnull));
/**********************************************************************//**
Deallocate a block. */
@@ -77,6 +77,8 @@ buf_buddy_get_slot(
ulint i;
ulint s;
+ ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
}
@@ -87,31 +89,31 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool->mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->mutex may only be released and reacquired
-if lru != NULL. This function should only be used for allocating
-compressed page frames or control blocks (buf_page_t). Allocated
-control blocks must be properly initialized immediately after
-buf_buddy_alloc() has returned the memory, before releasing
-buf_pool->mutex.
-@return allocated block, possibly NULL if lru == NULL */
+block->mutex. The buf_pool->mutex may be released and reacquired.
+This function should only be used for allocating compressed page frames.
+@return allocated block, never NULL */
UNIV_INLINE
-void*
+byte*
buf_buddy_alloc(
/*============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool in which
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
the page resides */
- ulint size, /*!< in: block size, up to
- UNIV_PAGE_SIZE */
+ ulint size, /*!< in: compressed page size
+ (between PAGE_ZIP_MIN_SIZE and
+ UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable
that will be assigned TRUE if
storage was allocated from the
LRU list and buf_pool->mutex was
- temporarily released, or NULL if
- the LRU list should not be used */
+ temporarily released */
{
ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(ut_is_2pow(size));
+ ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size <= UNIV_PAGE_SIZE);
- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
+ return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
+ lru));
}
/**********************************************************************//**
@@ -120,13 +122,17 @@ UNIV_INLINE
void
buf_buddy_free(
/*===========*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint size) /*!< in: block size, up to
- UNIV_PAGE_SIZE */
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
+ the block resides */
+ void* buf, /*!< in: block to be freed, must not
+ be pointed to by the buffer pool */
+ ulint size) /*!< in: block size,
+ up to UNIV_PAGE_SIZE */
{
ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(ut_is_2pow(size));
+ ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(size <= UNIV_PAGE_SIZE);
buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
}
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d83d484bace..b7621c679f0 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -246,12 +246,6 @@ buf_relocate(
BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
buf_page_t* dpage) /*!< in/out: destination control block */
__attribute__((nonnull));
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void);
-/*=================*/
/*********************************************************************//**
Gets the current size of buffer buf_pool in bytes.
@return size in bytes */
@@ -275,6 +269,23 @@ ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
/********************************************************************//**
+Allocates a buf_page_t descriptor. This function must succeed. In case
+of failure we assert in this function. */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+ __attribute__((malloc));
+/********************************************************************//**
+Free a buf_page_t descriptor. */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+ buf_page_t* bpage) /*!< in: bpage descriptor to free. */
+ __attribute__((nonnull));
+
+/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
UNIV_INTERN
@@ -582,6 +593,31 @@ buf_block_get_modify_clock(
#else /* !UNIV_HOTBACKUP */
# define buf_block_modify_clock_inc(block) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+ buf_block_t* block) /*!< in/out: block to bufferfix */
+ __attribute__((nonnull));
+#ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b in/out: block to bufferfix
+@param f in: file name where requested
+@param l in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+#else /* UNIV_SYNC_DEBUG */
+/** Increments the bufferfix count.
+@param b in/out: block to bufferfix
+@param f in: file name where requested
+@param l in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+#endif /* UNIV_SYNC_DEBUG */
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value
@@ -1196,7 +1232,7 @@ ulint
buf_get_free_list_len(void);
/*=======================*/
-/********************************************************************
+/********************************************************************//**
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
@@ -1732,8 +1768,10 @@ struct buf_pool_struct{
frames and buf_page_t descriptors of blocks that exist
in the buffer pool only in compressed form. */
/* @{ */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/*!< unmodified compressed pages */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
/*!< buddy free lists */
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index e2e83de0a78..4fdaf6ff43e 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -754,6 +754,35 @@ buf_block_get_lock_hash_val(
}
/********************************************************************//**
+Allocates a buf_page_t descriptor. This function must succeed. In case
+of failure we assert in this function.
+@return the allocated descriptor */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+{
+ buf_page_t* bpage;
+
+ bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
+ ut_d(memset(bpage, 0, sizeof *bpage));
+ UNIV_MEM_ALLOC(bpage, sizeof *bpage);
+
+ return(bpage);
+}
+
+/********************************************************************//**
+Free a buf_page_t descriptor. */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+ buf_page_t* bpage) /*!< in: bpage descriptor to free. */
+{
+ ut_free(bpage);
+}
+
+/********************************************************************//**
Frees a buffer block which does not contain a file page. */
UNIV_INLINE
void
@@ -897,19 +926,6 @@ buf_block_buf_fix_inc_func(
block->page.buf_fix_count++;
}
-#ifdef UNIV_SYNC_DEBUG
-/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-#else /* UNIV_SYNC_DEBUG */
-/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-#endif /* UNIV_SYNC_DEBUG */
/*******************************************************************//**
Decrements the bufferfix count. */
@@ -1160,7 +1176,7 @@ buf_block_dbg_add_level(
where we have acquired latch */
ulint level) /*!< in: latching order level */
{
- sync_thread_add_level(&block->lock, level);
+ sync_thread_add_level(&block->lock, level, FALSE);
}
#endif /* UNIV_SYNC_DEBUG */
/********************************************************************//**
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 9b150188b03..f42894a138c 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
-/** The return type of buf_LRU_free_block() */
-enum buf_lru_free_block_status {
- /** freed */
- BUF_LRU_FREED = 0,
- /** not freed because the caller asked to remove the
- uncompressed frame but the control block cannot be
- relocated */
- BUF_LRU_CANNOT_RELOCATE,
- /** not freed because of some other reason */
- BUF_LRU_NOT_FREED
-};
-
/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
@@ -85,6 +73,7 @@ void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /*!< in: space id */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
@@ -92,22 +81,22 @@ void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-NOTE: If this function returns BUF_LRU_FREED, it will temporarily
+NOTE: If this function returns TRUE, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool->mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
+@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
-enum buf_lru_free_block_status
+ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index a2175098704..0cc2defb3ff 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
+#include "page0types.h"
+
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
@@ -60,17 +62,10 @@ enum buf_io_fix {
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
-#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT 6
-#else /* 64-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT 7
-#endif
+#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
+
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
- /*!< minimum block size in the binary
- buddy system; must be at least
- sizeof(buf_page_t) */
+
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 74a2354bce3..28ef64500cc 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -64,8 +64,6 @@ enum db_err {
DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
to a table failed */
DB_CORRUPTION, /* data structure corruption noticed */
- DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index
- where same column appears twice */
DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
from a table failed */
DB_NO_SAVEPOINT, /* no savepoint exists with the given
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 3520677dfb3..81474fa35f5 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,6 +27,8 @@ Created 10/4/1994 Heikki Tuuri
#include "buf0types.h"
#ifdef UNIV_DEBUG
+# include "rem0cmp.h"
+
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
@@ -268,6 +270,7 @@ page_cur_tuple_insert(
index, rec, offsets, mtr);
}
+ ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets));
mem_heap_free(heap);
return(rec);
}
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 7091d8c2707..346f65302f7 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -284,16 +284,42 @@ page_get_supremum_offset(
const page_t* page); /*!< in: page which must have record(s) */
#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+
/************************************************************//**
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list.
-@return middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record */
UNIV_INTERN
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+ const page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record */
+ __attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+ page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record */
+ __attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return middle record */
+UNIV_INLINE
rec_t*
page_get_middle_rec(
/*================*/
- page_t* page); /*!< in: page */
-#ifndef UNIV_HOTBACKUP
+ page_t* page) /*!< in: page */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Compares a data tuple to a physical record. Differs from the function
cmp_dtuple_rec_with_match in the way that the record must reside on an
@@ -348,6 +374,7 @@ page_get_n_recs(
/***************************************************************//**
Returns the number of records before the given record in chain.
The number includes infimum and supremum records.
+This is the inverse function of page_rec_get_nth().
@return number of records */
UNIV_INTERN
ulint
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 115cee64f8b..c1a0ce73982 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -419,7 +419,37 @@ page_rec_is_infimum(
return(page_rec_is_infimum_low(page_offset(rec)));
}
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+ page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record */
+{
+ return((rec_t*) page_rec_get_nth_const(page, nth));
+}
+
#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return middle record */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+ page_t* page) /*!< in: page */
+{
+ ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+
+ return(page_rec_get_nth(page, middle));
+}
+
/*************************************************************//**
Compares a data tuple to a physical record. Differs from the function
cmp_dtuple_rec_with_match in the way that the record must reside on an
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index 75e15d67246..c2849be7c3e 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,27 +38,16 @@ Created 4/20/1996 Heikki Tuuri
#include "btr0types.h"
/*********************************************************************//**
-Gets the offset of the trx id field, in bytes relative to the origin of
+Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
UNIV_INLINE
ulint
-row_get_trx_id_offset_func(
-/*=======================*/
-#ifdef UNIV_DEBUG
- const rec_t* rec, /*!< in: record */
-#endif /* UNIV_DEBUG */
+row_get_trx_id_offset(
+/*==================*/
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const ulint* offsets)/*!< in: record offsets */
__attribute__((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-# define row_get_trx_id_offset(rec, index, offsets) \
- row_get_trx_id_offset_func(rec, index, offsets)
-#else /* UNIV_DEBUG */
-# define row_get_trx_id_offset(rec, index, offsets) \
- row_get_trx_id_offset_func(index, offsets)
-#endif /* UNIV_DEBUG */
-
/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
@@ -77,9 +66,10 @@ UNIV_INLINE
roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 9d19e430e16..0b9ca982af8 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,25 +28,22 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0undo.h"
/*********************************************************************//**
-Gets the offset of trx id field, in bytes relative to the origin of
+Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
UNIV_INLINE
ulint
-row_get_trx_id_offset_func(
-/*=======================*/
-#ifdef UNIV_DEBUG
- const rec_t* rec, /*!< in: record */
-#endif /* UNIV_DEBUG */
+row_get_trx_id_offset(
+/*==================*/
const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const ulint* offsets)/*!< in: record offsets */
{
ulint pos;
ulint offset;
ulint len;
ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_validate(NULL, index, offsets));
pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
@@ -76,7 +73,7 @@ row_get_rec_trx_id(
offset = index->trx_id_offset;
if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
+ offset = row_get_trx_id_offset(index, offsets);
}
return(trx_read_trx_id(rec + offset));
@@ -89,9 +86,9 @@ UNIV_INLINE
roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
@@ -101,7 +98,7 @@ row_get_rec_roll_ptr(
offset = index->trx_id_offset;
if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
+ offset = row_get_trx_id_offset(index, offsets);
}
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 18e22f1eca9..0894ed373b0 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -171,7 +171,7 @@ row_upd_rec_sys_fields(
ulint offset = index->trx_id_offset;
if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
+ offset = row_get_trx_id_offset(index, offsets);
}
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 2ffd9fdafb5..5d15677ccce 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -603,16 +603,16 @@ rw_lock_x_unlock_direct(
ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
-
if (lock->lock_word == 0) {
lock->recursive = FALSE;
UNIV_MEM_INVALID(&lock->writer_thread,
sizeof lock->writer_thread);
}
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+#endif
+
lock->lock_word += X_LOCK_DECR;
ut_ad(!lock->waiters);
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index b823c9d5259..dd74ccee523 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -400,8 +400,10 @@ void
sync_thread_add_level(
/*==================*/
void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level); /*!< in: level in the latching order; if
+ ulint level, /*!< in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
+ ibool relock) /*!< in: TRUE if re-entering an x-lock */
+ __attribute__((nonnull));
/******************************************************************//**
Removes a latch from the thread level array if it is found there.
@return TRUE if found in the array; it is no error if the latch is
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index dc0ca2285b9..9cec1933e9d 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -278,6 +278,17 @@ ibool
trx_in_trx_list(
/*============*/
trx_t* in_trx);/*!< in: trx */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that trx_id resolves to a transaction flagged as recovered.
+@return TRUE (the only value ever returned) */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+ trx_id_t trx_id) /*!< in: id looked up with trx_get_on_id() */
+ __attribute__((warn_unused_result));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/*****************************************************************//**
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 355f118a1ec..5e702b25325 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -277,6 +277,28 @@ trx_get_on_id(
return(NULL);
}
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that trx_id resolves to a transaction flagged as recovered.
+@return TRUE (always; a failed check trips ut_a() instead) */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+ trx_id_t trx_id) /*!< in: id looked up with trx_get_on_id() */
+{
+ trx_t* trx;
+
+ mutex_enter(&kernel_mutex); /* held across the lookup and both checks */
+ trx = trx_get_on_id(trx_id);
+ ut_a(trx); /* the id must resolve to a transaction object */
+ ut_a(trx->is_recovered); /* and that transaction must be marked recovered */
+ mutex_exit(&kernel_mutex);
+
+ return(TRUE); /* constant result, so callers can use the call as an || operand */
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
/****************************************************************//**
Returns the minimum trx id in trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->conc_state to
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 7b5e138501d..cb175c2c234 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -1,8 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Sun Microsystems, Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
index 936762b986a..b8c492328e8 100644
--- a/storage/innobase/page/page0cur.c
+++ b/storage/innobase/page/page0cur.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1180,14 +1180,15 @@ page_cur_insert_rec_zip_reorg(
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
pos = page_rec_get_n_recs_before(rec);
+ ut_ad(pos > 0);
if (page_zip_reorganize(block, index, mtr)) {
/* The page was reorganized: Find rec by seeking to pos,
and update *current_rec. */
- rec = page + PAGE_NEW_INFIMUM;
-
- while (--pos) {
- rec = page + rec_get_next_offs(rec, TRUE);
+ if (pos > 1) {
+ rec = page_rec_get_nth(page, pos - 1);
+ } else {
+ rec = page + PAGE_NEW_INFIMUM;
}
*current_rec = rec;
@@ -1283,6 +1284,12 @@ page_cur_insert_rec_zip(
insert_rec = page_cur_insert_rec_zip_reorg(
current_rec, block, index, insert_rec,
page, page_zip, mtr);
+#ifdef UNIV_DEBUG
+ if (insert_rec) {
+ rec_offs_make_valid(
+ insert_rec, index, offsets);
+ }
+#endif /* UNIV_DEBUG */
}
return(insert_rec);
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
index 6064d028ae1..1c74a1d5cab 100644
--- a/storage/innobase/page/page0page.c
+++ b/storage/innobase/page/page0page.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1465,55 +1465,54 @@ page_dir_balance_slot(
}
}
-#ifndef UNIV_HOTBACKUP
/************************************************************//**
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list.
-@return middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record */
UNIV_INTERN
-rec_t*
-page_get_middle_rec(
-/*================*/
- page_t* page) /*!< in: page */
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+ const page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record; counted down in place below */
{
- page_dir_slot_t* slot;
- ulint middle;
+ const page_dir_slot_t* slot;
ulint i;
ulint n_owned;
- ulint count;
- rec_t* rec;
-
- /* This many records we must leave behind */
- middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+ const rec_t* rec;
- count = 0;
+ ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); /* upper bound on nth */
for (i = 0;; i++) {
slot = page_dir_get_nth_slot(page, i);
n_owned = page_dir_slot_get_n_owned(slot);
- if (count + n_owned > middle) {
+ if (n_owned > nth) { /* stop at the first slot whose owned records cover nth */
break;
} else {
- count += n_owned;
+ nth -= n_owned; /* skip the records owned by slot i */
}
}
ut_ad(i > 0);
slot = page_dir_get_nth_slot(page, i - 1);
- rec = (rec_t*) page_dir_slot_get_rec(slot);
- rec = page_rec_get_next(rec);
-
- /* There are now count records behind rec */
+ rec = page_dir_slot_get_rec(slot); /* start from the record of slot i - 1 */
- for (i = 0; i < middle - count; i++) {
- rec = page_rec_get_next(rec);
+ if (page_is_comp(page)) {
+ do {
+ rec = page_rec_get_next_low(rec, TRUE);
+ ut_ad(rec);
+ } while (nth--); /* nth + 1 steps in total */
+ } else {
+ do {
+ rec = page_rec_get_next_low(rec, FALSE);
+ ut_ad(rec);
+ } while (nth--); /* nth + 1 steps in total */
}
return(rec);
}
-#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Returns the number of records before the given record in chain.
@@ -1575,6 +1574,7 @@ page_rec_get_n_recs_before(
n--;
ut_ad(n >= 0);
+ ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
return((ulint) n);
}
diff --git a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
index c92ad9dc25b..fb618beac7e 100644
--- a/storage/innobase/page/page0zip.c
+++ b/storage/innobase/page/page0zip.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -151,6 +151,20 @@ page_zip_empty_size(
#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
+Gets the number of elements in the dense page directory,
+including deleted records (the free list).
+@return heap record count of page_zip->data minus PAGE_HEAP_NO_USER_LOW */
+UNIV_INLINE
+ulint
+page_zip_dir_elems(
+/*===============*/
+ const page_zip_des_t* page_zip) /*!< in: compressed page; only page_zip->data is read */
+{
+ /* The heap count also covers the page infimum and supremum; exclude them. */
+ return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
+}
+
+/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list).
@return length of dense page directory, in bytes */
@@ -160,14 +174,42 @@ page_zip_dir_size(
/*==============*/
const page_zip_des_t* page_zip) /*!< in: compressed page */
{
- /* Exclude the page infimum and supremum from the record count. */
- ulint size = PAGE_ZIP_DIR_SLOT_SIZE
- * (page_dir_get_n_heap(page_zip->data)
- - PAGE_HEAP_NO_USER_LOW);
- return(size);
+ return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
+}
+
+/*************************************************************//**
+Gets an offset to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@return page size minus n_dense directory slots, as a byte offset */
+UNIV_INLINE
+ulint
+page_zip_dir_start_offs(
+/*====================*/
+ const page_zip_des_t* page_zip, /*!< in: compressed page */
+ ulint n_dense) /*!< in: number of directory entries (not bytes) */
+{
+ ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip)); /* directory must be smaller than the page */
+
+ return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
}
/*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@param[in] page_zip compressed page (expanded twice by this macro)
+@param[in] n_dense number of entries in the directory
+@return (page_zip)->data plus page_zip_dir_start_offs(page_zip, n_dense) */
+#define page_zip_dir_start_low(page_zip, n_dense) \
+ ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
+/*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list); n_dense is page_zip_dir_elems().
+@param[in] page_zip compressed page (expanded more than once by this macro)
+@return pointer to the dense page directory */
+#define page_zip_dir_start(page_zip) \
+ page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
+
+/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list).
@return length of dense page directory comprising existing records, in bytes */
@@ -2242,8 +2284,7 @@ zlib_done:
}
/* Restore the uncompressed columns in heap_no order. */
- storage = page_zip->data + page_zip_get_size(page_zip)
- - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+ storage = page_zip_dir_start_low(page_zip, n_dense);
for (slot = 0; slot < n_dense; slot++) {
rec_t* rec = recs[slot];
@@ -2728,8 +2769,7 @@ zlib_done:
return(FALSE);
}
- storage = page_zip->data + page_zip_get_size(page_zip)
- - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+ storage = page_zip_dir_start_low(page_zip, n_dense);
externs = storage - n_dense
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
@@ -3457,9 +3497,7 @@ page_zip_write_rec(
}
/* Write the data bytes. Store the uncompressed bytes separately. */
- storage = page_zip->data + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE;
+ storage = page_zip_dir_start(page_zip);
if (page_is_leaf(page)) {
ulint len;
@@ -3755,9 +3793,7 @@ corrupt:
field = page + offset;
storage = page_zip->data + z_offset;
- storage_end = page_zip->data + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE;
+ storage_end = page_zip_dir_start(page_zip);
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
@@ -3793,7 +3829,9 @@ page_zip_write_node_ptr(
{
byte* field;
byte* storage;
+#ifdef UNIV_DEBUG
page_t* page = page_align(rec);
+#endif /* UNIV_DEBUG */
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
ut_ad(page_simple_validate_new(page));
@@ -3810,9 +3848,7 @@ page_zip_write_node_ptr(
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
UNIV_MEM_ASSERT_RW(rec, size);
- storage = page_zip->data + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE
+ storage = page_zip_dir_start(page_zip)
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
field = rec + size - REC_NODE_PTR_SIZE;
@@ -3861,7 +3897,9 @@ page_zip_write_trx_id_and_roll_ptr(
{
byte* field;
byte* storage;
+#ifdef UNIV_DEBUG
page_t* page = page_align(rec);
+#endif /* UNIV_DEBUG */
ulint len;
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
@@ -3879,9 +3917,7 @@ page_zip_write_trx_id_and_roll_ptr(
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- storage = page_zip->data + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE
+ storage = page_zip_dir_start(page_zip)
- (rec_get_heap_no_new(rec) - 1)
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
@@ -3912,17 +3948,9 @@ page_zip_write_trx_id_and_roll_ptr(
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
}
-#ifdef UNIV_ZIP_DEBUG
-/** Set this variable in a debugger to disable page_zip_clear_rec().
-The only observable effect should be the compression ratio due to
-deleted records not being zeroed out. In rare cases, there can be
-page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
-columns if the space is reallocated for a smaller record. */
-UNIV_INTERN ibool page_zip_clear_rec_disable;
-#endif /* UNIV_ZIP_DEBUG */
-
/**********************************************************************//**
-Clear an area on the uncompressed and compressed page, if possible. */
+Clear an area on the uncompressed and compressed page.
+Do not clear the data payload, as that would grow the modification log. */
static
void
page_zip_clear_rec(
@@ -3934,6 +3962,9 @@ page_zip_clear_rec(
{
ulint heap_no;
page_t* page = page_align(rec);
+ byte* storage;
+ byte* field;
+ ulint len;
/* page_zip_validate() would fail here if a record
containing externally stored columns is being deleted. */
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -3949,60 +3980,38 @@ page_zip_clear_rec(
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
rec_offs_extra_size(offsets));
- if (
-#ifdef UNIV_ZIP_DEBUG
- !page_zip_clear_rec_disable &&
-#endif /* UNIV_ZIP_DEBUG */
- page_zip->m_end
- + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
- + page_zip_get_trailer_len(page_zip,
- dict_index_is_clust(index), NULL)
- < page_zip_get_size(page_zip)) {
- byte* data;
-
- /* Clear only the data bytes, because the allocator and
- the decompressor depend on the extra bytes. */
- memset(rec, 0, rec_offs_data_size(offsets));
-
- if (!page_is_leaf(page)) {
- /* Clear node_ptr on the compressed page. */
- byte* storage = page_zip->data
- + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page)
- - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE;
-
- memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
- 0, REC_NODE_PTR_SIZE);
- } else if (dict_index_is_clust(index)) {
- /* Clear trx_id and roll_ptr on the compressed page. */
- byte* storage = page_zip->data
- + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page)
- - PAGE_HEAP_NO_USER_LOW)
- * PAGE_ZIP_DIR_SLOT_SIZE;
-
- memset(storage - (heap_no - 1)
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
- 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- }
+ if (!page_is_leaf(page)) {
+ /* Clear node_ptr. On the compressed page,
+ there is an array of node_ptr immediately before the
+ dense page directory, at the very end of the page. */
+ storage = page_zip_dir_start(page_zip);
+ ut_ad(dict_index_get_n_unique_in_tree(index) ==
+ rec_offs_n_fields(offsets) - 1);
+ field = rec_get_nth_field(rec, offsets,
+ rec_offs_n_fields(offsets) - 1,
+ &len);
+ ut_ad(len == REC_NODE_PTR_SIZE);
- /* Log that the data was zeroed out. */
- data = page_zip->data + page_zip->m_end;
- ut_ad(!*data);
- if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
- *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
- ut_ad(!*data);
- }
- *data++ = (byte) ((heap_no - 1) << 1 | 1);
- ut_ad(!*data);
- ut_ad((ulint) (data - page_zip->data)
- < page_zip_get_size(page_zip));
- page_zip->m_end = data - page_zip->data;
- page_zip->m_nonempty = TRUE;
- } else if (page_is_leaf(page) && dict_index_is_clust(index)) {
- /* Do not clear the record, because there is not enough space
- to log the operation. */
+ ut_ad(!rec_offs_any_extern(offsets));
+ memset(field, 0, REC_NODE_PTR_SIZE);
+ memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
+ 0, REC_NODE_PTR_SIZE);
+ } else if (dict_index_is_clust(index)) {
+ /* Clear trx_id and roll_ptr. On the compressed page,
+ there is an array of these fields immediately before the
+ dense page directory, at the very end of the page. */
+ const ulint trx_id_pos
+ = dict_col_get_clust_pos(
+ dict_table_get_sys_col(
+ index->table, DATA_TRX_ID), index);
+ storage = page_zip_dir_start(page_zip);
+ field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ memset(storage - (heap_no - 1)
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+ 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
if (rec_offs_any_extern(offsets)) {
ulint i;
@@ -4011,15 +4020,18 @@ page_zip_clear_rec(
/* Clear all BLOB pointers in order to make
page_zip_validate() pass. */
if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- byte* field = rec_get_nth_field(
+ field = rec_get_nth_field(
rec, offsets, i, &len);
+ ut_ad(len
+ == BTR_EXTERN_FIELD_REF_SIZE);
memset(field + len
- BTR_EXTERN_FIELD_REF_SIZE,
0, BTR_EXTERN_FIELD_REF_SIZE);
}
}
}
+ } else {
+ ut_ad(!rec_offs_any_extern(offsets));
}
#ifdef UNIV_ZIP_DEBUG
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
index 5a96e608ab5..30fc28561fa 100644
--- a/storage/innobase/rem/rem0rec.c
+++ b/storage/innobase/rem/rem0rec.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -408,7 +408,7 @@ rec_init_offsets(
do {
ulint len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += 4;
+ len = offs += REC_NODE_PTR_SIZE;
goto resolved;
}
@@ -640,7 +640,7 @@ rec_get_offsets_reverse(
do {
ulint len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += 4;
+ len = offs += REC_NODE_PTR_SIZE;
goto resolved;
}
@@ -1131,9 +1131,9 @@ rec_convert_dtuple_to_rec_comp(
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(len == 4);
+ ut_ad(len == REC_NODE_PTR_SIZE);
memcpy(end, dfield_get_data(field), len);
- end += 4;
+ end += REC_NODE_PTR_SIZE;
break;
}
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index e0414cde80d..715e376f8f9 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify(
return(DB_LOCK_TABLE_FULL);
}
- err = btr_cur_pessimistic_update(0, cursor,
- heap, big_rec, update,
- 0, thr, mtr);
+ err = btr_cur_pessimistic_update(
+ BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update,
+ 0, thr, mtr);
}
return(err);
@@ -1973,6 +1973,7 @@ row_ins_index_entry_low(
ulint modify = 0; /* remove warning */
rec_t* insert_rec;
rec_t* rec;
+ ulint* offsets;
ulint err;
ulint n_unique;
big_rec_t* big_rec = NULL;
@@ -2081,6 +2082,42 @@ row_ins_index_entry_low(
err = row_ins_clust_index_entry_by_modify(
mode, &cursor, &heap, &big_rec, entry,
thr, &mtr);
+
+ if (big_rec) {
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored
+ columns while still x-latching
+ index->lock and block->lock. We have
+ to mtr_commit(mtr) first, so that the
+ redo log will be written in the
+ correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr
+ freed B-tree pages on which some of
+ the big_rec fields will be written. */
+ btr_cur_mtr_commit_and_start(&cursor, &mtr);
+
+ rec = btr_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(
+ rec, index, NULL,
+ ULINT_UNDEFINED, &heap);
+
+ err = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, &mtr, FALSE, big_rec);
+ /* If writing big_rec fails (for
+ example, because of DB_OUT_OF_FILE_SPACE),
+ the record will be corrupted. Even if
+ we did not update any externally
+ stored columns, our update could cause
+ the record to grow so that a
+ non-updated column was selected for
+ external storage. This non-update
+ would not have been written to the
+ undo log, and thus the record cannot
+ be rolled back. */
+ ut_a(err == DB_SUCCESS);
+ goto stored_big_rec;
+ }
} else {
ut_ad(!n_ext);
err = row_ins_sec_index_entry_by_modify(
@@ -2109,8 +2146,6 @@ function_exit:
mtr_commit(&mtr);
if (UNIV_LIKELY_NULL(big_rec)) {
- rec_t* rec;
- ulint* offsets;
mtr_start(&mtr);
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2124,6 +2159,7 @@ function_exit:
index, btr_cur_get_block(&cursor),
rec, offsets, &mtr, FALSE, big_rec);
+stored_big_rec:
if (modify) {
dtuple_big_rec_free(big_rec);
} else {
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index e1ada387729..a56d419d1f0 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -2015,41 +2015,13 @@ row_create_index_for_mysql(
trx_start_if_not_started(trx);
- /* Check that the same column does not appear twice in the index.
- Starting from 4.0.14, InnoDB should be able to cope with that, but
- safer not to allow them. */
-
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- ulint j;
-
- for (j = 0; j < i; j++) {
- if (0 == ut_strcmp(
- dict_index_get_nth_field(index, j)->name,
- dict_index_get_nth_field(index, i)->name)) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: column ", stderr);
- ut_print_name(stderr, trx, FALSE,
- dict_index_get_nth_field(
- index, i)->name);
- fputs(" appears twice in ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: This is not allowed"
- " in InnoDB.\n", stderr);
-
- err = DB_COL_APPEARS_TWICE_IN_INDEX;
-
- goto error_handling;
- }
- }
-
- /* Check also that prefix_len and actual length
- is less than that from DICT_MAX_FIELD_LEN_BY_FORMAT() */
+ for (i = 0; i < index->n_def; i++) {
+ /* Check that prefix_len and actual length do not exceed
+ DICT_MAX_FIELD_LEN_BY_FORMAT(table) */
len = dict_index_get_nth_field(index, i)->prefix_len;
- if (field_lengths) {
+ if (field_lengths && field_lengths[i]) {
len = ut_max(len, field_lengths[i]);
}
@@ -2057,6 +2029,7 @@ row_create_index_for_mysql(
if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
err = DB_TOO_BIG_INDEX_COL;
+ dict_mem_index_free(index);
goto error_handling;
}
}
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
index abbea1b585f..74bc96b8191 100644
--- a/storage/innobase/row/row0row.c
+++ b/storage/innobase/row/row0row.c
@@ -101,12 +101,27 @@ row_build_index_entry(
dfield_copy(dfield, dfield2);
- if (dfield_is_null(dfield) || ind_field->prefix_len == 0) {
+ if (dfield_is_null(dfield)) {
continue;
}
- /* If a column prefix index, take only the prefix.
- Prefix-indexed columns may be externally stored. */
+ if (ind_field->prefix_len == 0
+ && (!dfield_is_ext(dfield)
+ || dict_index_is_clust(index))) {
+ /* The dfield_copy() above suffices for
+ columns that are stored in-page, or for
+ clustered index record columns that are not
+ part of a column prefix in the PRIMARY KEY. */
+ continue;
+ }
+
+ /* If the column is stored externally (off-page) in
+ the clustered index, it must be an ordering field in
+ the secondary index. In the Antelope format, only
+ prefix-indexed columns may be stored off-page in the
+ clustered index record. In the Barracuda format, also
+ fully indexed long CHAR or VARCHAR columns may be
+ stored off-page. */
ut_ad(col->ord_part);
if (UNIV_LIKELY_NULL(ext)) {
@@ -119,11 +134,32 @@ row_build_index_entry(
}
dfield_set_data(dfield, buf, len);
}
+
+ if (ind_field->prefix_len == 0) {
+ /* In the Barracuda format
+ (ROW_FORMAT=DYNAMIC or
+ ROW_FORMAT=COMPRESSED), we can have a
+ secondary index on an entire column
+ that is stored off-page in the
+ clustered index. As this is not a
+ prefix index (prefix_len == 0),
+ include the entire off-page column in
+ the secondary index record. */
+ continue;
+ }
} else if (dfield_is_ext(dfield)) {
+ /* This table should be in Antelope format
+ (ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT).
+ In that format, the maximum column prefix
+ index length is 767 bytes, and the clustered
+ index record contains a 768-byte prefix of
+ each off-page column. */
ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
len -= BTR_EXTERN_FIELD_REF_SIZE;
}
+ /* If a column prefix index, take only the prefix. */
+ ut_ad(ind_field->prefix_len);
len = dtype_get_at_most_n_mbchars(
col->prtype, col->mbminmaxlen,
ind_field->prefix_len, len, dfield_get_data(dfield));
@@ -192,6 +228,7 @@ row_build(
ut_ad(index && rec && heap);
ut_ad(dict_index_is_clust(index));
+ ut_ad(!mutex_own(&kernel_mutex));
if (!offsets) {
offsets = rec_get_offsets(rec, index, offsets_,
@@ -200,12 +237,20 @@ row_build(
ut_ad(rec_offs_validate(rec, index, offsets));
}
-#if 0 /* defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG */
- /* This one can fail in trx_rollback_active() if
- the server crashed during an insert before the
- btr_store_big_rec_extern_fields() did mtr_commit()
- all BLOB pointers to the clustered index record. */
- ut_a(!rec_offs_any_null_extern(rec, offsets));
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ /* This condition can occur during crash recovery before
+ trx_rollback_active() has completed execution.
+
+ This condition is possible if the server crashed
+ during an insert or update before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the clustered index record.
+
+ If the record contains a null BLOB pointer, look up the
+ transaction that holds the implicit lock on this record, and
+ assert that it was recovered (and will soon be rolled back). */
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 04b3dcb3a4a..a2f6c17413f 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1999,28 +1999,43 @@ row_upd_clust_rec(
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(index->table)));
- err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
- &heap, &big_rec, node->update,
- node->cmpl_info, thr, mtr);
- mtr_commit(mtr);
-
- if (err == DB_SUCCESS && big_rec) {
+ err = btr_cur_pessimistic_update(
+ BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
+ &heap, &big_rec, node->update, node->cmpl_info, thr, mtr);
+ if (big_rec) {
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec;
rec_offs_init(offsets_);
- mtr_start(mtr);
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored columns while still
+ x-latching index->lock and block->lock. We have to
+ mtr_commit(mtr) first, so that the redo log will be
+ written in the correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr freed B-tree
+ pages on which some of the big_rec fields will be
+ written. */
+ btr_cur_mtr_commit_and_start(btr_cur, mtr);
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(btr_cur), rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
mtr, TRUE, big_rec);
- mtr_commit(mtr);
+ /* If writing big_rec fails (for example, because of
+ DB_OUT_OF_FILE_SPACE), the record will be corrupted.
+ Even if we did not update any externally stored
+ columns, our update could cause the record to grow so
+ that a non-updated column was selected for external
+ storage. That non-updated column would not have been written
+ to the undo log, and thus the record cannot be rolled
+ back. */
+ ut_a(err == DB_SUCCESS);
}
+ mtr_commit(mtr);
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
index 397d505df50..5b72e0afdf4 100644
--- a/storage/innobase/sync/sync0rw.c
+++ b/storage/innobase/sync/sync0rw.c
@@ -782,7 +782,9 @@ rw_lock_add_debug_info(
rw_lock_debug_mutex_exit();
if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_add_level(lock, lock->level);
+ sync_thread_add_level(lock, lock->level,
+ lock_type == RW_LOCK_EX
+ && lock->lock_word < 0);
}
}
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
index 0b56e736209..251a392a02c 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innobase/sync/sync0sync.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -690,7 +690,7 @@ mutex_set_debug_info(
ut_ad(mutex);
ut_ad(file_name);
- sync_thread_add_level(mutex, mutex->level);
+ sync_thread_add_level(mutex, mutex->level, FALSE);
mutex->file_name = file_name;
mutex->line = line;
@@ -1133,8 +1133,9 @@ void
sync_thread_add_level(
/*==================*/
void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level) /*!< in: level in the latching order; if
+ ulint level, /*!< in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
+ ibool relock) /*!< in: TRUE if re-entering an x-lock */
{
ulint i;
sync_level_t* slot;
@@ -1185,6 +1186,10 @@ sync_thread_add_level(
array = thread_slot->levels;
+ if (relock) {
+ goto levels_ok;
+ }
+
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
B-tree height changes, then a leaf can change to an internal node
or the other way around. We do not know at present if this can cause
@@ -1350,6 +1355,7 @@ sync_thread_add_level(
ut_error;
}
+levels_ok:
if (array->next_free == ULINT_UNDEFINED) {
ut_a(array->n_elems < array->max_elems);
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
index a9c0d381e16..1ef1a082bb2 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innobase/ut/ut0ut.c
@@ -674,8 +674,6 @@ ut_strerr(
return("Cannot add constraint");
case DB_CORRUPTION:
return("Data structure corruption");
- case DB_COL_APPEARS_TWICE_IN_INDEX:
- return("Column appears twice in index");
case DB_CANNOT_DROP_CONSTRAINT:
return("Cannot drop constraint");
case DB_NO_SAVEPOINT:
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 94b335f91f6..8f7400308ba 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 05acfd1fa1d..7e96b54c57f 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 2a47dba13d4..f4b78939778 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index cb05dc7eacc..8873b8f71fb 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c
index 13292c41600..a86503e0cc3 100644
--- a/storage/myisam/mi_range.c
+++ b/storage/myisam/mi_range.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c
index 160fdd8473e..9e0d4fb511f 100644
--- a/storage/myisam/mi_test1.c
+++ b/storage/myisam/mi_test1.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c
index 26bb4c3a56f..8228771dc79 100644
--- a/storage/myisam/mi_update.c
+++ b/storage/myisam/mi_update.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c
index 5ec68a7d5b4..246255395df 100644
--- a/storage/myisam/mi_write.c
+++ b/storage/myisam/mi_write.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index 6e469231616..6b4fc8ea59f 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 2fc47e9b8bd..21974e91581 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/ndb/src/kernel/blocks/lgman.cpp b/storage/ndb/src/kernel/blocks/lgman.cpp
index 7dc71e7399a..cd3fc0d4fbb 100644
--- a/storage/ndb/src/kernel/blocks/lgman.cpp
+++ b/storage/ndb/src/kernel/blocks/lgman.cpp
@@ -1,4 +1,5 @@
-/* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+/*
+ Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@@ -12,8 +13,8 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- 02110-1301 USA */
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
#include "lgman.hpp"
#include "diskpage.hpp"