author    Marko Mäkelä <marko.makela@oracle.com>  2011-10-26 11:44:28 +0300
committer Marko Mäkelä <marko.makela@oracle.com>  2011-10-26 11:44:28 +0300
commit    91b5e9352a82b096197aa9f24f149cf3bf892b8a (patch)
tree      644aa74ed246e7c4530392e1c9e24800dbfdfdc9 /storage
parent    5cd2cb0c4f6f63d44f9d130d4f2e1f11b5d4064e (diff)
download  mariadb-git-91b5e9352a82b096197aa9f24f149cf3bf892b8a.tar.gz
Revert most of revno 3560.9.1 (Bug#12704861)
This was an attempt to address problems with the Bug#12612184 fix. Even with this follow-up fix, crash recovery can be broken. Let us fix the bug later.
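
The revert replaces the alloc_mtr scheme of the Bug#12612184 fix with the earlier approach: commit the mini-transaction of the record update first, so that its redo log is written before any BLOB page is logged, then restart it while keeping index->lock and the page latch. The full function is btr_cur_mtr_commit_and_start() in the btr0cur.c hunks below; this is a condensed sketch of the pattern (non-debug innobase form, the usual InnoDB types and helpers assumed):

/* Sketch: commit and restart a mini-transaction while retaining
an x-lock on index->lock and on the cursor page. */
void
btr_cur_mtr_commit_and_start(btr_cur_t* cursor, mtr_t* mtr)
{
	buf_block_t*	block = buf_block_align(btr_cur_get_rec(cursor));

	/* Keep the locks across the mtr_commit(mtr). */
	rw_lock_x_lock(dict_index_get_lock(cursor->index));
	rw_lock_x_lock(&block->lock);
	mutex_enter(&block->mutex);
	buf_block_buf_fix_inc(block);	/* pin the block in the buffer pool */
	mutex_exit(&block->mutex);

	mtr_commit(mtr);	/* write out the redo log of the update... */
	mtr_start(mtr);		/* ...before any BLOB page is allocated or logged */

	/* Reassociate the locks with the new mtr; they will be
	released on its mtr_commit(). */
	mtr_memo_push(mtr, dict_index_get_lock(cursor->index), MTR_MEMO_X_LOCK);
	mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
}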
Diffstat (limited to 'storage')
-rw-r--r--  storage/innobase/btr/btr0btr.c           | 211
-rw-r--r--  storage/innobase/btr/btr0cur.c           |  92
-rw-r--r--  storage/innobase/fsp/fsp0fsp.c           | 159
-rw-r--r--  storage/innobase/include/btr0btr.h       |  31
-rw-r--r--  storage/innobase/include/btr0cur.h       |  14
-rw-r--r--  storage/innobase/include/fsp0fsp.h       |   6
-rw-r--r--  storage/innobase/include/mtr0mtr.h       |   7
-rw-r--r--  storage/innobase/include/mtr0mtr.ic      |   4
-rw-r--r--  storage/innobase/mtr/mtr0mtr.c           |  10
-rw-r--r--  storage/innobase/row/row0ins.c           |  32
-rw-r--r--  storage/innobase/row/row0row.c           |  38
-rw-r--r--  storage/innobase/row/row0upd.c           |  23
-rw-r--r--  storage/innobase/trx/trx0undo.c          |   2
-rw-r--r--  storage/innodb_plugin/ChangeLog          |   9
-rw-r--r--  storage/innodb_plugin/btr/btr0btr.c      | 211
-rw-r--r--  storage/innodb_plugin/btr/btr0cur.c      | 114
-rw-r--r--  storage/innodb_plugin/fsp/fsp0fsp.c      | 224
-rw-r--r--  storage/innodb_plugin/include/btr0btr.h  |  34
-rw-r--r--  storage/innodb_plugin/include/btr0cur.h  |  38
-rw-r--r--  storage/innodb_plugin/include/fsp0fsp.h  |  25
-rw-r--r--  storage/innodb_plugin/include/mtr0mtr.h  |  11
-rw-r--r--  storage/innodb_plugin/include/mtr0mtr.ic |   4
-rw-r--r--  storage/innodb_plugin/mtr/mtr0mtr.c      |   5
-rw-r--r--  storage/innodb_plugin/row/row0ins.c      |  31
-rw-r--r--  storage/innodb_plugin/row/row0row.c      |  27
-rw-r--r--  storage/innodb_plugin/row/row0upd.c      |  23
-rw-r--r--  storage/innodb_plugin/trx/trx0undo.c     |   2
27 files changed, 415 insertions(+), 972 deletions(-)
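
Before the per-file diffs, the shape of the revert in one place: the allocator entry points drop the extra init_mtr argument that the Bug#12612184 fix had threaded through them. A sketch of the contrast as C prototypes, paraphrasing the comments in the diff (the two declarations cannot coexist in one translation unit and are shown side by side only for comparison; the innodb_plugin variant returns buf_block_t* instead of page_t*):

/* Removed by this commit (from the Bug#12612184 fix): allocation and
page initialization could happen in different mini-transactions, and
init_mtr == NULL meant a "fake" re-allocation of a page that had been
freed earlier in mtr. */
page_t*
btr_page_alloc(dict_index_t* index, ulint hint_page_no,
	       byte file_direction, ulint level,
	       mtr_t* mtr, mtr_t* init_mtr);

/* Restored by the revert: a single mtr both allocates the page and
x-latches/initializes it. fseg_alloc_free_page_general() and
fsp_alloc_free_page() revert analogously. */
page_t*
btr_page_alloc(dict_index_t* index, ulint hint_page_no,
	       byte file_direction, ulint level, mtr_t* mtr);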
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index ad99913cf3b..e8e065a3116 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -300,30 +300,29 @@ btr_page_alloc_for_ibuf(
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
-static
-ulint
-btr_page_alloc_low(
-/*===============*/
- /* out: allocated page number,
- FIL_NULL if out of space */
+
+page_t*
+btr_page_alloc(
+/*===========*/
+ /* out: new allocated page, x-latched;
+ NULL if out of space */
dict_index_t* index, /* in: index */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
- mtr_t* mtr, /* in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /* in/out: mini-transaction
- in which the page should be
- initialized (may be the same
- as mtr), or NULL if it should
- not be initialized (the page
- at hint was previously freed
- in mtr) */
+ mtr_t* mtr) /* in: mtr */
{
fseg_header_t* seg_header;
page_t* root;
+ page_t* new_page;
+ ulint new_page_no;
+
+ if (index->type & DICT_IBUF) {
+
+ return(btr_page_alloc_for_ibuf(index, mtr));
+ }
root = btr_root_get(index, mtr);
@@ -337,61 +336,19 @@ btr_page_alloc_low(
reservation for free extents, and thus we know that a page can
be allocated: */
- return(fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE,
- mtr, init_mtr));
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
- /* out: new allocated block, x-latched;
- NULL if out of space */
- dict_index_t* index, /* in: index */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
- page split is made */
- ulint level, /* in: level where the page is placed
- in the tree */
- mtr_t* mtr, /* in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /* in/out: mini-transaction
- for x-latching and initializing
- the page */
-{
- page_t* new_page;
- ulint new_page_no;
-
- if (index->type & DICT_IBUF) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
-
- new_page_no = btr_page_alloc_low(
- index, hint_page_no, file_direction, level, mtr, init_mtr);
-
+ new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
+ file_direction, TRUE, mtr);
if (new_page_no == FIL_NULL) {
return(NULL);
}
new_page = buf_page_get(dict_index_get_space(index), new_page_no,
- RW_X_LATCH, init_mtr);
+ RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
- if (mtr->freed_clust_leaf) {
- mtr_memo_release(mtr, new_page, MTR_MEMO_FREE_CLUST_LEAF);
- ut_ad(!mtr_memo_contains(mtr, buf_block_align(new_page),
- MTR_MEMO_FREE_CLUST_LEAF));
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
return(new_page);
}
@@ -538,138 +495,8 @@ btr_page_free(
level = btr_page_get_level(page, mtr);
btr_page_free_low(index, page, level, mtr);
-
- /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
-
- if (level == 0 && (index->type & DICT_CLUSTERED)) {
- /* We may have to call btr_mark_freed_leaves() to
- temporarily mark the block nonfree for invoking
- btr_store_big_rec_extern_fields() after an
- update. Remember that the block was freed. */
- mtr->freed_clust_leaf = TRUE;
- mtr_memo_push(mtr, buf_block_align(page),
- MTR_MEMO_FREE_CLUST_LEAF);
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
}
-/**************************************************************//**
-Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
-For invoking btr_store_big_rec_extern_fields() after an update,
-we must temporarily mark freed clustered index pages allocated, so
-that off-page columns will not be allocated from them. Between the
-btr_store_big_rec_extern_fields() and mtr_commit() we have to
-mark the pages free again, so that no pages will be leaked. */
-
-void
-btr_mark_freed_leaves(
-/*==================*/
- dict_index_t* index, /* in/out: clustered index */
- mtr_t* mtr, /* in/out: mini-transaction */
- ibool nonfree)/* in: TRUE=mark nonfree, FALSE=mark freed */
-{
- /* This is loosely based on mtr_memo_release(). */
-
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- if (!mtr->freed_clust_leaf) {
- return;
- }
-
- offset = dyn_array_get_data_size(&mtr->memo);
-
- while (offset > 0) {
- mtr_memo_slot_t* slot;
- buf_block_t* block;
-
- offset -= sizeof *slot;
-
- slot = dyn_array_get_element(&mtr->memo, offset);
-
- if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
- continue;
- }
-
- /* Because btr_page_alloc() does invoke
- mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
- blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
- memo must still be clustered index leaf tree pages. */
- block = slot->object;
- ut_a(buf_block_get_space(block)
- == dict_index_get_space(index));
- ut_a(fil_page_get_type(buf_block_get_frame(block))
- == FIL_PAGE_INDEX);
- ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
-
- if (nonfree) {
- /* Allocate the same page again. */
- ulint page_no;
- page_no = btr_page_alloc_low(
- index, buf_block_get_page_no(block),
- FSP_NO_DIR, 0, mtr, NULL);
- ut_a(page_no == buf_block_get_page_no(block));
- } else {
- /* Assert that the page is allocated and free it. */
- btr_page_free_low(index, buf_block_get_frame(block),
- 0, mtr);
- }
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
-See btr_mark_freed_leaves(). */
-
-ibool
-btr_freed_leaves_validate(
-/*======================*/
- /* out: TRUE if valid */
- mtr_t* mtr) /* in: mini-transaction */
-{
- ulint offset;
-
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- offset = dyn_array_get_data_size(&mtr->memo);
-
- while (offset > 0) {
- mtr_memo_slot_t* slot;
- buf_block_t* block;
-
- offset -= sizeof *slot;
-
- slot = dyn_array_get_element(&mtr->memo, offset);
-
- if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
- continue;
- }
-
- ut_a(mtr->freed_clust_leaf);
- /* Because btr_page_alloc() does invoke
- mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
- blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
- memo must still be clustered index leaf tree pages. */
- block = slot->object;
- ut_a(fil_page_get_type(buf_block_get_frame(block))
- == FIL_PAGE_INDEX);
- ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
- }
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
/******************************************************************
Sets the child node file address in a node pointer. */
UNIV_INLINE
@@ -1199,7 +1026,7 @@ btr_root_raise_and_insert(
a node pointer to the new page, and then splitting the new page. */
new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
- btr_page_get_level(root, mtr), mtr, mtr);
+ btr_page_get_level(root, mtr), mtr);
btr_page_create(new_page, index, mtr);
@@ -1820,7 +1647,7 @@ func_start:
/* 2. Allocate a new page to the index */
new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr, mtr);
+ btr_page_get_level(page, mtr), mtr);
btr_page_create(new_page, cursor->index, mtr);
/* 3. Calculate the first record on the upper half-page, and the
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 37bb3188785..95d87344e93 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -2051,6 +2051,43 @@ return_after_reservations:
return(err);
}
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr) /* in/out: mini-transaction */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(btr_cur_get_rec(cursor));
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* Keep the locks across the mtr_commit(mtr). */
+ rw_lock_x_lock(dict_index_get_lock(cursor->index));
+ rw_lock_x_lock(&block->lock);
+ mutex_enter(&block->mutex);
+#ifdef UNIV_SYNC_DEBUG
+ buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
+#else
+ buf_block_buf_fix_inc(block);
+#endif
+ mutex_exit(&block->mutex);
+ /* Write out the redo log. */
+ mtr_commit(mtr);
+ mtr_start(mtr);
+ /* Reassociate the locks with the mini-transaction.
+ They will be released on mtr_commit(mtr). */
+ mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK);
+ mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/********************************************************************
@@ -3449,11 +3486,6 @@ btr_store_big_rec_extern_fields(
this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
- mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
- in an update, local_mtr for
- allocating BLOB pages and
- updating BLOB pointers; alloc_mtr
- must not have freed any leaf pages */
mtr_t* local_mtr __attribute__((unused))) /* in: mtr
containing the latch to rec and to the
tree */
@@ -3474,8 +3506,6 @@ btr_store_big_rec_extern_fields(
ulint i;
mtr_t mtr;
- ut_ad(local_mtr);
- ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
@@ -3485,25 +3515,6 @@ btr_store_big_rec_extern_fields(
space_id = buf_frame_get_space_id(rec);
- if (alloc_mtr) {
- /* Because alloc_mtr will be committed after
- mtr, it is possible that the tablespace has been
- extended when the B-tree record was updated or
- inserted, or it will be extended while allocating
- pages for big_rec.
-
- TODO: In mtr (not alloc_mtr), write a redo log record
- about extending the tablespace to its current size,
- and remember the current size. Whenever the tablespace
- grows as pages are allocated, write further redo log
- records to mtr. (Currently tablespace extension is not
- covered by the redo log. If it were, the record would
- only be written to alloc_mtr, which is committed after
- mtr.) */
- } else {
- alloc_mtr = &mtr;
- }
-
/* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */
@@ -3530,7 +3541,7 @@ btr_store_big_rec_extern_fields(
}
page = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, alloc_mtr, &mtr);
+ FSP_NO_DIR, 0, &mtr);
if (page == NULL) {
mtr_commit(&mtr);
@@ -3584,42 +3595,37 @@ btr_store_big_rec_extern_fields(
extern_len -= store_len;
- if (alloc_mtr == &mtr) {
#ifdef UNIV_SYNC_DEBUG
- rec_page =
+ rec_page =
#endif /* UNIV_SYNC_DEBUG */
- buf_page_get(
- space_id,
- buf_frame_get_page_no(data),
- RW_X_LATCH, &mtr);
+ buf_page_get(space_id,
+ buf_frame_get_page_no(data),
+ RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(
- rec_page, SYNC_NO_ORDER_CHECK);
+ buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
- }
-
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
if (prev_page_no == FIL_NULL) {
mlog_write_ulint(data + local_len
+ BTR_EXTERN_SPACE_ID,
space_id,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_PAGE_NO,
page_no,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
/* Set the bit denoting that this field
in rec is stored externally */
@@ -3627,7 +3633,7 @@ btr_store_big_rec_extern_fields(
rec_set_nth_field_extern_bit(
rec, index,
big_rec_vec->fields[i].field_no,
- TRUE, alloc_mtr);
+ TRUE, &mtr);
}
prev_page_no = page_no;
diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
index d5be8fca38f..90e6ad34a9a 100644
--- a/storage/innobase/fsp/fsp0fsp.c
+++ b/storage/innobase/fsp/fsp0fsp.c
@@ -300,12 +300,8 @@ fseg_alloc_free_page_low(
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /* in/out: mini-transaction */
- mtr_t* init_mtr);/* in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr), or NULL if it
- should not be initialized (the page at hint
- was previously freed in mtr) */
+ mtr_t* mtr); /* in/out: mini-transaction */
+
/**************************************************************************
Reads the file space size stored in the header page. */
@@ -1375,43 +1371,6 @@ fsp_alloc_free_extent(
return(descr);
}
-/**********************************************************************//**
-Allocates a single free page from a space. */
-static __attribute__((nonnull))
-void
-fsp_alloc_from_free_frag(
-/*=====================*/
- fsp_header_t* header, /* in/out: tablespace header */
- xdes_t* descr, /* in/out: extent descriptor */
- ulint bit, /* in: slot to allocate in the extent */
- mtr_t* mtr) /* in/out: mini-transaction */
-{
- ulint frag_n_used;
-
- ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
- xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
-}
-
/**************************************************************************
Allocates a single free page from a space. The page is marked as used. */
static
@@ -1422,22 +1381,19 @@ fsp_alloc_free_page(
be allocated */
ulint space, /* in: space id */
ulint hint, /* in: hint of which page would be desirable */
- mtr_t* mtr, /* in/out: mini-transaction */
- mtr_t* init_mtr)/* in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr) */
+ mtr_t* mtr) /* in/out: mini-transaction */
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
page_t* page;
ulint free;
+ ulint frag_n_used;
ulint page_no;
ulint space_size;
ibool success;
ut_ad(mtr);
- ut_ad(init_mtr);
header = fsp_get_space_header(space, mtr);
@@ -1517,21 +1473,40 @@ fsp_alloc_free_page(
}
}
- fsp_alloc_from_free_frag(header, descr, free, mtr);
+ xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
+
+ /* Update the FRAG_N_USED field */
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+ mtr);
+ frag_n_used++;
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+ mtr);
+ if (xdes_is_full(descr, mtr)) {
+ /* The fragment is full: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+ flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+ mtr);
+ }
/* Initialize the allocated page to the buffer pool, so that it can
be obtained immediately with buf_page_get without need for a disk
read. */
- buf_page_create(space, page_no, init_mtr);
+ buf_page_create(space, page_no, mtr);
- page = buf_page_get(space, page_no, RW_X_LATCH, init_mtr);
+ page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
/* Prior contents of the page should be ignored */
- fsp_init_file_page(page, init_mtr);
+ fsp_init_file_page(page, mtr);
return(page_no);
}
@@ -1750,7 +1725,7 @@ fsp_alloc_seg_inode_page(
space = buf_frame_get_space_id(space_header);
- page_no = fsp_alloc_free_page(space, 0, mtr, mtr);
+ page_no = fsp_alloc_free_page(space, 0, mtr);
if (page_no == FIL_NULL) {
@@ -2120,8 +2095,7 @@ fseg_create_general(
}
if (page == 0) {
- page = fseg_alloc_free_page_low(space,
- inode, 0, FSP_UP, mtr, mtr);
+ page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
if (page == FIL_NULL) {
@@ -2365,12 +2339,7 @@ fseg_alloc_free_page_low(
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /* in/out: mini-transaction */
- mtr_t* init_mtr)/* in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr), or NULL if it
- should not be initialized (the page at hint
- was previously freed in mtr) */
+ mtr_t* mtr) /* in/out: mini-transaction */
{
fsp_header_t* space_header;
ulint space_size;
@@ -2382,6 +2351,7 @@ fseg_alloc_free_page_low(
if could not be allocated */
xdes_t* ret_descr; /* the extent of the allocated page */
page_t* page;
+ ibool frag_page_allocated = FALSE;
ibool success;
ulint n;
@@ -2402,8 +2372,6 @@ fseg_alloc_free_page_low(
if (descr == NULL) {
/* Hint outside space or too high above free limit: reset
hint */
- ut_a(init_mtr);
- /* The file space header page is always allocated. */
hint = 0;
descr = xdes_get_descriptor(space, hint, mtr);
}
@@ -2415,20 +2383,15 @@ fseg_alloc_free_page_low(
mtr), seg_id))
&& (xdes_get_bit(descr, XDES_FREE_BIT,
hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-take_hinted_page:
+
/* 1. We can take the hinted page
=================================*/
ret_descr = descr;
ret_page = hint;
- /* Skip the check for extending the tablespace. If the
- page hint were not within the size of the tablespace,
- we would have got (descr == NULL) above and reset the hint. */
- goto got_hinted_page;
/*-----------------------------------------------------------*/
- } else if (xdes_get_state(descr, mtr) == XDES_FREE
- && (!init_mtr
- || ((reserved - used < reserved / FSEG_FILLFACTOR)
- && used >= FSEG_FRAG_LIMIT))) {
+ } else if ((xdes_get_state(descr, mtr) == XDES_FREE)
+ && ((reserved - used) < reserved / FSEG_FILLFACTOR)
+ && (used >= FSEG_FRAG_LIMIT)) {
/* 2. We allocate the free extent from space and can take
=========================================================
@@ -2446,20 +2409,8 @@ take_hinted_page:
/* Try to fill the segment free list */
fseg_fill_free_list(seg_inode, space,
hint + FSP_EXTENT_SIZE, mtr);
- goto take_hinted_page;
- /*-----------------------------------------------------------*/
- } else if (!init_mtr) {
- ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- fsp_alloc_from_free_frag(space_header, descr,
- hint % FSP_EXTENT_SIZE, mtr);
ret_page = hint;
- ret_descr = NULL;
-
- /* Put the page in the fragment page array of the segment */
- n = fseg_find_free_frag_page_slot(seg_inode, mtr);
- ut_a(n != FIL_NULL);
- fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr);
- goto got_hinted_page;
+ /*-----------------------------------------------------------*/
} else if ((direction != FSP_NO_DIR)
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
@@ -2517,9 +2468,11 @@ take_hinted_page:
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
- ret_page = fsp_alloc_free_page(space, hint, mtr, init_mtr);
+ ret_page = fsp_alloc_free_page(space, hint, mtr);
ret_descr = NULL;
+ frag_page_allocated = TRUE;
+
if (ret_page != FIL_NULL) {
/* Put the page in the fragment page array of the
segment */
@@ -2529,10 +2482,6 @@ take_hinted_page:
fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
mtr);
}
-
- /* fsp_alloc_free_page() invoked fsp_init_file_page()
- already. */
- return(ret_page);
/*-----------------------------------------------------------*/
} else {
/* 7. We allocate a new extent and take its first page
@@ -2579,31 +2528,22 @@ take_hinted_page:
}
}
-got_hinted_page:
- {
+ if (!frag_page_allocated) {
/* Initialize the allocated page to buffer pool, so that it
can be obtained immediately with buf_page_get without need
for a disk read */
- mtr_t* block_mtr = init_mtr ? init_mtr : mtr;
- page = buf_page_create(space, ret_page, block_mtr);
+ page = buf_page_create(space, ret_page, mtr);
- ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH,
- block_mtr));
+ ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
- if (init_mtr) {
- /* The prior contents of the page should be ignored */
- fsp_init_file_page(page, init_mtr);
- }
- }
+ /* The prior contents of the page should be ignored */
+ fsp_init_file_page(page, mtr);
- /* ret_descr == NULL if the block was allocated from free_frag
- (XDES_FREE_FRAG) */
- if (ret_descr != NULL) {
/* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
@@ -2640,11 +2580,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr, /* in/out: mini-transaction handle */
- mtr_t* init_mtr)/* in/out: mtr or another mini-transaction
- in which the page should be initialized,
- or NULL if this is a "fake allocation" of
- a page that was previously freed in mtr */
+ mtr_t* mtr) /* in/out: mini-transaction */
{
fseg_inode_t* inode;
ulint space;
@@ -2682,8 +2618,7 @@ fseg_alloc_free_page_general(
}
page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
- inode, hint, direction,
- mtr, init_mtr);
+ inode, hint, direction, mtr);
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
}
@@ -2711,7 +2646,7 @@ fseg_alloc_free_page(
mtr_t* mtr) /* in: mtr handle */
{
return(fseg_alloc_free_page_general(seg_header, hint, direction,
- FALSE, mtr, mtr));
+ FALSE, mtr));
}
/**************************************************************************
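
The fsp0fsp.c hunks above also fold the short-lived fsp_alloc_from_free_frag() helper back into fsp_alloc_free_page(). For reference, a sketch of the restored inline accounting (helpers, constants, and the ulint local frag_n_used as in fsp0fsp.c):

/* Mark slot `free' of extent `descr' as used and bump the count of
used fragment pages in the tablespace header. */
xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
frag_n_used++;
mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, mtr);

if (xdes_is_full(descr, mtr)) {
	/* No free slot remains: move the extent from FSP_FREE_FRAG to
	FSP_FULL_FRAG, and exclude its pages from FRAG_N_USED. */
	flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
	xdes_set_state(descr, XDES_FULL_FRAG, mtr);
	flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
	mlog_write_ulint(header + FSP_FRAG_N_USED,
			 frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, mtr);
}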
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 3988019589d..269fa355558 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -379,11 +379,7 @@ btr_page_alloc(
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
- mtr_t* mtr, /* in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr); /* in/out: mini-transaction
- for x-latching and initializing
- the page */
+ mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
@@ -406,31 +402,6 @@ btr_page_free_low(
page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */
-/**************************************************************//**
-Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
-For invoking btr_store_big_rec_extern_fields() after an update,
-we must temporarily mark freed clustered index pages allocated, so
-that off-page columns will not be allocated from them. Between the
-btr_store_big_rec_extern_fields() and mtr_commit() we have to
-mark the pages free again, so that no pages will be leaked. */
-
-void
-btr_mark_freed_leaves(
-/*==================*/
- dict_index_t* index, /* in/out: clustered index */
- mtr_t* mtr, /* in/out: mini-transaction */
- ibool nonfree);/* in: TRUE=mark nonfree, FALSE=mark freed */
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
-See btr_mark_freed_leaves(). */
-
-ibool
-btr_freed_leaves_validate(
-/*======================*/
- /* out: TRUE if valid */
- mtr_t* mtr); /* in: mini-transaction */
-#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*****************************************************************
Prints size info of a B-tree. */
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index c2bf84ef9cb..c068d8d3318 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -252,6 +252,15 @@ btr_cur_pessimistic_update(
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr); /* in/out: mini-transaction */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -462,11 +471,6 @@ btr_store_big_rec_extern_fields(
this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
- mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
- in an update, local_mtr for
- allocating BLOB pages and
- updating BLOB pointers; alloc_mtr
- must not have freed any leaf pages */
mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */
/***********************************************************************
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 4c58d6075e6..b7322944189 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -179,11 +179,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr, /* in/out: mini-transaction */
- mtr_t* init_mtr);/* in/out: mtr or another mini-transaction
- in which the page should be initialized,
- or NULL if this is a "fake allocation" of
- a page that was previously freed in mtr */
+ mtr_t* mtr); /* in/out: mini-transaction */
/**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 58983063361..2b41fa0059a 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -36,8 +36,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56
-/* The mini-transaction freed a clustered index leaf page. */
-#define MTR_MEMO_FREE_CLUST_LEAF 57
/* Log item types: we have made them to be of the type 'byte'
for the compiler to warn if val and type parameters are switched
@@ -317,12 +315,9 @@ struct mtr_struct{
ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
dyn_array_t memo; /* memo stack for locks etc. */
dyn_array_t log; /* mini-transaction log */
- unsigned modifications:1;
+ ibool modifications;
/* TRUE if the mtr made modifications to
buffer pool pages */
- unsigned freed_clust_leaf:1;
- /* TRUE if MTR_MEMO_FREE_CLUST_LEAF
- was logged in the mini-transaction */
ulint n_log_recs;
/* count of how many page initial log records
have been written to the mtr log */
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 6b4cacf0766..81eec3bfc92 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -26,7 +26,6 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE;
- mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0;
#ifdef UNIV_DEBUG
@@ -51,8 +50,7 @@ mtr_memo_push(
ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF);
- ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
+ ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c
index 33b71f0766c..728c37ce564 100644
--- a/storage/innobase/mtr/mtr0mtr.c
+++ b/storage/innobase/mtr/mtr0mtr.c
@@ -53,13 +53,17 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
- } else if (type != MTR_MEMO_X_LOCK) {
- ut_ad(type == MTR_MEMO_MODIFY
- || type == MTR_MEMO_FREE_CLUST_LEAF);
+#ifdef UNIV_DEBUG
+ } else if (type == MTR_MEMO_X_LOCK) {
+ rw_lock_x_unlock((rw_lock_t*)object);
+ } else {
+ ut_ad(type == MTR_MEMO_MODIFY);
ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX));
+#else
} else {
rw_lock_x_unlock((rw_lock_t*)object);
+#endif
}
}
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index f6e6c81534b..c3b8f54a3c5 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -2090,20 +2090,15 @@ row_ins_index_entry_low(
if (big_rec) {
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
- columns, but allocate the pages and
- write the pointers using the
- mini-transaction of the record update.
- If any pages were freed in the update,
- temporarily mark them allocated so
- that off-page columns will not
- overwrite them. We must do this,
- because we will write the redo log for
- the BLOB writes before writing the
- redo log for the record update. Thus,
- redo log application at crash recovery
- will see BLOBs being written to free pages. */
-
- btr_mark_freed_leaves(index, &mtr, TRUE);
+ columns while still x-latching
+ index->lock and block->lock. We have
+ to mtr_commit(mtr) first, so that the
+ redo log will be written in the
+ correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr
+ freed B-tree pages on which some of
+ the big_rec fields will be written. */
+ btr_cur_mtr_commit_and_start(&cursor, &mtr);
rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(rec, index, offsets,
@@ -2111,8 +2106,7 @@ row_ins_index_entry_low(
&heap);
err = btr_store_big_rec_extern_fields(
- index, rec, offsets, big_rec,
- &mtr, &mtr);
+ index, rec, offsets, big_rec, &mtr);
/* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if
@@ -2125,9 +2119,6 @@ row_ins_index_entry_low(
undo log, and thus the record cannot
be rolled back. */
ut_a(err == DB_SUCCESS);
- /* Free the pages again
- in order to avoid a leak. */
- btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec;
}
} else {
@@ -2175,8 +2166,7 @@ function_exit:
ULINT_UNDEFINED, &heap);
err = btr_store_big_rec_extern_fields(index, rec,
- offsets, big_rec,
- NULL, &mtr);
+ offsets, big_rec, &mtr);
stored_big_rec:
if (modify) {
dtuple_big_rec_free(big_rec);
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
index ccb3c1f7781..171039e34ac 100644
--- a/storage/innobase/row/row0row.c
+++ b/storage/innobase/row/row0row.c
@@ -212,27 +212,23 @@ row_build(
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- if (rec_offs_any_null_extern(rec, offsets)) {
- /* This condition can occur during crash recovery
- before trx_rollback_or_clean_all_without_sess() has
- completed execution.
-
- This condition is possible if the server crashed
- during an insert or update before
- btr_store_big_rec_extern_fields() did mtr_commit() all
- BLOB pointers to the clustered index record.
-
- If the record contains a null BLOB pointer, look up the
- transaction that holds the implicit lock on this record, and
- assert that it is active. (In this version of InnoDB, we
- cannot assert that it was recovered, because there is no
- trx->is_recovered field.) */
-
- ut_a(trx_assert_active(
- row_get_rec_trx_id(rec, index, offsets)));
- ut_a(trx_undo_roll_ptr_is_insert(
- row_get_rec_roll_ptr(rec, index, offsets)));
- }
+ /* This condition can occur during crash recovery before
+ trx_rollback_or_clean_all_without_sess() has completed
+ execution.
+
+ This condition is possible if the server crashed
+ during an insert or update before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the clustered index record.
+
+ If the record contains a null BLOB pointer, look up the
+ transaction that holds the implicit lock on this record, and
+ assert that it is active. (In this version of InnoDB, we
+ cannot assert that it was recovered, because there is no
+ trx->is_recovered field.) */
+
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_assert_active(row_get_rec_trx_id(rec, index, offsets)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 58739edfd98..694b00ea265 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1591,22 +1591,21 @@ row_upd_clust_rec(
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_a(err == DB_SUCCESS);
- /* Write out the externally stored columns, but
- allocate the pages and write the pointers using the
- mini-transaction of the record update. If any pages
- were freed in the update, temporarily mark them
- allocated so that off-page columns will not overwrite
- them. We must do this, because we write the redo log
- for the BLOB writes before writing the redo log for
- the record update. */
-
- btr_mark_freed_leaves(index, mtr, TRUE);
+ /* Write out the externally stored columns while still
+ x-latching index->lock and block->lock. We have to
+ mtr_commit(mtr) first, so that the redo log will be
+ written in the correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr freed B-tree
+ pages on which some of the big_rec fields will be
+ written. */
+ btr_cur_mtr_commit_and_start(btr_cur, mtr);
+
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- big_rec, mtr, mtr);
+ big_rec, mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -1619,8 +1618,6 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled
back. */
ut_a(err == DB_SUCCESS);
- /* Free the pages again in order to avoid a leak. */
- btr_mark_freed_leaves(index, mtr, FALSE);
}
mtr_commit(mtr);
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
index ce09862f317..329565943c8 100644
--- a/storage/innobase/trx/trx0undo.c
+++ b/storage/innobase/trx/trx0undo.c
@@ -864,7 +864,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP,
- TRUE, mtr, mtr);
+ TRUE, mtr);
fil_space_release_free_extents(undo->space, n_reserved);
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
index 4e6e2be615a..e6724eb08c2 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innodb_plugin/ChangeLog
@@ -50,15 +50,6 @@
* include/trx0undo.h, trx/trx0rec.c, trx/trx0undo.c:
Fix Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE
-2011-08-29 The InnoDB Team
-
- * btr/btr0btr.c, btr/btr0cur.c, fsp/fsp0fsp.c,
- include/btr0btr.h, include/btr0cur.h, include/fsp0fsp.h,
- include/mtr0mtr.h, include/mtr0mtr.ic, mtr/mtr0mtr.c,
- row/row0ins.c, row/row0row.c, row/row0upd.c, trx/trx0undo.c:
- Fix Bug#12704861 Corruption after a crash during BLOB update
- and other regressions from the fix of Bug#12612184
-
2011-08-15 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, btr/btr0sea.c,
diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c
index 71e1599d19e..cb94ef08cd6 100644
--- a/storage/innodb_plugin/btr/btr0btr.c
+++ b/storage/innodb_plugin/btr/btr0btr.c
@@ -906,29 +906,28 @@ btr_page_alloc_for_ibuf(
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
-@return allocated page number, FIL_NULL if out of space */
-static __attribute__((nonnull(1,5), warn_unused_result))
-ulint
-btr_page_alloc_low(
-/*===============*/
+@return new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
dict_index_t* index, /*!< in: index */
ulint hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mini-transaction
- in which the page should be
- initialized (may be the same
- as mtr), or NULL if it should
- not be initialized (the page
- at hint was previously freed
- in mtr) */
+ mtr_t* mtr) /*!< in: mtr */
{
fseg_header_t* seg_header;
page_t* root;
+ buf_block_t* new_block;
+ ulint new_page_no;
+
+ if (dict_index_is_ibuf(index)) {
+
+ return(btr_page_alloc_for_ibuf(index, mtr));
+ }
root = btr_root_get(index, mtr);
@@ -942,42 +941,8 @@ btr_page_alloc_low(
reservation for free extents, and thus we know that a page can
be allocated: */
- return(fseg_alloc_free_page_general(
- seg_header, hint_page_no, file_direction,
- TRUE, mtr, init_mtr));
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@return new allocated block, x-latched; NULL if out of space */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mini-transaction
- for x-latching and initializing
- the page */
-{
- buf_block_t* new_block;
- ulint new_page_no;
-
- if (dict_index_is_ibuf(index)) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
-
- new_page_no = btr_page_alloc_low(
- index, hint_page_no, file_direction, level, mtr, init_mtr);
-
+ new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
+ file_direction, TRUE, mtr);
if (new_page_no == FIL_NULL) {
return(NULL);
@@ -985,16 +950,9 @@ btr_page_alloc(
new_block = buf_page_get(dict_index_get_space(index),
dict_table_zip_size(index->table),
- new_page_no, RW_X_LATCH, init_mtr);
+ new_page_no, RW_X_LATCH, mtr);
buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
- if (mtr->freed_clust_leaf) {
- mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF);
- ut_ad(!mtr_memo_contains(mtr, new_block,
- MTR_MEMO_FREE_CLUST_LEAF));
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
return(new_block);
}
@@ -1129,139 +1087,12 @@ btr_page_free(
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
{
- const page_t* page = buf_block_get_frame(block);
- ulint level = btr_page_get_level(page, mtr);
-
- ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
- btr_page_free_low(index, block, level, mtr);
-
- /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- if (level == 0 && dict_index_is_clust(index)) {
- /* We may have to call btr_mark_freed_leaves() to
- temporarily mark the block nonfree for invoking
- btr_store_big_rec_extern_fields_func() after an
- update. Remember that the block was freed. */
- mtr->freed_clust_leaf = TRUE;
- mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF);
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
-}
-
-/**************************************************************//**
-Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
-For invoking btr_store_big_rec_extern_fields() after an update,
-we must temporarily mark freed clustered index pages allocated, so
-that off-page columns will not be allocated from them. Between the
-btr_store_big_rec_extern_fields() and mtr_commit() we have to
-mark the pages free again, so that no pages will be leaked. */
-UNIV_INTERN
-void
-btr_mark_freed_leaves(
-/*==================*/
- dict_index_t* index, /*!< in/out: clustered index */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
-{
- /* This is loosely based on mtr_memo_release(). */
-
- ulint offset;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- if (!mtr->freed_clust_leaf) {
- return;
- }
-
- offset = dyn_array_get_data_size(&mtr->memo);
-
- while (offset > 0) {
- mtr_memo_slot_t* slot;
- buf_block_t* block;
-
- offset -= sizeof *slot;
-
- slot = dyn_array_get_element(&mtr->memo, offset);
-
- if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
- continue;
- }
-
- /* Because btr_page_alloc() does invoke
- mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
- blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
- memo must still be clustered index leaf tree pages. */
- block = slot->object;
- ut_a(buf_block_get_space(block)
- == dict_index_get_space(index));
- ut_a(fil_page_get_type(buf_block_get_frame(block))
- == FIL_PAGE_INDEX);
- ut_a(page_is_leaf(buf_block_get_frame(block)));
-
- if (nonfree) {
- /* Allocate the same page again. */
- ulint page_no;
- page_no = btr_page_alloc_low(
- index, buf_block_get_page_no(block),
- FSP_NO_DIR, 0, mtr, NULL);
- ut_a(page_no == buf_block_get_page_no(block));
- } else {
- /* Assert that the page is allocated and free it. */
- btr_page_free_low(index, block, 0, mtr);
- }
- }
-
- ut_ad(btr_freed_leaves_validate(mtr));
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
-@see btr_mark_freed_leaves()
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_freed_leaves_validate(
-/*======================*/
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ulint offset;
-
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- offset = dyn_array_get_data_size(&mtr->memo);
-
- while (offset > 0) {
- const mtr_memo_slot_t* slot;
- const buf_block_t* block;
-
- offset -= sizeof *slot;
-
- slot = dyn_array_get_element(&mtr->memo, offset);
-
- if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
- continue;
- }
+ ulint level;
- ut_a(mtr->freed_clust_leaf);
- /* Because btr_page_alloc() does invoke
- mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
- blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
- memo must still be clustered index leaf tree pages. */
- block = slot->object;
- ut_a(fil_page_get_type(buf_block_get_frame(block))
- == FIL_PAGE_INDEX);
- ut_a(page_is_leaf(buf_block_get_frame(block)));
- }
+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
- return(TRUE);
+ btr_page_free_low(index, block, level, mtr);
}
-#endif /* UNIV_DEBUG */
/**************************************************************//**
Sets the child node file address in a node pointer. */
@@ -1984,7 +1815,7 @@ btr_root_raise_and_insert(
level = btr_page_get_level(root, mtr);
- new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
ut_a(!new_page_zip == !root_page_zip);
@@ -2720,7 +2551,7 @@ func_start:
/* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr, mtr);
+ btr_page_get_level(page, mtr), mtr);
new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block);
btr_page_create(new_block, new_page_zip, cursor->index,
diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
index 1e603a6fc81..4c862f061c5 100644
--- a/storage/innodb_plugin/btr/btr0cur.c
+++ b/storage/innodb_plugin/btr/btr0cur.c
@@ -2421,6 +2421,39 @@ return_after_reservations:
return(err);
}
+/**************************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ buf_block_t* block;
+
+ block = btr_cur_get_block(cursor);
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* Keep the locks across the mtr_commit(mtr). */
+ rw_lock_x_lock(dict_index_get_lock(cursor->index));
+ rw_lock_x_lock(&block->lock);
+ mutex_enter(&block->mutex);
+ buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+ mutex_exit(&block->mutex);
+ /* Write out the redo log. */
+ mtr_commit(mtr);
+ mtr_start(mtr);
+ /* Reassociate the locks with the mini-transaction.
+ They will be released on mtr_commit(mtr). */
+ mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+ MTR_MEMO_X_LOCK);
+ mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/****************************************************************//**
@@ -3863,9 +3896,6 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
- const big_rec_t*big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
-
#ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */
@@ -3874,11 +3904,9 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*! in: TRUE if the record is updated
in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL;
- in an update, local_mtr for
- allocating BLOB pages and
- updating BLOB pointers; alloc_mtr
- must not have freed any leaf pages */
+ const big_rec_t*big_rec_vec) /*!< in: vector containing fields
+ to be stored externally */
+
{
ulint rec_page_no;
byte* field_ref;
@@ -3897,9 +3925,6 @@ btr_store_big_rec_extern_fields_func(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
- ut_ad(local_mtr);
- ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
- ut_ad(!update_in_place || alloc_mtr);
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
@@ -3915,25 +3940,6 @@ btr_store_big_rec_extern_fields_func(
rec_page_no = buf_block_get_page_no(rec_block);
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
- if (alloc_mtr) {
- /* Because alloc_mtr will be committed after
- mtr, it is possible that the tablespace has been
- extended when the B-tree record was updated or
- inserted, or it will be extended while allocating
- pages for big_rec.
-
- TODO: In mtr (not alloc_mtr), write a redo log record
- about extending the tablespace to its current size,
- and remember the current size. Whenever the tablespace
- grows as pages are allocated, write further redo log
- records to mtr. (Currently tablespace extension is not
- covered by the redo log. If it were, the record would
- only be written to alloc_mtr, which is committed after
- mtr.) */
- } else {
- alloc_mtr = &mtr;
- }
-
if (UNIV_LIKELY_NULL(page_zip)) {
int err;
@@ -4010,7 +4016,7 @@ btr_store_big_rec_extern_fields_func(
}
block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, alloc_mtr, &mtr);
+ FSP_NO_DIR, 0, &mtr);
if (UNIV_UNLIKELY(block == NULL)) {
mtr_commit(&mtr);
@@ -4137,15 +4143,11 @@ btr_store_big_rec_extern_fields_func(
goto next_zip_page;
}
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
+ rec_block = buf_page_get(space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(rec_block,
+ SYNC_NO_ORDER_CHECK);
if (err == Z_STREAM_END) {
mach_write_to_4(field_ref
@@ -4179,8 +4181,7 @@ btr_store_big_rec_extern_fields_func(
page_zip_write_blob_ptr(
page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no,
- alloc_mtr);
+ big_rec_vec->fields[i].field_no, &mtr);
next_zip_page:
prev_page_no = page_no;
@@ -4225,23 +4226,19 @@ next_zip_page:
extern_len -= store_len;
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
+ rec_block = buf_page_get(space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(rec_block,
+ SYNC_NO_ORDER_CHECK);
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, alloc_mtr);
+ MLOG_4BYTES, &mtr);
if (prev_page_no == FIL_NULL) {
btr_blob_dbg_add_blob(
@@ -4251,19 +4248,18 @@ next_zip_page:
mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID,
- space_id, MLOG_4BYTES,
- alloc_mtr);
+ space_id,
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO,
- page_no, MLOG_4BYTES,
- alloc_mtr);
+ page_no,
+ MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
- MLOG_4BYTES,
- alloc_mtr);
+ MLOG_4BYTES, &mtr);
}
prev_page_no = page_no;
diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c
index 19846b63d5b..fee7fde2e5c 100644
--- a/storage/innodb_plugin/fsp/fsp0fsp.c
+++ b/storage/innodb_plugin/fsp/fsp0fsp.c
@@ -312,9 +312,8 @@ fsp_fill_free_list(
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- __attribute__((nonnull));
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
@@ -334,13 +333,7 @@ fseg_alloc_free_page_low(
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr), or NULL if it
- should not be initialized (the page at hint
- was previously freed in mtr) */
- __attribute__((warn_unused_result, nonnull(3,6)));
+ mtr_t* mtr); /*!< in/out: mini-transaction */
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -708,18 +701,17 @@ list, if not free limit == space size. This adding is necessary to make the
descriptor defined, as they are uninitialized above the free limit.
@return pointer to the extent descriptor, NULL if the page does not
exist in the space or if the offset exceeds the free limit */
-UNIV_INLINE __attribute__((nonnull, warn_unused_result))
+UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
- fsp_header_t* sp_header, /*!< in/out: space header, x-latched
- in mtr */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset; if equal
- to the free limit, we try to
- add new extents to the space
- free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fsp_header_t* sp_header,/*!< in/out: space header, x-latched */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset;
+ if equal to the free limit,
+ we try to add new extents to
+ the space free list */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint limit;
ulint size;
@@ -727,9 +719,11 @@ xdes_get_descriptor_with_space_hdr(
ulint descr_page_no;
page_t* descr_page;
+ ut_ad(mtr);
ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
/* Read free limit and space size */
limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
@@ -779,7 +773,7 @@ is necessary to make the descriptor defined, as they are uninitialized
above the free limit.
@return pointer to the extent descriptor, NULL if the page does not
exist in the space or if the offset exceeds the free limit */
-static __attribute__((nonnull, warn_unused_result))
+static
xdes_t*
xdes_get_descriptor(
/*================*/
@@ -788,7 +782,7 @@ xdes_get_descriptor(
or 0 for uncompressed pages */
ulint offset, /*!< in: page offset; if equal to the free limit,
we try to add new extents to the space free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in: mtr handle */
{
buf_block_t* block;
fsp_header_t* sp_header;
@@ -1166,14 +1160,14 @@ fsp_header_get_tablespace_size(void)
Tries to extend a single-table tablespace so that a page would fit in the
data file.
@return TRUE if success */
-static __attribute__((nonnull, warn_unused_result))
+static
ibool
fsp_try_extend_data_file_with_pages(
/*================================*/
ulint space, /*!< in: space */
ulint page_no, /*!< in: page number */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mtr */
{
ibool success;
ulint actual_size;
@@ -1198,7 +1192,7 @@ fsp_try_extend_data_file_with_pages(
/***********************************************************************//**
Tries to extend the last data file of a tablespace if it is auto-extending.
@return FALSE if not auto-extending */
-static __attribute__((nonnull))
+static
ibool
fsp_try_extend_data_file(
/*=====================*/
@@ -1208,8 +1202,8 @@ fsp_try_extend_data_file(
the actual file size rounded down to
megabyte */
ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint size;
ulint zip_size;
@@ -1345,7 +1339,7 @@ fsp_fill_free_list(
then we do not allocate more extents */
ulint space, /*!< in: space */
fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint limit;
ulint size;
@@ -1544,46 +1538,9 @@ fsp_alloc_free_extent(
}
/**********************************************************************//**
-Allocates a single free page from a space. */
-static __attribute__((nonnull))
-void
-fsp_alloc_from_free_frag(
-/*=====================*/
- fsp_header_t* header, /*!< in/out: tablespace header */
- xdes_t* descr, /*!< in/out: extent descriptor */
- ulint bit, /*!< in: slot to allocate in the extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint frag_n_used;
-
- ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
- xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
-}
-
-/**********************************************************************//**
Allocates a single free page from a space. The page is marked as used.
@return the page offset, FIL_NULL if no page could be allocated */
-static __attribute__((nonnull, warn_unused_result))
+static
ulint
fsp_alloc_free_page(
/*================*/
@@ -1591,22 +1548,19 @@ fsp_alloc_free_page(
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint hint, /*!< in: hint of which page would be desirable */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
buf_block_t* block;
ulint free;
+ ulint frag_n_used;
ulint page_no;
ulint space_size;
ibool success;
ut_ad(mtr);
- ut_ad(init_mtr);
header = fsp_get_space_header(space, zip_size, mtr);
@@ -1688,19 +1642,38 @@ fsp_alloc_free_page(
}
}
- fsp_alloc_from_free_frag(header, descr, free, mtr);
+ xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
+
+ /* Update the FRAG_N_USED field */
+ frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+ mtr);
+ frag_n_used++;
+ mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+ mtr);
+ if (xdes_is_full(descr, mtr)) {
+ /* The fragment is full: move it to another list */
+ flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+ flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+ mtr);
+ mlog_write_ulint(header + FSP_FRAG_N_USED,
+ frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+ mtr);
+ }
/* Initialize the allocated page to the buffer pool, so that it can
be obtained immediately with buf_page_get without need for a disk
read. */
- buf_page_create(space, page_no, zip_size, init_mtr);
+ buf_page_create(space, page_no, zip_size, mtr);
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, init_mtr);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
/* Prior contents of the page should be ignored */
- fsp_init_file_page(block, init_mtr);
+ fsp_init_file_page(block, mtr);
return(page_no);
}
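Note: with fsp_alloc_from_free_frag() deleted, its FSP_FRAG_N_USED
bookkeeping is inlined above. The counter tracks used pages across all
extents on the FSP_FREE_FRAG list, not per extent; a worked example of the
restored arithmetic (assuming the default 16 KiB page size, where
FSP_EXTENT_SIZE is 64):

	/* Say 130 fragment pages are in use across FSP_FREE_FRAG extents.
	   Allocating the page that fills one extent first increments the
	   counter to 131; moving that full extent to FSP_FULL_FRAG then
	   deducts its FSP_EXTENT_SIZE pages: 131 - 64 = 67. Thus
	   FSP_FRAG_N_USED never counts pages of completely full extents. */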
@@ -1936,7 +1909,7 @@ fsp_alloc_seg_inode_page(
zip_size = dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- page_no = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr);
+ page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
if (page_no == FIL_NULL) {
@@ -2350,7 +2323,7 @@ fseg_create_general(
if (page == 0) {
page = fseg_alloc_free_page_low(space, zip_size,
- inode, 0, FSP_UP, mtr, mtr);
+ inode, 0, FSP_UP, mtr);
if (page == FIL_NULL) {
@@ -2606,12 +2579,7 @@ fseg_alloc_free_page_low(
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr), or NULL if it
- should not be initialized (the page at hint
- was previously freed in mtr) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
fsp_header_t* space_header;
ulint space_size;
@@ -2622,6 +2590,7 @@ fseg_alloc_free_page_low(
ulint ret_page; /*!< the allocated page offset, FIL_NULL
if could not be allocated */
xdes_t* ret_descr; /*!< the extent of the allocated page */
+ ibool frag_page_allocated = FALSE;
ibool success;
ulint n;
@@ -2643,8 +2612,6 @@ fseg_alloc_free_page_low(
if (descr == NULL) {
/* Hint outside space or too high above free limit: reset
hint */
- ut_a(init_mtr);
- /* The file space header page is always allocated. */
hint = 0;
descr = xdes_get_descriptor(space, zip_size, hint, mtr);
}
@@ -2656,20 +2623,15 @@ fseg_alloc_free_page_low(
mtr), seg_id))
&& (xdes_get_bit(descr, XDES_FREE_BIT,
hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-take_hinted_page:
+
/* 1. We can take the hinted page
=================================*/
ret_descr = descr;
ret_page = hint;
- /* Skip the check for extending the tablespace. If the
- page hint were not within the size of the tablespace,
- we would have got (descr == NULL) above and reset the hint. */
- goto got_hinted_page;
/*-----------------------------------------------------------*/
- } else if (xdes_get_state(descr, mtr) == XDES_FREE
- && (!init_mtr
- || ((reserved - used < reserved / FSEG_FILLFACTOR)
- && used >= FSEG_FRAG_LIMIT))) {
+ } else if ((xdes_get_state(descr, mtr) == XDES_FREE)
+ && ((reserved - used) < reserved / FSEG_FILLFACTOR)
+ && (used >= FSEG_FRAG_LIMIT)) {
/* 2. We allocate the free extent from space and can take
=========================================================
@@ -2687,20 +2649,8 @@ take_hinted_page:
/* Try to fill the segment free list */
fseg_fill_free_list(seg_inode, space, zip_size,
hint + FSP_EXTENT_SIZE, mtr);
- goto take_hinted_page;
- /*-----------------------------------------------------------*/
- } else if (!init_mtr) {
- ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- fsp_alloc_from_free_frag(space_header, descr,
- hint % FSP_EXTENT_SIZE, mtr);
ret_page = hint;
- ret_descr = NULL;
-
- /* Put the page in the fragment page array of the segment */
- n = fseg_find_free_frag_page_slot(seg_inode, mtr);
- ut_a(n != FIL_NULL);
- fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr);
- goto got_hinted_page;
+ /*-----------------------------------------------------------*/
} else if ((direction != FSP_NO_DIR)
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
@@ -2760,10 +2710,11 @@ take_hinted_page:
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
- ret_page = fsp_alloc_free_page(space, zip_size, hint,
- mtr, init_mtr);
+ ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
ret_descr = NULL;
+ frag_page_allocated = TRUE;
+
if (ret_page != FIL_NULL) {
/* Put the page in the fragment page array of the
segment */
@@ -2773,10 +2724,6 @@ take_hinted_page:
fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
mtr);
}
-
- /* fsp_alloc_free_page() invoked fsp_init_file_page()
- already. */
- return(ret_page);
/*-----------------------------------------------------------*/
} else {
/* 7. We allocate a new extent and take its first page
@@ -2824,34 +2771,26 @@ take_hinted_page:
}
}
-got_hinted_page:
- {
+ if (!frag_page_allocated) {
/* Initialize the allocated page to buffer pool, so that it
can be obtained immediately with buf_page_get without need
for a disk read */
buf_block_t* block;
ulint zip_size = dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- mtr_t* block_mtr = init_mtr ? init_mtr : mtr;
- block = buf_page_create(space, ret_page, zip_size, block_mtr);
+ block = buf_page_create(space, ret_page, zip_size, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
ret_page, RW_X_LATCH,
- block_mtr))) {
+ mtr))) {
ut_error;
}
- if (init_mtr) {
- /* The prior contents of the page should be ignored */
- fsp_init_file_page(block, init_mtr);
- }
- }
+ /* The prior contents of the page should be ignored */
+ fsp_init_file_page(block, mtr);
- /* ret_descr == NULL if the block was allocated from free_frag
- (XDES_FREE_FRAG) */
- if (ret_descr != NULL) {
/* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
@@ -2888,11 +2827,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr, /*!< in/out: mini-transaction handle */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized,
- or NULL if this is a "fake allocation" of
- a page that was previously freed in mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
fseg_inode_t* inode;
ulint space;
@@ -2934,8 +2869,7 @@ fseg_alloc_free_page_general(
}
page_no = fseg_alloc_free_page_low(space, zip_size,
- inode, hint, direction,
- mtr, init_mtr);
+ inode, hint, direction, mtr);
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
}
@@ -2944,6 +2878,28 @@ fseg_alloc_free_page_general(
}
/**********************************************************************//**
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation.
+@return allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
+ulint
+fseg_alloc_free_page(
+/*=================*/
+ fseg_header_t* seg_header,/*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction,/*!< in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ mtr_t* mtr) /*!< in: mtr handle */
+{
+ return(fseg_alloc_free_page_general(seg_header, hint, direction,
+ FALSE, mtr));
+}
+
+/**********************************************************************//**
Checks that we have at least 2 frag pages free in the first extent of a
single-table tablespace, and they are also physically initialized to the data
file. That is, we have already extended the data file so that those pages are
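Note: the restored fseg_alloc_free_page() is a thin wrapper that forwards to
fseg_alloc_free_page_general() with has_done_reservation = FALSE, so the
free-space check happens inside the callee. A hedged usage sketch;
identifiers other than the fseg function are assumed:

	/* Allocate one page near 'hint', splitting upward. */
	page_no = fseg_alloc_free_page(seg_header, hint, FSP_UP, mtr);

	if (page_no == FIL_NULL) {
		/* Out of tablespace: the caller must handle this. */
		return(DB_OUT_OF_FILE_SPACE);
	}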
diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h
index 476ad29adac..c0a038dd21d 100644
--- a/storage/innodb_plugin/include/btr0btr.h
+++ b/storage/innodb_plugin/include/btr0btr.h
@@ -557,12 +557,7 @@ btr_page_alloc(
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mini-transaction
- for x-latching and initializing
- the page */
- __attribute__((nonnull, warn_unused_result));
+ mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
@@ -585,33 +580,6 @@ btr_page_free_low(
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
-For invoking btr_store_big_rec_extern_fields() after an update,
-we must temporarily mark freed clustered index pages allocated, so
-that off-page columns will not be allocated from them. Between the
-btr_store_big_rec_extern_fields() and mtr_commit() we have to
-mark the pages free again, so that no pages will be leaked. */
-UNIV_INTERN
-void
-btr_mark_freed_leaves(
-/*==================*/
- dict_index_t* index, /*!< in/out: clustered index */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
- __attribute__((nonnull));
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
-@see btr_mark_freed_leaves()
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_freed_leaves_validate(
-/*======================*/
- mtr_t* mtr) /*!< in: mini-transaction */
- __attribute__((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
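Note: the two declarations deleted above carried the Bug#12612184 approach
that this commit reverts: callers bracketed the BLOB writes with a
mark/unmark pair so that off-page columns could not be allocated from leaf
pages freed in the same mini-transaction. A sketch of the removed call
bracket as it looked before this revert (call-site variables assumed):

	btr_mark_freed_leaves(index, &mtr, TRUE);  /* mark freed leaves used */
	err = btr_store_big_rec_extern_fields(index, block, rec, offsets,
					      big_rec, &mtr, TRUE, &mtr);
	ut_a(err == DB_SUCCESS);
	btr_mark_freed_leaves(index, &mtr, FALSE); /* unmark: avoid a leak */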
diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h
index 1d97c5b9452..6094a2a6c7a 100644
--- a/storage/innodb_plugin/include/btr0cur.h
+++ b/storage/innodb_plugin/include/btr0cur.h
@@ -326,6 +326,16 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
+/***********************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -530,8 +540,6 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
- const big_rec_t*big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
#ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */
@@ -540,12 +548,9 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*!< in: TRUE if the record is updated
in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL;
- in an update, local_mtr for
- allocating BLOB pages and
- updating BLOB pointers; alloc_mtr
- must not have freed any leaf pages */
- __attribute__((nonnull(1,2,3,4,5), warn_unused_result));
+ const big_rec_t*big_rec_vec) /*!< in: vector containing fields
+ to be stored externally */
+ __attribute__((nonnull));
/** Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
@@ -554,22 +559,21 @@ file segment of the index tree.
@param index in: clustered index; MUST be X-latched by mtr
@param b in/out: block containing rec; MUST be X-latched by mtr
@param rec in/out: clustered index record
-@param offs in: rec_get_offsets(rec, index);
+@param offsets in: rec_get_offsets(rec, index);
the "external storage" flags in offsets will not be adjusted
-@param big in: vector containing fields to be stored externally
@param mtr in: mini-transaction that holds x-latch on index and b
@param upd in: TRUE if the record is updated in place (not delete+insert)
-@param rmtr in/out: in updates, the mini-transaction that holds rec
+@param big in: vector containing fields to be stored externally
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
#ifdef UNIV_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr)
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big)
#elif defined UNIV_BLOB_LIGHT_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr)
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big)
#else
-# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
- btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr)
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big)
#endif
/*******************************************************************//**
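Note: after this hunk, the debug-only parameters (local_mtr under
UNIV_DEBUG; update_in_place under UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG) are
stripped by the macro itself, so call sites can name them unconditionally.
How one call expands per build, using assumed call-site variables:

	err = btr_store_big_rec_extern_fields(index, block, rec, offsets,
					      &mtr, FALSE, big_rec);

	/* UNIV_DEBUG:
	     ..._func(index, block, rec, offsets, &mtr, FALSE, big_rec)
	   UNIV_BLOB_LIGHT_DEBUG:
	     ..._func(index, block, rec, offsets, FALSE, big_rec)
	   release build:
	     ..._func(index, block, rec, offsets, big_rec) */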
diff --git a/storage/innodb_plugin/include/fsp0fsp.h b/storage/innodb_plugin/include/fsp0fsp.h
index 2221380c9a2..403e1d404a8 100644
--- a/storage/innodb_plugin/include/fsp0fsp.h
+++ b/storage/innodb_plugin/include/fsp0fsp.h
@@ -176,18 +176,19 @@ fseg_n_reserved_pages(
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation.
-@param[in/out] seg_header segment header
-@param[in] hint hint of which page would be desirable
-@param[in] direction if the new page is needed because
+@return the allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
+ulint
+fseg_alloc_free_page(
+/*=================*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR
-@param[in/out] mtr mini-transaction
-@return the allocated page offset FIL_NULL if no page could be allocated */
-#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \
- fseg_alloc_free_page_general(seg_header, hint, direction, \
- FALSE, mtr, mtr)
+ FSP_UP, FSP_NO_DIR */
+ mtr_t* mtr); /*!< in: mtr handle */
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
@@ -209,11 +210,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized,
- or NULL if this is a "fake allocation" of
- a page that was previously freed in mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((warn_unused_result, nonnull(1,5)));
/**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may
diff --git a/storage/innodb_plugin/include/mtr0mtr.h b/storage/innodb_plugin/include/mtr0mtr.h
index 3529519e7f4..8a9ec8ea7f0 100644
--- a/storage/innodb_plugin/include/mtr0mtr.h
+++ b/storage/innodb_plugin/include/mtr0mtr.h
@@ -53,8 +53,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56
-/** The mini-transaction freed a clustered index leaf page. */
-#define MTR_MEMO_FREE_CLUST_LEAF 57
/** @name Log item types
The log items are declared 'byte' so that the compiler can warn if val
@@ -379,12 +377,9 @@ struct mtr_struct{
#endif
dyn_array_t memo; /*!< memo stack for locks etc. */
dyn_array_t log; /*!< mini-transaction log */
- unsigned modifications:1;
- /*!< TRUE if the mini-transaction
- modified buffer pool pages */
- unsigned freed_clust_leaf:1;
- /*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF
- was logged in the mini-transaction */
+ ibool modifications;
+ /* TRUE if the mtr made modifications to
+ buffer pool pages */
ulint n_log_recs;
/* count of how many page initial log records
have been written to the mtr log */
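Note: the two 1-bit bitfields revert to the original plain ibool;
freed_clust_leaf disappears together with MTR_MEMO_FREE_CLUST_LEAF. A hedged
sketch of how the surviving flag pays off (simplified; the actual commit
path is in mtr0mtr.c):

	/* A read-only mtr never sets modifications, so mtr_commit() can
	   skip the redo log write for it entirely (sketch). */
	mtr_start(&mtr);
	block = buf_page_get(space, zip_size, page_no, RW_S_LATCH, &mtr);
	/* ... read from block; no mlog_write_*() calls ... */
	mtr_commit(&mtr);	/* mtr.modifications is still FALSE */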
diff --git a/storage/innodb_plugin/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic
index 9c0ddff9132..9f92d2b06a1 100644
--- a/storage/innodb_plugin/include/mtr0mtr.ic
+++ b/storage/innodb_plugin/include/mtr0mtr.ic
@@ -44,7 +44,6 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE;
- mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0;
ut_d(mtr->state = MTR_ACTIVE);
@@ -68,8 +67,7 @@ mtr_memo_push(
ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF);
- ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
+ ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
diff --git a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
index e3fefbedec7..5fad61b2922 100644
--- a/storage/innodb_plugin/mtr/mtr0mtr.c
+++ b/storage/innodb_plugin/mtr/mtr0mtr.c
@@ -58,11 +58,12 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
+#ifdef UNIV_DEBUG
} else if (type != MTR_MEMO_X_LOCK) {
- ut_ad(type == MTR_MEMO_MODIFY
- || type == MTR_MEMO_FREE_CLUST_LEAF);
+ ut_ad(type == MTR_MEMO_MODIFY);
ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX));
+#endif /* UNIV_DEBUG */
} else {
rw_lock_x_unlock((rw_lock_t*)object);
}
diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
index cd135e8ba8f..f0f6eca627f 100644
--- a/storage/innodb_plugin/row/row0ins.c
+++ b/storage/innodb_plugin/row/row0ins.c
@@ -2097,20 +2097,15 @@ row_ins_index_entry_low(
if (big_rec) {
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
- columns, but allocate the pages and
- write the pointers using the
- mini-transaction of the record update.
- If any pages were freed in the update,
- temporarily mark them allocated so
- that off-page columns will not
- overwrite them. We must do this,
- because we will write the redo log for
- the BLOB writes before writing the
- redo log for the record update. Thus,
- redo log application at crash recovery
- will see BLOBs being written to free pages. */
-
- btr_mark_freed_leaves(index, &mtr, TRUE);
+ columns while still x-latching
+ index->lock and block->lock. We have
+ to mtr_commit(mtr) first, so that the
+ redo log will be written in the
+ correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr
+ freed B-tree pages on which some of
+ the big_rec fields will be written. */
+ btr_cur_mtr_commit_and_start(&cursor, &mtr);
rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(
@@ -2119,8 +2114,7 @@ row_ins_index_entry_low(
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr,
- FALSE, &mtr);
+ rec, offsets, &mtr, FALSE, big_rec);
/* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if
@@ -2133,9 +2127,6 @@ row_ins_index_entry_low(
undo log, and thus the record cannot
be rolled back. */
ut_a(err == DB_SUCCESS);
- /* Free the pages again
- in order to avoid a leak. */
- btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec;
}
} else {
@@ -2177,7 +2168,7 @@ function_exit:
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr, FALSE, NULL);
+ rec, offsets, &mtr, FALSE, big_rec);
stored_big_rec:
if (modify) {
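Note: this hunk is the core of the revert on the insert path: instead of
temporarily marking freed leaves allocated, the record mini-transaction is
committed and restarted before any BLOB pages are written, so the redo
records reach the log in an order that crash recovery can replay. The
restored sequence, condensed from the hunk above:

	btr_cur_mtr_commit_and_start(&cursor, &mtr); /* record redo goes first */
	rec = btr_cur_get_rec(&cursor);
	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &heap);
	err = btr_store_big_rec_extern_fields(index,
					      btr_cur_get_block(&cursor),
					      rec, offsets, &mtr, FALSE,
					      big_rec);
	ut_a(err == DB_SUCCESS);	/* undo already written: no rollback */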
diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c
index e476ffae84e..9cdbbe76e04 100644
--- a/storage/innodb_plugin/row/row0row.c
+++ b/storage/innodb_plugin/row/row0row.c
@@ -243,20 +243,19 @@ row_build(
}
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- if (rec_offs_any_null_extern(rec, offsets)) {
- /* This condition can occur during crash recovery
- before trx_rollback_active() has completed execution.
-
- This condition is possible if the server crashed
- during an insert or update-by-delete-and-insert before
- btr_store_big_rec_extern_fields() did mtr_commit() all
- BLOB pointers to the freshly inserted clustered index
- record. */
- ut_a(trx_assert_recovered(
- row_get_rec_trx_id(rec, index, offsets)));
- ut_a(trx_undo_roll_ptr_is_insert(
- row_get_rec_roll_ptr(rec, index, offsets)));
- }
+ /* This condition can occur during crash recovery before
+ trx_rollback_active() has completed execution.
+
+ This condition is possible if the server crashed
+ during an insert or update before
+ btr_store_big_rec_extern_fields() did mtr_commit() all
+ BLOB pointers to the clustered index record.
+
+ If the record contains a null BLOB pointer, look up the
+ transaction that holds the implicit lock on this record, and
+ assert that it was recovered (and will soon be rolled back). */
+ ut_a(!rec_offs_any_null_extern(rec, offsets)
+ || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) {
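Note: the relaxed assertion drops the insert-only roll-pointer check,
because after this revert the crash window between the record
mini-transaction and the BLOB writes also exists for updates. For context,
a "null BLOB pointer" is an all-zero 20-byte external field reference; a
hedged sketch of the underlying test, using InnoDB's BLOB reference layout:

	/* The reference occupies the last BTR_EXTERN_FIELD_REF_SIZE (20)
	   bytes of the stored field; all-zero means the BLOB pages and
	   the pointer were never written. */
	field = rec_get_nth_field(rec, offsets, i, &len);
	if (!memcmp(field + len - BTR_EXTERN_FIELD_REF_SIZE,
		    field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)) {
		/* unset pointer: only legal for a recovered transaction */
	}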
diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c
index 05856687015..b5952ff0a78 100644
--- a/storage/innodb_plugin/row/row0upd.c
+++ b/storage/innodb_plugin/row/row0upd.c
@@ -1978,22 +1978,21 @@ row_upd_clust_rec(
rec_offs_init(offsets_);
ut_a(err == DB_SUCCESS);
- /* Write out the externally stored columns, but
- allocate the pages and write the pointers using the
- mini-transaction of the record update. If any pages
- were freed in the update, temporarily mark them
- allocated so that off-page columns will not overwrite
- them. We must do this, because we write the redo log
- for the BLOB writes before writing the redo log for
- the record update. */
-
- btr_mark_freed_leaves(index, mtr, TRUE);
+ /* Write out the externally stored columns while still
+ x-latching index->lock and block->lock. We have to
+ mtr_commit(mtr) first, so that the redo log will be
+ written in the correct order. Otherwise, we would run
+ into trouble on crash recovery if mtr freed B-tree
+ pages on which some of the big_rec fields will be
+ written. */
+ btr_cur_mtr_commit_and_start(btr_cur, mtr);
+
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(btr_cur), rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- big_rec, mtr, TRUE, mtr);
+ mtr, TRUE, big_rec);
/* If writing big_rec fails (for example, because of
DB_OUT_OF_FILE_SPACE), the record will be corrupted.
Even if we did not update any externally stored
@@ -2003,8 +2002,6 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled
back. */
ut_a(err == DB_SUCCESS);
- /* Free the pages again in order to avoid a leak. */
- btr_mark_freed_leaves(index, mtr, FALSE);
}
mtr_commit(mtr);
diff --git a/storage/innodb_plugin/trx/trx0undo.c b/storage/innodb_plugin/trx/trx0undo.c
index c36f55fbd9c..746f0808643 100644
--- a/storage/innodb_plugin/trx/trx0undo.c
+++ b/storage/innodb_plugin/trx/trx0undo.c
@@ -912,7 +912,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP,
- TRUE, mtr, mtr);
+ TRUE, mtr);
fil_space_release_free_extents(undo->space, n_reserved);