summaryrefslogtreecommitdiff
path: root/storage/innobase/btr/btr0cur.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/btr/btr0cur.c')
-rw-r--r--storage/innobase/btr/btr0cur.c251
1 files changed, 173 insertions, 78 deletions
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 1551aee5426..e227e47b52f 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
@@ -1860,7 +1860,7 @@ btr_cur_update_in_place(
page_zip = buf_block_get_page_zip(block);
/* Check that enough space is available on the compressed page. */
- if (UNIV_LIKELY_NULL(page_zip)
+ if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
rec_offs_size(offsets), FALSE, mtr)) {
return(DB_ZIP_OVERFLOW);
@@ -2051,7 +2051,7 @@ any_extern:
ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
- if (UNIV_LIKELY_NULL(page_zip)
+ if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
new_rec_size, TRUE, mtr)) {
err = DB_ZIP_OVERFLOW;
@@ -2209,7 +2209,9 @@ btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update */
+ btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
+ cursor may become invalid if *big_rec == NULL
+ || !(flags & BTR_KEEP_POS_FLAG) */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
@@ -2348,10 +2350,10 @@ btr_cur_pessimistic_update(
record to be inserted: we have to remember which fields were such */
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+ ut_ad(rec_offs_validate(rec, index, offsets));
n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
ut_ad(page_is_comp(page));
if (page_zip_rec_needs_ext(
rec_get_converted_size(index, new_entry, n_ext),
@@ -2371,6 +2373,10 @@ make_external:
err = DB_TOO_BIG_RECORD;
goto return_after_reservations;
}
+
+ ut_ad(page_is_leaf(page));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
}
/* Store state of explicit locks on rec on the page infimum record,
@@ -2398,6 +2404,8 @@ make_external:
rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
if (rec) {
+ page_cursor->rec = rec;
+
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
rec, block);
@@ -2411,7 +2419,10 @@ make_external:
rec, index, offsets, mtr);
}
- btr_cur_compress_if_useful(cursor, mtr);
+ btr_cur_compress_if_useful(
+ cursor,
+ big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
+ mtr);
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -2431,6 +2442,21 @@ make_external:
}
}
+ if (big_rec_vec) {
+ ut_ad(page_is_leaf(page));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(flags & BTR_KEEP_POS_FLAG);
+
+ /* btr_page_split_and_insert() in
+ btr_cur_pessimistic_insert() invokes
+ mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+ We must keep the index->lock when we created a
+ big_rec, so that row_upd_clust_rec() can store the
+ big_rec in the same mini-transaction. */
+
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ }
+
/* Was the record to be updated positioned as the first user
record on its page? */
was_first = page_cur_is_before_first(page_cursor);
@@ -2446,6 +2472,7 @@ make_external:
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ page_cursor->rec = rec;
if (dict_index_is_sec_or_ibuf(index)) {
/* Update PAGE_MAX_TRX_ID in the index page header.
@@ -2880,10 +2907,12 @@ UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr) /*!< in: mtr */
+ btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
+ cursor does not stay valid if !adjust and
+ compression occurs */
+ ibool adjust, /*!< in: TRUE if should adjust the
+ cursor position even if compression occurs */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_ad(mtr_memo_contains(mtr,
dict_index_get_lock(btr_cur_get_index(cursor)),
@@ -2892,7 +2921,7 @@ btr_cur_compress_if_useful(
MTR_MEMO_PAGE_X_FIX));
return(btr_cur_compress_recommendation(cursor, mtr)
- && btr_compress(cursor, mtr));
+ && btr_compress(cursor, adjust, mtr));
}
/*******************************************************//**
@@ -3134,7 +3163,7 @@ return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
- ret = btr_cur_compress_if_useful(cursor, mtr);
+ ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
}
if (n_extents > 0) {
@@ -3870,10 +3899,10 @@ btr_cur_set_ownership_of_extern_field(
byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
}
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
- } else if (UNIV_LIKELY(mtr != NULL)) {
+ } else if (mtr != NULL) {
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
MLOG_1BYTE, mtr);
@@ -4106,9 +4135,9 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-ulint
-btr_store_big_rec_extern_fields_func(
-/*=================================*/
+enum db_err
+btr_store_big_rec_extern_fields(
+/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
MUST be X-latched */
buf_block_t* rec_block, /*!< in/out: block containing rec */
@@ -4117,38 +4146,37 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
-#ifdef UNIV_DEBUG
- mtr_t* local_mtr, /*!< in: mtr containing the
- latch to rec and to the tree */
-#endif /* UNIV_DEBUG */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ibool update_in_place,/*! in: TRUE if the record is updated
- in place (not delete+insert) */
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- const big_rec_t*big_rec_vec) /*!< in: vector containing fields
+ const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
-
+ mtr_t* btr_mtr, /*!< in: mtr containing the
+ latches to the clustered index */
+ enum blob_op op) /*! in: operation code */
{
- ulint rec_page_no;
- byte* field_ref;
- ulint extern_len;
- ulint store_len;
- ulint page_no;
- ulint space_id;
- ulint zip_size;
- ulint prev_page_no;
- ulint hint_page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
+ ulint rec_page_no;
+ byte* field_ref;
+ ulint extern_len;
+ ulint store_len;
+ ulint page_no;
+ ulint space_id;
+ ulint zip_size;
+ ulint prev_page_no;
+ ulint hint_page_no;
+ ulint i;
+ mtr_t mtr;
+ mtr_t* alloc_mtr;
+ mem_heap_t* heap = NULL;
page_zip_des_t* page_zip;
- z_stream c_stream;
+ z_stream c_stream;
+ buf_block_t** freed_pages = NULL;
+ ulint n_freed_pages = 0;
+ enum db_err error = DB_SUCCESS;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
+ ut_ad(btr_mtr);
+ ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
ut_a(dict_index_is_clust(index));
@@ -4161,7 +4189,7 @@ btr_store_big_rec_extern_fields_func(
rec_page_no = buf_block_get_page_no(rec_block);
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
int err;
/* Zlib deflate needs 128 kilobytes for the default
@@ -4177,6 +4205,42 @@ btr_store_big_rec_extern_fields_func(
ut_a(err == Z_OK);
}
+ if (btr_blob_op_is_update(op)) {
+ /* Avoid reusing pages that have been previously freed
+ in btr_mtr. */
+ if (btr_mtr->n_freed_pages) {
+ if (heap == NULL) {
+ heap = mem_heap_create(
+ btr_mtr->n_freed_pages
+ * sizeof *freed_pages);
+ }
+
+ freed_pages = mem_heap_alloc(
+ heap,
+ btr_mtr->n_freed_pages
+ * sizeof *freed_pages);
+ n_freed_pages = 0;
+ }
+
+ /* Because btr_mtr will be committed after mtr, it is
+ possible that the tablespace has been extended when
+ the B-tree record was updated or inserted, or it will
+ be extended while allocating pages for big_rec.
+
+ TODO: In mtr (not btr_mtr), write a redo log record
+ about extending the tablespace to its current size,
+ and remember the current size. Whenever the tablespace
+ grows as pages are allocated, write further redo log
+ records to mtr. (Currently tablespace extension is not
+ covered by the redo log. If it were, the record would
+ only be written to btr_mtr, which is committed after
+ mtr.) */
+ alloc_mtr = btr_mtr;
+ } else {
+ /* Use the local mtr for allocations. */
+ alloc_mtr = &mtr;
+ }
+
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/* All pointers to externally stored columns in the record
must either be zero or they must be pointers to inherited
@@ -4191,7 +4255,7 @@ btr_store_big_rec_extern_fields_func(
/* Either this must be an update in place,
or the BLOB must be inherited, or the BLOB pointer
must be zero (will be written in this function). */
- ut_a(update_in_place
+ ut_a(op == BTR_STORE_UPDATE
|| (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
|| !memcmp(field_ref, field_ref_zero,
BTR_EXTERN_FIELD_REF_SIZE));
@@ -4216,7 +4280,7 @@ btr_store_big_rec_extern_fields_func(
prev_page_no = FIL_NULL;
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
int err = deflateReset(&c_stream);
ut_a(err == Z_OK);
@@ -4236,18 +4300,24 @@ btr_store_big_rec_extern_fields_func(
hint_page_no = prev_page_no + 1;
}
+alloc_another:
block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
+ FSP_NO_DIR, 0, alloc_mtr, &mtr);
if (UNIV_UNLIKELY(block == NULL)) {
-
mtr_commit(&mtr);
+ error = DB_OUT_OF_FILE_SPACE;
+ goto func_exit;
+ }
- if (UNIV_LIKELY_NULL(page_zip)) {
- deflateEnd(&c_stream);
- mem_heap_free(heap);
- }
-
- return(DB_OUT_OF_FILE_SPACE);
+ if (rw_lock_get_x_lock_count(&block->lock) > 1) {
+ /* This page must have been freed in
+ btr_mtr previously. Put it aside, and
+ allocate another page for the BLOB data. */
+ ut_ad(alloc_mtr == btr_mtr);
+ ut_ad(btr_blob_op_is_update(op));
+ ut_ad(n_freed_pages < btr_mtr->n_freed_pages);
+ freed_pages[n_freed_pages++] = block;
+ goto alloc_another;
}
page_no = buf_block_get_page_no(block);
@@ -4264,7 +4334,7 @@ btr_store_big_rec_extern_fields_func(
SYNC_EXTERN_STORAGE);
prev_page = buf_block_get_frame(prev_block);
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
mlog_write_ulint(
prev_page + FIL_PAGE_NEXT,
page_no, MLOG_4BYTES, &mtr);
@@ -4281,7 +4351,7 @@ btr_store_big_rec_extern_fields_func(
}
- if (UNIV_LIKELY_NULL(page_zip)) {
+ if (page_zip) {
int err;
page_zip_des_t* blob_page_zip;
@@ -4364,11 +4434,15 @@ btr_store_big_rec_extern_fields_func(
goto next_zip_page;
}
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
+ if (alloc_mtr == &mtr) {
+ rec_block = buf_page_get(
+ space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(
+ rec_block,
+ SYNC_NO_ORDER_CHECK);
+ }
if (err == Z_STREAM_END) {
mach_write_to_4(field_ref
@@ -4402,7 +4476,8 @@ btr_store_big_rec_extern_fields_func(
page_zip_write_blob_ptr(
page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no, &mtr);
+ big_rec_vec->fields[i].field_no,
+ alloc_mtr);
next_zip_page:
prev_page_no = page_no;
@@ -4447,19 +4522,23 @@ next_zip_page:
extern_len -= store_len;
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
+ if (alloc_mtr == &mtr) {
+ rec_block = buf_page_get(
+ space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(
+ rec_block,
+ SYNC_NO_ORDER_CHECK);
+ }
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len
- extern_len,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES, alloc_mtr);
if (prev_page_no == FIL_NULL) {
btr_blob_dbg_add_blob(
@@ -4469,18 +4548,19 @@ next_zip_page:
mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID,
- space_id,
- MLOG_4BYTES, &mtr);
+ space_id, MLOG_4BYTES,
+ alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO,
- page_no,
- MLOG_4BYTES, &mtr);
+ page_no, MLOG_4BYTES,
+ alloc_mtr);
mlog_write_ulint(field_ref
+ BTR_EXTERN_OFFSET,
FIL_PAGE_DATA,
- MLOG_4BYTES, &mtr);
+ MLOG_4BYTES,
+ alloc_mtr);
}
prev_page_no = page_no;
@@ -4494,8 +4574,23 @@ next_zip_page:
}
}
- if (UNIV_LIKELY_NULL(page_zip)) {
+func_exit:
+ if (page_zip) {
deflateEnd(&c_stream);
+ }
+
+ if (n_freed_pages) {
+ ulint i;
+
+ ut_ad(alloc_mtr == btr_mtr);
+ ut_ad(btr_blob_op_is_update(op));
+
+ for (i = 0; i < n_freed_pages; i++) {
+ btr_page_free_low(index, freed_pages[i], 0, alloc_mtr);
+ }
+ }
+
+ if (heap != NULL) {
mem_heap_free(heap);
}
@@ -4516,7 +4611,7 @@ next_zip_page:
ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- return(DB_SUCCESS);
+ return(error);
}
/*******************************************************************//**
@@ -4719,7 +4814,7 @@ btr_free_externally_stored_field(
btr_page_free_low(index, ext_block, 0, &mtr);
- if (UNIV_LIKELY(page_zip != NULL)) {
+ if (page_zip) {
mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no);
mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,