summaryrefslogtreecommitdiff
path: root/innobase/btr/btr0cur.c
diff options
context:
space:
mode:
authorunknown <heikki@donna.mysql.fi>2001-08-04 19:36:14 +0300
committerunknown <heikki@donna.mysql.fi>2001-08-04 19:36:14 +0300
commit88c80bb9996c7d5055beb3e6b47dfddbe48a4ac7 (patch)
treee6dc89cb458f496f2b93e907afb60d3cd886cc18 /innobase/btr/btr0cur.c
parent8857a2352e4bfba4a6b58bbf4b2ac31869703155 (diff)
downloadmariadb-git-88c80bb9996c7d5055beb3e6b47dfddbe48a4ac7.tar.gz
srv0srv.h Support raw disk partitions as data files
srv0start.c Support raw disk partitions as data files srv0srv.c Support raw disk partitions as data files row0purge.c < 4 GB rows, doublewrite, hang fixes row0row.c < 4 GB rows, doublewrite, hang fixes row0sel.c < 4 GB rows, doublewrite, hang fixes row0uins.c < 4 GB rows, doublewrite, hang fixes row0umod.c < 4 GB rows, doublewrite, hang fixes row0undo.c < 4 GB rows, doublewrite, hang fixes row0upd.c < 4 GB rows, doublewrite, hang fixes srv0srv.c < 4 GB rows, doublewrite, hang fixes srv0start.c < 4 GB rows, doublewrite, hang fixes sync0rw.c < 4 GB rows, doublewrite, hang fixes sync0sync.c < 4 GB rows, doublewrite, hang fixes trx0purge.c < 4 GB rows, doublewrite, hang fixes trx0rec.c < 4 GB rows, doublewrite, hang fixes trx0sys.c < 4 GB rows, doublewrite, hang fixes btr0btr.c < 4 GB rows, doublewrite, hang fixes btr0cur.c < 4 GB rows, doublewrite, hang fixes buf0buf.c < 4 GB rows, doublewrite, hang fixes buf0flu.c < 4 GB rows, doublewrite, hang fixes buf0rea.c < 4 GB rows, doublewrite, hang fixes data0data.c < 4 GB rows, doublewrite, hang fixes fil0fil.c < 4 GB rows, doublewrite, hang fixes fsp0fsp.c < 4 GB rows, doublewrite, hang fixes ibuf0ibuf.c < 4 GB rows, doublewrite, hang fixes lock0lock.c < 4 GB rows, doublewrite, hang fixes log0log.c < 4 GB rows, doublewrite, hang fixes log0recv.c < 4 GB rows, doublewrite, hang fixes os0file.c < 4 GB rows, doublewrite, hang fixes page0cur.c < 4 GB rows, doublewrite, hang fixes pars0pars.c < 4 GB rows, doublewrite, hang fixes rem0cmp.c < 4 GB rows, doublewrite, hang fixes rem0rec.c < 4 GB rows, doublewrite, hang fixes row0ins.c < 4 GB rows, doublewrite, hang fixes row0mysql.c < 4 GB rows, doublewrite, hang fixes univ.i < 4 GB rows, doublewrite, hang fixes data0data.ic < 4 GB rows, doublewrite, hang fixes mach0data.ic < 4 GB rows, doublewrite, hang fixes rem0rec.ic < 4 GB rows, doublewrite, hang fixes row0upd.ic < 4 GB rows, doublewrite, hang fixes trx0rec.ic < 4 GB rows, doublewrite, hang fixes rem0cmp.h < 4 GB rows, 
doublewrite, hang fixes rem0rec.h < 4 GB rows, doublewrite, hang fixes row0ins.h < 4 GB rows, doublewrite, hang fixes row0mysql.h < 4 GB rows, doublewrite, hang fixes row0row.h < 4 GB rows, doublewrite, hang fixes row0upd.h < 4 GB rows, doublewrite, hang fixes srv0srv.h < 4 GB rows, doublewrite, hang fixes sync0sync.h < 4 GB rows, doublewrite, hang fixes trx0rec.h < 4 GB rows, doublewrite, hang fixes trx0sys.h < 4 GB rows, doublewrite, hang fixes trx0types.h < 4 GB rows, doublewrite, hang fixes trx0undo.h < 4 GB rows, doublewrite, hang fixes ut0dbg.h < 4 GB rows, doublewrite, hang fixes ut0ut.h < 4 GB rows, doublewrite, hang fixes btr0btr.h < 4 GB rows, doublewrite, hang fixes btr0cur.h < 4 GB rows, doublewrite, hang fixes buf0buf.h < 4 GB rows, doublewrite, hang fixes buf0flu.h < 4 GB rows, doublewrite, hang fixes data0data.h < 4 GB rows, doublewrite, hang fixes dict0mem.h < 4 GB rows, doublewrite, hang fixes fil0fil.h < 4 GB rows, doublewrite, hang fixes fsp0fsp.h < 4 GB rows, doublewrite, hang fixes os0file.h < 4 GB rows, doublewrite, hang fixes innobase/include/btr0btr.h: < 4 GB rows, doublewrite, hang fixes innobase/include/btr0cur.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0buf.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0flu.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.h: < 4 GB rows, doublewrite, hang fixes innobase/include/dict0mem.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fil0fil.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fsp0fsp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/os0file.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0cmp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0ins.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0mysql.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0row.h: < 4 GB rows, doublewrite, hang fixes 
innobase/include/row0upd.h: < 4 GB rows, doublewrite, hang fixes innobase/include/sync0sync.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0sys.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0types.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0undo.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0dbg.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0ut.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/mach0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/row0upd.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/univ.i: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0btr.c: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0buf.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0flu.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0rea.c: < 4 GB rows, doublewrite, hang fixes innobase/data/data0data.c: < 4 GB rows, doublewrite, hang fixes innobase/fil/fil0fil.c: < 4 GB rows, doublewrite, hang fixes innobase/fsp/fsp0fsp.c: < 4 GB rows, doublewrite, hang fixes innobase/ibuf/ibuf0ibuf.c: < 4 GB rows, doublewrite, hang fixes innobase/lock/lock0lock.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0log.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0recv.c: < 4 GB rows, doublewrite, hang fixes innobase/os/os0file.c: < 4 GB rows, doublewrite, hang fixes innobase/page/page0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/pars/pars0pars.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0cmp.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0ins.c: < 4 GB 
rows, doublewrite, hang fixes innobase/row/row0mysql.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0row.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0sel.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0uins.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0umod.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0undo.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0upd.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0rw.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0sync.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0sys.c: < 4 GB rows, doublewrite, hang fixes innobase/srv/srv0srv.c: Support raw disk partitions as data files innobase/srv/srv0start.c: Support raw disk partitions as data files innobase/include/srv0srv.h: Support raw disk partitions as data files
Diffstat (limited to 'innobase/btr/btr0cur.c')
-rw-r--r--innobase/btr/btr0cur.c820
1 files changed, 790 insertions, 30 deletions
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index a8680c6b380..e8ff88c6f4f 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -12,7 +12,7 @@ many pages in the tablespace before we start the operation, because
if leaf splitting has been started, it is difficult to undo, except
by crashing the database and doing a roll-forward.
-(c) 1994-1996 Innobase Oy
+(c) 1994-2001 Innobase Oy
Created 10/16/1994 Heikki Tuuri
*******************************************************/
@@ -49,6 +49,15 @@ can be released by page reorganize, then it is reorganized */
this many index pages */
#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
+/* The structure of a BLOB part header; field offsets are relative to the start of the header */
+/*--------------------------------------*/
+#define BTR_BLOB_HDR_PART_LEN 0 /* BLOB part len on this
+ page */
+#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /* next BLOB part page no,
+ FIL_NULL if none */
+/*--------------------------------------*/
+#define BTR_BLOB_HDR_SIZE 8 /* total size of the header in bytes */
+
/***********************************************************************
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
@@ -60,6 +69,19 @@ btr_cur_add_path_info(
ulint height, /* in: height of the page in tree;
0 means leaf node */
ulint root_height); /* in: root node height in tree */
+/***************************************************************
+Frees the externally stored fields for a record, if the field is mentioned
+in the update vector. */
+static
+void
+btr_rec_free_updated_extern_fields(
+/*===============================*/
+ dict_index_t* index, /* in: index of rec; the index tree MUST be
+ X-latched */
+ rec_t* rec, /* in: record */
+ upd_t* update, /* in: update vector */
+ mtr_t* mtr); /* in: mini-transaction handle which contains
+ an X-latch to record page and to the tree */
/*==================== B-TREE SEARCH =========================*/
@@ -745,9 +767,13 @@ btr_cur_optimistic_insert(
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or
+ NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr) /* in: mtr */
{
+ big_rec_t* big_rec_vec = NULL;
dict_index_t* index;
page_cur_t* page_cursor;
page_t* page;
@@ -764,6 +790,8 @@ btr_cur_optimistic_insert(
ut_ad(dtuple_check_typed(entry));
+ *big_rec = NULL;
+
page = btr_cur_get_page(cursor);
index = cursor->index;
@@ -772,15 +800,27 @@ btr_cur_optimistic_insert(
max_size = page_get_max_insert_size_after_reorganize(page, 1);
level = btr_page_get_level(page, mtr);
+calculate_sizes_again:
/* Calculate the record size when entry is converted to a record */
data_size = dtuple_get_data_size(entry);
extra_size = rec_get_converted_extra_size(data_size,
dtuple_get_n_fields(entry));
rec_size = data_size + extra_size;
- if (rec_size >= page_get_free_space_of_empty() / 2) {
+ if ((rec_size >= page_get_free_space_of_empty() / 2)
+ || (rec_size >= REC_MAX_DATA_SIZE)) {
+
+ /* The record is so big that we have to store some fields
+ externally on separate database pages */
+
+ big_rec_vec = dtuple_convert_big_rec(index, entry);
+
+ if (big_rec_vec == NULL) {
+
+ return(DB_TOO_BIG_RECORD);
+ }
- return(DB_TOO_BIG_RECORD);
+ goto calculate_sizes_again;
}
/* If there have been many consecutive inserts, and we are on the leaf
@@ -795,7 +835,11 @@ btr_cur_optimistic_insert(
&& (0 == level)
&& (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
-
+
+ if (big_rec_vec) {
+ dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+ }
+
return(DB_FAIL);
}
@@ -804,6 +848,9 @@ btr_cur_optimistic_insert(
|| (page_get_max_insert_size(page, 1) >= rec_size)
|| (page_get_n_recs(page) <= 1))) {
+ if (big_rec_vec) {
+ dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+ }
return(DB_FAIL);
}
@@ -812,6 +859,9 @@ btr_cur_optimistic_insert(
if (err != DB_SUCCESS) {
+ if (big_rec_vec) {
+ dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+ }
return(err);
}
@@ -835,6 +885,19 @@ btr_cur_optimistic_insert(
*rec = page_cur_tuple_insert(page_cursor, entry, mtr);
+ if (!(*rec)) {
+ char* err_buf = mem_alloc(1000);
+
+ dtuple_sprintf(err_buf, 900, entry);
+
+ fprintf(stderr,
+ "InnoDB: Error: cannot insert tuple %s to index %s of table %s\n"
+ "InnoDB: max insert size %lu\n",
+ err_buf, index->name, index->table->name, max_size);
+
+ mem_free(err_buf);
+ }
+
ut_a(*rec); /* <- We calculated above the record would fit */
}
@@ -845,6 +908,7 @@ btr_cur_optimistic_insert(
btr_search_update_hash_on_insert(cursor);
}
#endif
+
if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
lock_update_insert(*rec);
@@ -860,6 +924,8 @@ btr_cur_optimistic_insert(
rec_size + PAGE_DIR_SLOT_SIZE);
}
+ *big_rec = big_rec_vec;
+
return(DB_SUCCESS);
}
@@ -884,17 +950,24 @@ btr_cur_pessimistic_insert(
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or
+ NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr) /* in: mtr */
{
- page_t* page;
- ulint err;
- ibool dummy_inh;
- ibool success;
- ulint n_extents = 0;
+ dict_index_t* index = cursor->index;
+ big_rec_t* big_rec_vec = NULL;
+ page_t* page;
+ ulint err;
+ ibool dummy_inh;
+ ibool success;
+ ulint n_extents = 0;
ut_ad(dtuple_check_typed(entry));
+ *big_rec = NULL;
+
page = btr_cur_get_page(cursor);
ut_ad(mtr_memo_contains(mtr,
@@ -908,8 +981,8 @@ btr_cur_pessimistic_insert(
cursor->flag = BTR_CUR_BINARY;
- err = btr_cur_optimistic_insert(flags, cursor, entry, rec, thr, mtr);
-
+ err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
+ thr, mtr);
if (err != DB_FAIL) {
return(err);
@@ -932,7 +1005,7 @@ btr_cur_pessimistic_insert(
n_extents = cursor->tree_height / 16 + 3;
- success = fsp_reserve_free_extents(cursor->index->space,
+ success = fsp_reserve_free_extents(index->space,
n_extents, FSP_NORMAL, mtr);
if (!success) {
err = DB_OUT_OF_FILE_SPACE;
@@ -941,7 +1014,22 @@ btr_cur_pessimistic_insert(
}
}
- if (dict_tree_get_page(cursor->index->tree)
+ if ((rec_get_converted_size(entry)
+ >= page_get_free_space_of_empty() / 2)
+ || (rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE)) {
+
+ /* The record is so big that we have to store some fields
+ externally on separate database pages */
+
+ big_rec_vec = dtuple_convert_big_rec(index, entry);
+
+ if (big_rec_vec == NULL) {
+
+ return(DB_TOO_BIG_RECORD);
+ }
+ }
+
+ if (dict_tree_get_page(index->tree)
== buf_frame_get_page_no(page)) {
/* The page is the root page */
@@ -950,7 +1038,7 @@ btr_cur_pessimistic_insert(
*rec = btr_page_split_and_insert(cursor, entry, mtr);
}
- btr_cur_position(cursor->index, page_rec_get_prev(*rec), cursor);
+ btr_cur_position(index, page_rec_get_prev(*rec), cursor);
#ifdef BTR_CUR_ADAPT
btr_search_update_hash_on_insert(cursor);
@@ -963,9 +1051,11 @@ btr_cur_pessimistic_insert(
err = DB_SUCCESS;
if (n_extents > 0) {
- fil_space_release_free_extents(cursor->index->space, n_extents);
+ fil_space_release_free_extents(index->space, n_extents);
}
-
+
+ *big_rec = big_rec_vec;
+
return(err);
}
@@ -1227,7 +1317,8 @@ btr_cur_optimistic_update(
dulint roll_ptr;
trx_t* trx;
mem_heap_t* heap;
- ibool reorganized = FALSE;
+ ibool reorganized = FALSE;
+ ulint i;
/* Only clustered index records are updated using this function */
ut_ad((cursor->index)->type & DICT_CLUSTERED);
@@ -1247,6 +1338,23 @@ btr_cur_optimistic_update(
cmpl_info, thr, mtr));
}
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+ if (upd_get_nth_field(update, i)->extern_storage) {
+
+ /* Externally stored fields are treated in pessimistic
+ update */
+
+ return(DB_OVERFLOW);
+ }
+ }
+
+ if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) {
+ /* Externally stored fields are treated in pessimistic
+ update */
+
+ return(DB_OVERFLOW);
+ }
+
page_cursor = btr_cur_get_page_cur(cursor);
heap = mem_heap_create(1024);
@@ -1260,9 +1368,9 @@ btr_cur_optimistic_update(
if (new_rec_size >= page_get_free_space_of_empty() / 2) {
- mem_heap_free(heap);
+ mem_heap_free(heap);
- return(DB_TOO_BIG_RECORD);
+ return(DB_OVERFLOW);
}
max_size = old_rec_size
@@ -1377,6 +1485,48 @@ btr_cur_pess_upd_restore_supremum(
rec);
}
+/***************************************************************
+Overwrites fields of a clustered index entry with the new column values
+listed in an update vector. Every non-NULL value is first copied to heap,
+so the entry points to the copy and not to the buffer owned by the update
+vector. The info bits of the entry are taken from the update vector. */
+static
+void
+btr_cur_copy_new_col_vals(
+/*======================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	upd_t*		update,	/* in: update vector */
+	mem_heap_t*	heap)	/* in: heap where data is copied */
+{
+	upd_field_t*	ufield;
+	dfield_t*	dst;
+	dfield_t*	src;
+	byte*		copy;
+	ulint		k;
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (k = 0; k < upd_get_n_fields(update); k++) {
+
+		ufield = upd_get_nth_field(update, k);
+
+		dst = dtuple_get_nth_field(entry, ufield->field_no);
+		src = &(ufield->new_val);
+
+		/* An SQL NULL has no data to copy: the field gets a NULL
+		pointer and the UNIV_SQL_NULL length */
+
+		copy = NULL;
+
+		if (src->len != UNIV_SQL_NULL) {
+			copy = mem_heap_alloc(heap, src->len);
+
+			ut_memcpy(copy, src->data, src->len);
+		}
+
+		dfield_set_data(dst, copy, src->len);
+	}
+}
+
/*****************************************************************
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -1389,8 +1539,9 @@ btr_cur_pessimistic_update(
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor does not stay valid */
+ btr_cur_t* cursor, /* in: cursor on the record to update */
+ big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
contain trx id and roll ptr fields, but
the values in update vector have no effect */
@@ -1399,6 +1550,8 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
{
+ big_rec_t* big_rec_vec = NULL;
+ big_rec_t* dummy_big_rec;
dict_index_t* index;
page_t* page;
dict_tree_t* tree;
@@ -1414,6 +1567,11 @@ btr_cur_pessimistic_update(
ibool was_first;
ibool success;
ulint n_extents = 0;
+ ulint* ext_vect;
+ ulint n_ext_vect;
+ ulint reserve_flag;
+
+ *big_rec = NULL;
page = btr_cur_get_page(cursor);
rec = btr_cur_get_rec(cursor);
@@ -1449,8 +1607,14 @@ btr_cur_pessimistic_update(
n_extents = cursor->tree_height / 16 + 3;
+ if (flags & BTR_NO_UNDO_LOG_FLAG) {
+ reserve_flag = FSP_CLEANING;
+ } else {
+ reserve_flag = FSP_NORMAL;
+ }
+
success = fsp_reserve_free_extents(cursor->index->space,
- n_extents, FSP_NORMAL, mtr);
+ n_extents, reserve_flag, mtr);
if (!success) {
err = DB_OUT_OF_FILE_SPACE;
@@ -1464,7 +1628,7 @@ btr_cur_pessimistic_update(
new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
- row_upd_clust_index_replace_new_col_vals(new_entry, update);
+ btr_cur_copy_new_col_vals(new_entry, update, heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
@@ -1487,17 +1651,49 @@ btr_cur_pessimistic_update(
lock_rec_store_on_page_infimum(rec);
btr_search_update_hash_on_delete(cursor);
+
+ if (flags & BTR_NO_UNDO_LOG_FLAG) {
+ /* We are in a transaction rollback undoing a row
+ update: we must free possible externally stored fields
+ which got new values in the update */
+
+ ut_a(big_rec_vec == NULL);
+
+ btr_rec_free_updated_extern_fields(index, rec, update, mtr);
+ }
+
+ /* We have to set appropriate extern storage bits in the new
+ record to be inserted: we have to remember which fields were such */
+
+ ext_vect = mem_heap_alloc(heap, sizeof(ulint) * rec_get_n_fields(rec));
+ n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, update);
+
page_cur_delete_rec(page_cursor, mtr);
page_cur_move_to_prev(page_cursor);
- if (optim_err == DB_UNDERFLOW) {
- rec = btr_cur_insert_if_possible(cursor, new_entry,
+ if ((rec_get_converted_size(new_entry) >=
+ page_get_free_space_of_empty() / 2)
+ || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) {
+
+ big_rec_vec = dtuple_convert_big_rec(index, new_entry);
+
+ if (big_rec_vec == NULL) {
+
+ mem_heap_free(heap);
+
+ goto return_after_reservations;
+ }
+ }
+
+ rec = btr_cur_insert_if_possible(cursor, new_entry,
&dummy_reorganized, mtr);
- ut_a(rec); /* <- We knew the insert would fit */
+ ut_a(rec || optim_err != DB_UNDERFLOW);
+ if (rec) {
lock_rec_restore_from_page_infimum(rec, page);
-
+ rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr);
+
btr_cur_compress_if_useful(cursor, mtr);
err = DB_SUCCESS;
@@ -1521,9 +1717,13 @@ btr_cur_pessimistic_update(
err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
| BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec, NULL, mtr);
+ cursor, new_entry, &rec,
+ &dummy_big_rec, NULL, mtr);
ut_a(rec);
ut_a(err == DB_SUCCESS);
+ ut_a(dummy_big_rec == NULL);
+
+ rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr);
lock_rec_restore_from_page_infimum(rec, page);
@@ -1541,9 +1741,12 @@ btr_cur_pessimistic_update(
return_after_reservations:
if (n_extents > 0) {
- fil_space_release_free_extents(cursor->index->space, n_extents);
+ fil_space_release_free_extents(cursor->index->space,
+ n_extents);
}
+ *big_rec = big_rec_vec;
+
return(err);
}
@@ -1932,6 +2135,11 @@ btr_cur_optimistic_delete(
ut_ad(btr_page_get_level(page, mtr) == 0);
+ if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) {
+
+ return(FALSE);
+ }
+
if (btr_cur_can_delete_without_compress(cursor, mtr)) {
lock_update_delete(btr_cur_get_rec(cursor));
@@ -2009,6 +2217,8 @@ btr_cur_pessimistic_delete(
}
}
+ btr_rec_free_externally_stored_fields(cursor->index,
+ btr_cur_get_rec(cursor), mtr);
if ((page_get_n_recs(page) < 2)
&& (dict_tree_get_page(btr_cur_get_tree(cursor))
!= buf_frame_get_page_no(page))) {
@@ -2079,7 +2289,7 @@ return_after_reservations:
fil_space_release_free_extents(cursor->index->space, n_extents);
}
- return(ret);
+ return(ret);
}
/***********************************************************************
@@ -2292,3 +2502,553 @@ btr_estimate_number_of_different_key_vals(
return(index->table->stat_n_rows / (total_n_recs / n_diff));
}
+
+/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
+
+/***********************************************************************
+Collects into ext_vect the positions of the fields which must be marked as
+extern storage in a record inserted for an update: first the fields marked
+as extern storage in the update vector, then the fields which have the
+extern storage bit set in rec and are not mentioned in the update vector. */
+
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+				/* out: number of values stored in ext_vect */
+	ulint*	ext_vect,	/* in: array of ulints, must be preallocated
+				to have space for all fields in rec */
+	rec_t*	rec,		/* in: record */
+	upd_t*	update)		/* in: update vector or NULL */
+{
+	ulint	n_pushed	= 0;
+	ulint	n_upd		= 0;
+	ibool	is_updated;
+	ulint	n;
+	ulint	j;
+	ulint	i;
+
+	if (update) {
+		n_upd = upd_get_n_fields(update);
+
+		for (i = 0; i < n_upd; i++) {
+
+			if (upd_get_nth_field(update, i)->extern_storage) {
+
+				ext_vect[n_pushed] =
+					upd_get_nth_field(update, i)->field_no;
+
+				n_pushed++;
+			}
+		}
+	}
+
+	n = rec_get_n_fields(rec);
+
+	for (i = 0; i < n; i++) {
+		if (!rec_get_nth_field_extern_bit(rec, i)) {
+
+			continue;
+		}
+
+		/* Check it is not in updated fields. n_upd stays 0 when
+		there is no update vector, so the scan is skipped then. */
+		is_updated = FALSE;
+
+		for (j = 0; j < n_upd; j++) {
+			if (upd_get_nth_field(update, j)->field_no == i) {
+				is_updated = TRUE;
+
+				/* One match is enough: stop scanning */
+				break;
+			}
+		}
+
+		if (!is_updated) {
+			ext_vect[n_pushed] = i;
+			n_pushed++;
+		}
+	}
+
+	return(n_pushed);
+}
+
+/***********************************************************************
+Reads the BLOB part length field from a BLOB part header. */
+static
+ulint
+btr_blob_get_part_len(
+/*==================*/
+				/* out: part length */
+	byte*	blob_header)	/* in: blob header */
+{
+	byte*	len_field = blob_header + BTR_BLOB_HDR_PART_LEN;
+
+	return(mach_read_from_4(len_field));
+}
+
+/***********************************************************************
+Reads the next page number field from a BLOB part header. */
+static
+ulint
+btr_blob_get_next_page_no(
+/*======================*/
+				/* out: page number or FIL_NULL if
+				no more pages */
+	byte*	blob_header)	/* in: blob header */
+{
+	byte*	next_field = blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO;
+
+	return(mach_read_from_4(next_field));
+}
+
+/***********************************************************************
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec. The fields are stored on pages allocated from leaf node
+file segment of the index tree. Every BLOB page is written in a
+mini-transaction of its own, which also updates the field reference in
+rec: the stored length grows page by page, and the space id, page number,
+offset and the extern storage bit are set when the first page of a field
+is written. */
+
+ulint
+btr_store_big_rec_extern_fields(
+/*============================*/
+					/* out: DB_SUCCESS or
+					DB_OUT_OF_FILE_SPACE if a page could
+					not be allocated */
+	dict_index_t*	index,		/* in: index of rec; the index tree
+					MUST be X-latched */
+	rec_t*		rec,		/* in: record */
+	big_rec_t*	big_rec_vec,	/* in: vector containing fields
+					to be stored externally */
+	mtr_t*		local_mtr)	/* in: mtr containing the latch to
+					rec and to the tree */
+{
+	byte*	data;
+	ulint	local_len;
+	ulint	extern_len;
+	ulint	store_len;
+	ulint	page_no;
+	page_t*	page;
+	ulint	space_id;
+	page_t*	prev_page;
+	page_t*	rec_page;
+	ulint	prev_page_no;
+	ulint	hint_page_no;
+	ulint	i;
+	mtr_t	mtr;
+
+	ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree),
+							MTR_MEMO_X_LOCK));
+	/* The latch is checked through rec: data is assigned only inside
+	the loop below and must not be read here */
+	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_a(index->type & DICT_CLUSTERED);
+
+	space_id = buf_frame_get_space_id(rec);
+
+	/* We have to create a file segment to the tablespace
+	for each field and put the pointer to the field in rec */
+
+	for (i = 0; i < big_rec_vec->n_fields; i++) {
+
+		data = rec_get_nth_field(rec, big_rec_vec->fields[i].field_no,
+								&local_len);
+		ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+		/* The field reference occupies the last
+		BTR_EXTERN_FIELD_REF_SIZE bytes of the locally stored data */
+		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+		extern_len = big_rec_vec->fields[i].len;
+
+		ut_a(extern_len > 0);
+
+		prev_page_no = FIL_NULL;
+
+		while (extern_len > 0) {
+			mtr_start(&mtr);
+
+			if (prev_page_no == FIL_NULL) {
+				hint_page_no = buf_frame_get_page_no(rec) + 1;
+			} else {
+				hint_page_no = prev_page_no + 1;
+			}
+
+			page = btr_page_alloc(index->tree, hint_page_no,
+						FSP_NO_DIR, 0, &mtr);
+			if (page == NULL) {
+
+				mtr_commit(&mtr);
+
+				return(DB_OUT_OF_FILE_SPACE);
+			}
+
+			page_no = buf_frame_get_page_no(page);
+
+			if (prev_page_no != FIL_NULL) {
+				/* Link the previous BLOB page to the new
+				one */
+				prev_page = buf_page_get(space_id,
+							prev_page_no,
+							RW_X_LATCH, &mtr);
+
+				buf_page_dbg_add_level(prev_page,
+							SYNC_EXTERN_STORAGE);
+
+				mlog_write_ulint(prev_page + FIL_PAGE_DATA
+						+ BTR_BLOB_HDR_NEXT_PAGE_NO,
+						page_no, MLOG_4BYTES, &mtr);
+			}
+
+			/* Store at most as much as fits on one page after
+			the BLOB part header */
+			if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
+						- BTR_BLOB_HDR_SIZE
+						- FIL_PAGE_DATA_END)) {
+				store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
+						- BTR_BLOB_HDR_SIZE
+						- FIL_PAGE_DATA_END;
+			} else {
+				store_len = extern_len;
+			}
+
+			mlog_write_string(page + FIL_PAGE_DATA
+						+ BTR_BLOB_HDR_SIZE,
+					big_rec_vec->fields[i].data
+						+ big_rec_vec->fields[i].len
+						- extern_len,
+					store_len, &mtr);
+			mlog_write_ulint(page + FIL_PAGE_DATA
+						+ BTR_BLOB_HDR_PART_LEN,
+					store_len, MLOG_4BYTES, &mtr);
+			mlog_write_ulint(page + FIL_PAGE_DATA
+						+ BTR_BLOB_HDR_NEXT_PAGE_NO,
+					FIL_NULL, MLOG_4BYTES, &mtr);
+
+			extern_len -= store_len;
+
+			rec_page = buf_page_get(space_id,
+						buf_frame_get_page_no(data),
+						RW_X_LATCH, &mtr);
+
+			buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
+
+			/* The length in the reference is written as two
+			4-byte words: the first is set to 0, the second to
+			the number of bytes stored externally so far */
+
+			mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
+						MLOG_4BYTES, &mtr);
+			mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
+					big_rec_vec->fields[i].len
+						- extern_len,
+					MLOG_4BYTES, &mtr);
+
+			if (prev_page_no == FIL_NULL) {
+				mlog_write_ulint(data + local_len
+							+ BTR_EXTERN_SPACE_ID,
+						space_id,
+						MLOG_4BYTES, &mtr);
+
+				mlog_write_ulint(data + local_len
+							+ BTR_EXTERN_PAGE_NO,
+						page_no,
+						MLOG_4BYTES, &mtr);
+
+				mlog_write_ulint(data + local_len
+							+ BTR_EXTERN_OFFSET,
+						FIL_PAGE_DATA,
+						MLOG_4BYTES, &mtr);
+
+				/* Set the bit denoting that this field
+				in rec is stored externally */
+
+				rec_set_nth_field_extern_bit(rec,
+					big_rec_vec->fields[i].field_no,
+					TRUE, &mtr);
+			}
+
+			prev_page_no = page_no;
+
+			mtr_commit(&mtr);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Frees the space in an externally stored field to the file space
+management. The BLOB pages are freed one at a time, each in a
+mini-transaction of its own: the first page of the chain is freed and the
+field reference in data is updated to the next page and to the remaining
+length, until the remaining length is 0. */
+
+void
+btr_free_externally_stored_field(
+/*=============================*/
+	dict_index_t*	index,		/* in: index of the data, the index
+					tree MUST be X-latched */
+	byte*		data,		/* in: internally stored data
+					+ reference to the externally
+					stored part */
+	ulint		local_len,	/* in: length of data */
+	mtr_t*		local_mtr)	/* in: mtr containing the latch to
+					data and an X-latch to the index
+					tree */
+{
+	page_t*	page;
+	page_t*	rec_page;
+	ulint	space_id;
+	ulint	page_no;
+	ulint	extern_len;
+	ulint	next_page_no;
+	ulint	part_len;
+	mtr_t	mtr;
+
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+	ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree),
+							MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* The field reference occupies the last BTR_EXTERN_FIELD_REF_SIZE
+	bytes of data */
+	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+	for (;;) {
+		mtr_start(&mtr);
+
+		rec_page = buf_page_get(buf_frame_get_space_id(data),
+				buf_frame_get_page_no(data), RW_X_LATCH, &mtr);
+
+		buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
+
+		space_id = mach_read_from_4(data + local_len
+						+ BTR_EXTERN_SPACE_ID);
+
+		page_no = mach_read_from_4(data + local_len
+						+ BTR_EXTERN_PAGE_NO);
+
+		extern_len = mach_read_from_4(data + local_len
+						+ BTR_EXTERN_LEN + 4);
+
+		/* If extern len is 0, then there is no external storage data
+		at all */
+
+		if (extern_len == 0) {
+
+			mtr_commit(&mtr);
+
+			return;
+		}
+
+		page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
+
+		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
+
+		next_page_no = btr_blob_get_next_page_no(page + FIL_PAGE_DATA);
+
+		part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
+
+		ut_a(extern_len >= part_len);
+
+		/* We must supply the page level (= 0) as an argument
+		because we did not store it on the page (we save the space
+		overhead from an index page header). */
+
+		btr_page_free_low(index->tree, page, 0, &mtr);
+
+		/* Make the reference point to the rest of the chain */
+
+		mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
+						next_page_no,
+						MLOG_4BYTES, &mtr);
+		mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
+						extern_len - part_len,
+						MLOG_4BYTES, &mtr);
+
+		/* The chain must end exactly when the length runs out */
+
+		if (next_page_no == FIL_NULL) {
+			ut_a(extern_len - part_len == 0);
+		}
+
+		if (extern_len - part_len == 0) {
+			ut_a(next_page_no == FIL_NULL);
+		}
+
+		mtr_commit(&mtr);
+	}
+}
+
+/***************************************************************
+Releases the external storage of every field of a record which has the
+extern storage bit set. A record whose data size does not exceed
+REC_1BYTE_OFFS_LIMIT is left alone without inspecting its fields. */
+
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+	dict_index_t*	index,	/* in: index of the data, the index
+				tree MUST be X-latched */
+	rec_t*		rec,	/* in: record */
+	mtr_t*		mtr)	/* in: mini-transaction handle which contains
+				an X-latch to record page and to the index
+				tree */
+{
+	byte*	field;
+	ulint	field_len;
+	ulint	n;
+	ulint	k;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
+							MTR_MEMO_PAGE_X_FIX));
+
+	if (rec_get_data_size(rec) > REC_1BYTE_OFFS_LIMIT) {
+
+		/* Free possible externally stored fields in the record */
+
+		n = rec_get_n_fields(rec);
+
+		for (k = 0; k < n; k++) {
+			if (!rec_get_nth_field_extern_bit(rec, k)) {
+
+				continue;
+			}
+
+			field = rec_get_nth_field(rec, k, &field_len);
+			btr_free_externally_stored_field(index, field,
+							field_len, mtr);
+		}
+	}
+}
+
+/***************************************************************
+Releases the external storage of those fields of a record which are
+listed in the update vector and have the extern storage bit set in the
+record. A record whose data size does not exceed REC_1BYTE_OFFS_LIMIT is
+left alone without inspecting its fields. */
+static
+void
+btr_rec_free_updated_extern_fields(
+/*===============================*/
+	dict_index_t*	index,	/* in: index of rec; the index tree MUST be
+				X-latched */
+	rec_t*		rec,	/* in: record */
+	upd_t*		update,	/* in: update vector */
+	mtr_t*		mtr)	/* in: mini-transaction handle which contains
+				an X-latch to record page and to the tree */
+{
+	upd_field_t*	upd_field;
+	byte*		field;
+	ulint		field_len;
+	ulint		n_upd;
+	ulint		k;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
+							MTR_MEMO_PAGE_X_FIX));
+
+	if (rec_get_data_size(rec) > REC_1BYTE_OFFS_LIMIT) {
+
+		/* Free possible externally stored fields in the record */
+
+		n_upd = upd_get_n_fields(update);
+
+		for (k = 0; k < n_upd; k++) {
+			upd_field = upd_get_nth_field(update, k);
+
+			if (!rec_get_nth_field_extern_bit(rec,
+						upd_field->field_no)) {
+
+				continue;
+			}
+
+			field = rec_get_nth_field(rec, upd_field->field_no,
+								&field_len);
+			btr_free_externally_stored_field(index, field,
+							field_len, mtr);
+		}
+	}
+}
+
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. Parameter
+data contains a pointer to 'internally' stored part of the field:
+possibly some data, and the reference to the externally stored part in
+the last BTR_EXTERN_FIELD_REF_SIZE bytes of data. The locally stored
+prefix is copied first; after it the parts read from the chain of BLOB
+pages are appended in chain order. The length of every part is checked
+against the space left in the buffer before it is copied, so an
+inconsistent chain trips an assertion instead of overrunning the heap
+buffer. */
+
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+				/* out: the whole field copied to heap */
+	ulint*		len,	/* out: length of the whole field */
+	byte*		data,	/* in: 'internally' stored part of the
+				field containing also the reference to
+				the external part */
+	ulint		local_len,/* in: length of data */
+	mem_heap_t*	heap)	/* in: mem heap */
+{
+	page_t*	page;
+	ulint	space_id;
+	ulint	page_no;
+	ulint	offset;
+	ulint	extern_len;
+	byte*	blob_header;
+	ulint	part_len;
+	byte*	buf;
+	ulint	copied_len;
+	mtr_t	mtr;
+
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	/* From here on local_len is the length of the locally stored
+	data only; the reference starts at data + local_len */
+
+	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+	space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
+
+	page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
+
+	offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
+
+	/* Currently a BLOB cannot be bigger than 4 GB; we
+	leave the 4 upper bytes in the length field unused */
+
+	extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
+
+	/* The total length is used as the allocation size and as the
+	copy bound below: make sure the unsigned sum did not wrap */
+
+	ut_a(local_len + extern_len >= local_len);
+
+	buf = mem_heap_alloc(heap, local_len + extern_len);
+
+	ut_memcpy(buf, data, local_len);
+	copied_len = local_len;
+
+	if (extern_len == 0) {
+		/* Nothing is stored externally: the local part is the
+		whole field */
+
+		*len = copied_len;
+
+		return(buf);
+	}
+
+	for (;;) {
+		/* Each page of the chain is read in a mini-transaction
+		of its own under an S-latch */
+
+		mtr_start(&mtr);
+
+		page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
+
+		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
+
+		blob_header = page + offset;
+
+		part_len = btr_blob_get_part_len(blob_header);
+
+		/* The part length comes from the page: check that the
+		part fits in what is left of the buffer BEFORE copying.
+		copied_len <= local_len + extern_len always holds here,
+		so the subtraction cannot wrap */
+
+		ut_a(part_len <= local_len + extern_len - copied_len);
+
+		ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
+								part_len);
+		copied_len += part_len;
+
+		page_no = btr_blob_get_next_page_no(blob_header);
+
+		/* On other BLOB pages except the first the BLOB header
+		always is at the page data start: */
+
+		offset = FIL_PAGE_DATA;
+
+		mtr_commit(&mtr);
+
+		if (page_no == FIL_NULL) {
+			/* End of the chain: everything the reference
+			promised must have been copied */
+
+			ut_a(copied_len == local_len + extern_len);
+
+			*len = copied_len;
+
+			return(buf);
+		}
+
+		/* The chain continues, so some data must still be
+		missing from the buffer */
+
+		ut_a(copied_len < local_len + extern_len);
+	}
+}
+
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. The field
+must have its extern bit set in the record. */
+
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+				/* out: the field copied to heap */
+	rec_t*		rec,	/* in: record */
+	ulint		no,	/* in: field number */
+	ulint*		len,	/* out: length of the field */
+	mem_heap_t*	heap)	/* in: mem heap */
+{
+	byte*	field;
+	ulint	field_len;
+
+	ut_a(rec_get_nth_field_extern_bit(rec, no));
+
+	/* The part of an externally stored field kept in the record
+	may begin with some of the field data. Its last 20 bytes hold
+	the space id, page number and offset where the rest of the
+	field is stored, and the length of that external part. Some
+	data may have to be kept locally to lift the local record
+	length above the 128 byte limit: then field offsets take two
+	bytes and the extern bit is available in them. */
+
+	field = rec_get_nth_field(rec, no, &field_len);
+
+	return(btr_copy_externally_stored_field(len, field, field_len,
+								heap));
+}