diff options
Diffstat (limited to 'storage/innobase/row/row0purge.c')
-rw-r--r-- | storage/innobase/row/row0purge.c | 671 |
1 files changed, 671 insertions, 0 deletions
diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c new file mode 100644 index 00000000000..5893e016011 --- /dev/null +++ b/storage/innobase/row/row0purge.c @@ -0,0 +1,671 @@ +/****************************************************** +Purge obsolete records + +(c) 1997 Innobase Oy + +Created 3/14/1997 Heikki Tuuri +*******************************************************/ + +#include "row0purge.h" + +#ifdef UNIV_NONINL +#include "row0purge.ic" +#endif + +#include "fsp0fsp.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0roll.h" +#include "trx0undo.h" +#include "trx0purge.h" +#include "trx0rec.h" +#include "que0que.h" +#include "row0row.h" +#include "row0upd.h" +#include "row0vers.h" +#include "row0mysql.h" +#include "log0log.h" + +/************************************************************************ +Creates a purge node to a query graph. */ + +purge_node_t* +row_purge_node_create( +/*==================*/ + /* out, own: purge node */ + que_thr_t* parent, /* in: parent node, i.e., a thr node */ + mem_heap_t* heap) /* in: memory heap where created */ +{ + purge_node_t* node; + + ut_ad(parent && heap); + + node = mem_heap_alloc(heap, sizeof(purge_node_t)); + + node->common.type = QUE_NODE_PURGE; + node->common.parent = parent; + + node->heap = mem_heap_create(256); + + return(node); +} + +/*************************************************************** +Repositions the pcur in the purge node on the clustered index record, +if found. */ +static +ibool +row_purge_reposition_pcur( +/*======================*/ + /* out: TRUE if the record was found */ + ulint mode, /* in: latching mode */ + purge_node_t* node, /* in: row purge node */ + mtr_t* mtr) /* in: mtr */ +{ + ibool found; + + if (node->found_clust) { + found = btr_pcur_restore_position(mode, &(node->pcur), mtr); + + return(found); + } + + found = row_search_on_row_ref(&(node->pcur), mode, node->table, + node->ref, mtr); + node->found_clust = found; + + if (found) { + btr_pcur_store_position(&(node->pcur), mtr); + } + + return(found); +} + +/*************************************************************** +Removes a delete marked clustered index record if possible. */ +static +ibool +row_purge_remove_clust_if_poss_low( +/*===============================*/ + /* out: TRUE if success, or if not found, or + if modified after the delete marking */ + purge_node_t* node, /* in: row purge node */ + ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ +{ + dict_index_t* index; + btr_pcur_t* pcur; + btr_cur_t* btr_cur; + ibool success; + ulint err; + mtr_t mtr; + rec_t* rec; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + *offsets_ = (sizeof offsets_) / sizeof *offsets_; + + index = dict_table_get_first_index(node->table); + + pcur = &(node->pcur); + btr_cur = btr_pcur_get_btr_cur(pcur); + + mtr_start(&mtr); + + success = row_purge_reposition_pcur(mode, node, &mtr); + + if (!success) { + /* The record is already removed */ + + btr_pcur_commit_specify_mtr(pcur, &mtr); + + return(TRUE); + } + + rec = btr_pcur_get_rec(pcur); + + if (0 != ut_dulint_cmp(node->roll_ptr, + row_get_rec_roll_ptr(rec, index, rec_get_offsets( + rec, index, offsets_, ULINT_UNDEFINED, &heap)))) { + if (heap) { + mem_heap_free(heap); + } + /* Someone else has modified the record later: do not remove */ + btr_pcur_commit_specify_mtr(pcur, &mtr); + + return(TRUE); + } + + if (heap) { + mem_heap_free(heap); + } + + if (mode == BTR_MODIFY_LEAF) { + success = btr_cur_optimistic_delete(btr_cur, &mtr); + } else { + ut_ad(mode == BTR_MODIFY_TREE); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr); + + if (err == DB_SUCCESS) { + success = TRUE; + } else if (err == DB_OUT_OF_FILE_SPACE) { + success = FALSE; + } else { + ut_error; + } + } + + btr_pcur_commit_specify_mtr(pcur, &mtr); + + return(success); +} + +/*************************************************************** +Removes a clustered index record if it has not been modified after the delete +marking. */ +static +void +row_purge_remove_clust_if_poss( +/*===========================*/ + purge_node_t* node) /* in: row purge node */ +{ + ibool success; + ulint n_tries = 0; + +/* fputs("Purge: Removing clustered record\n", stderr); */ + + success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF); + if (success) { + + return; + } +retry: + success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE); + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ + + if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { + n_tries++; + + os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); + + goto retry; + } + + ut_a(success); +} + +/*************************************************************** +Removes a secondary index entry if possible. */ +static +ibool +row_purge_remove_sec_if_poss_low( +/*=============================*/ + /* out: TRUE if success or if not found */ + purge_node_t* node, /* in: row purge node */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: index entry */ + ulint mode) /* in: latch mode BTR_MODIFY_LEAF or + BTR_MODIFY_TREE */ +{ + btr_pcur_t pcur; + btr_cur_t* btr_cur; + ibool success; + ibool old_has = 0; /* remove warning */ + ibool found; + ulint err; + mtr_t mtr; + mtr_t* mtr_vers; + + log_free_check(); + mtr_start(&mtr); + + found = row_search_index_entry(index, entry, mode, &pcur, &mtr); + + if (!found) { + /* Not found */ + + /* fputs("PURGE:........sec entry not found\n", stderr); */ + /* dtuple_print(entry); */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(TRUE); + } + + btr_cur = btr_pcur_get_btr_cur(&pcur); + + /* We should remove the index record if no later version of the row, + which cannot be purged yet, requires its existence. If some requires, + we should do nothing. */ + + mtr_vers = mem_alloc(sizeof(mtr_t)); + + mtr_start(mtr_vers); + + success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers); + + if (success) { + old_has = row_vers_old_has_index_entry(TRUE, + btr_pcur_get_rec(&(node->pcur)), + mtr_vers, index, entry); + } + + btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers); + + mem_free(mtr_vers); + + if (!success || !old_has) { + /* Remove the index record */ + + if (mode == BTR_MODIFY_LEAF) { + success = btr_cur_optimistic_delete(btr_cur, &mtr); + } else { + ut_ad(mode == BTR_MODIFY_TREE); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + FALSE, &mtr); + if (err == DB_SUCCESS) { + success = TRUE; + } else if (err == DB_OUT_OF_FILE_SPACE) { + success = FALSE; + } else { + ut_error; + } + } + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(success); +} + +/*************************************************************** +Removes a secondary index entry if possible. */ +UNIV_INLINE +void +row_purge_remove_sec_if_poss( +/*=========================*/ + purge_node_t* node, /* in: row purge node */ + dict_index_t* index, /* in: index */ + dtuple_t* entry) /* in: index entry */ +{ + ibool success; + ulint n_tries = 0; + +/* fputs("Purge: Removing secondary record\n", stderr); */ + + success = row_purge_remove_sec_if_poss_low(node, index, entry, + BTR_MODIFY_LEAF); + if (success) { + + return; + } +retry: + success = row_purge_remove_sec_if_poss_low(node, index, entry, + BTR_MODIFY_TREE); + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ + + if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { + + n_tries++; + + os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); + + goto retry; + } + + ut_a(success); +} + +/*************************************************************** +Purges a delete marking of a record. */ +static +void +row_purge_del_mark( +/*===============*/ + purge_node_t* node) /* in: row purge node */ +{ + mem_heap_t* heap; + dtuple_t* entry; + dict_index_t* index; + + ut_ad(node); + + heap = mem_heap_create(1024); + + while (node->index != NULL) { + index = node->index; + + /* Build the index entry */ + entry = row_build_index_entry(node->row, index, heap); + + row_purge_remove_sec_if_poss(node, index, entry); + + node->index = dict_table_get_next_index(node->index); + } + + mem_heap_free(heap); + + row_purge_remove_clust_if_poss(node); +} + +/*************************************************************** +Purges an update of an existing record. Also purges an update of a delete +marked record if that record contained an externally stored field. */ +static +void +row_purge_upd_exist_or_extern( +/*==========================*/ + purge_node_t* node) /* in: row purge node */ +{ + mem_heap_t* heap; + dtuple_t* entry; + dict_index_t* index; + upd_field_t* ufield; + ibool is_insert; + ulint rseg_id; + ulint page_no; + ulint offset; + ulint internal_offset; + byte* data_field; + ulint data_field_len; + ulint i; + mtr_t mtr; + + ut_ad(node); + + if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { + + goto skip_secondaries; + } + + heap = mem_heap_create(1024); + + while (node->index != NULL) { + index = node->index; + + if (row_upd_changes_ord_field_binary(NULL, node->index, + node->update)) { + /* Build the older version of the index entry */ + entry = row_build_index_entry(node->row, index, heap); + + row_purge_remove_sec_if_poss(node, index, entry); + } + + node->index = dict_table_get_next_index(node->index); + } + + mem_heap_free(heap); + +skip_secondaries: + /* Free possible externally stored fields */ + for (i = 0; i < upd_get_n_fields(node->update); i++) { + + ufield = upd_get_nth_field(node->update, i); + + if (ufield->extern_storage) { + /* We use the fact that new_val points to + node->undo_rec and get thus the offset of + dfield data inside the unod record. Then we + can calculate from node->roll_ptr the file + address of the new_val data */ + + internal_offset = ((byte*)ufield->new_val.data) + - node->undo_rec; + + ut_a(internal_offset < UNIV_PAGE_SIZE); + + trx_undo_decode_roll_ptr(node->roll_ptr, + &is_insert, &rseg_id, + &page_no, &offset); + mtr_start(&mtr); + + /* We have to acquire an X-latch to the clustered + index tree */ + + index = dict_table_get_first_index(node->table); + + mtr_x_lock(dict_tree_get_lock(index->tree), &mtr); + + /* NOTE: we must also acquire an X-latch to the + root page of the tree. We will need it when we + free pages from the tree. If the tree is of height 1, + the tree X-latch does NOT protect the root page, + because it is also a leaf page. Since we will have a + latch on an undo log page, we would break the + latching order if we would only later latch the + root page of such a tree! */ + + btr_root_get(index->tree, &mtr); + + /* We assume in purge of externally stored fields + that the space id of the undo log record is 0! */ + + data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + + offset + internal_offset; + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(buf_frame_align(data_field), + SYNC_TRX_UNDO_PAGE); +#endif /* UNIV_SYNC_DEBUG */ + + data_field_len = ufield->new_val.len; + + btr_free_externally_stored_field(index, data_field, + data_field_len, FALSE, &mtr); + mtr_commit(&mtr); + } + } +} + +/*************************************************************** +Parses the row reference and other info in a modify undo log record. */ +static +ibool +row_purge_parse_undo_rec( +/*=====================*/ + /* out: TRUE if purge operation required: + NOTE that then the CALLER must unfreeze + data dictionary! */ + purge_node_t* node, /* in: row undo node */ + ibool* updated_extern, + /* out: TRUE if an externally stored field + was updated */ + que_thr_t* thr) /* in: query thread */ +{ + dict_index_t* clust_index; + byte* ptr; + trx_t* trx; + dulint undo_no; + dulint table_id; + dulint trx_id; + dulint roll_ptr; + ulint info_bits; + ulint type; + ulint cmpl_info; + + ut_ad(node && thr); + + trx = thr_get_trx(thr); + + ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, + updated_extern, &undo_no, &table_id); + node->rec_type = type; + + if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) { + + return(FALSE); + } + + ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, + &info_bits); + node->table = NULL; + + if (type == TRX_UNDO_UPD_EXIST_REC + && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) { + + /* Purge requires no changes to indexes: we may return */ + + return(FALSE); + } + + /* Prevent DROP TABLE etc. from running when we are doing the purge + for this row */ + + row_mysql_freeze_data_dictionary(trx); + + mutex_enter(&(dict_sys->mutex)); + + node->table = dict_table_get_on_id_low(table_id, trx); + + mutex_exit(&(dict_sys->mutex)); + + if (node->table == NULL) { + /* The table has been dropped: no need to do purge */ + + row_mysql_unfreeze_data_dictionary(trx); + + return(FALSE); + } + + if (node->table->ibd_file_missing) { + /* We skip purge of missing .ibd files */ + + node->table = NULL; + + row_mysql_unfreeze_data_dictionary(trx); + + return(FALSE); + } + + clust_index = dict_table_get_first_index(node->table); + + if (clust_index == NULL) { + /* The table was corrupt in the data dictionary */ + + row_mysql_unfreeze_data_dictionary(trx); + + return(FALSE); + } + + ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), + node->heap); + + ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, + roll_ptr, info_bits, trx, + node->heap, &(node->update)); + + /* Read to the partial row the fields that occur in indexes */ + + if (!cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + ptr = trx_undo_rec_get_partial_row(ptr, clust_index, + &(node->row), node->heap); + } + + return(TRUE); +} + +/*************************************************************** +Fetches an undo log record and does the purge for the recorded operation. +If none left, or the current purge completed, returns the control to the +parent node, which is always a query thread node. */ +static +ulint +row_purge( +/*======*/ + /* out: DB_SUCCESS if operation successfully + completed, else error code */ + purge_node_t* node, /* in: row purge node */ + que_thr_t* thr) /* in: query thread */ +{ + dulint roll_ptr; + ibool purge_needed; + ibool updated_extern; + trx_t* trx; + + ut_ad(node && thr); + + trx = thr_get_trx(thr); + + node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, + &(node->reservation), + node->heap); + if (!node->undo_rec) { + /* Purge completed for this query thread */ + + thr->run_node = que_node_get_parent(node); + + return(DB_SUCCESS); + } + + node->roll_ptr = roll_ptr; + + if (node->undo_rec == &trx_purge_dummy_rec) { + purge_needed = FALSE; + } else { + purge_needed = row_purge_parse_undo_rec(node, &updated_extern, + thr); + /* If purge_needed == TRUE, we must also remember to unfreeze + data dictionary! */ + } + + if (purge_needed) { + node->found_clust = FALSE; + + node->index = dict_table_get_next_index( + dict_table_get_first_index(node->table)); + + if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { + row_purge_del_mark(node); + + } else if (updated_extern + || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { + + row_purge_upd_exist_or_extern(node); + } + + if (node->found_clust) { + btr_pcur_close(&(node->pcur)); + } + + row_mysql_unfreeze_data_dictionary(trx); + } + + /* Do some cleanup */ + trx_purge_rec_release(node->reservation); + mem_heap_empty(node->heap); + + thr->run_node = node; + + return(DB_SUCCESS); +} + +/*************************************************************** +Does the purge operation for a single undo log record. This is a high-level +function used in an SQL execution graph. */ + +que_thr_t* +row_purge_step( +/*===========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + purge_node_t* node; + ulint err; + + ut_ad(thr); + + node = thr->run_node; + + ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); + + err = row_purge(node, thr); + + ut_ad(err == DB_SUCCESS); + + return(thr); +} |