diff options
author | unknown <monty@donna.mysql.com> | 2001-02-17 14:19:19 +0200 |
---|---|---|
committer | unknown <monty@donna.mysql.com> | 2001-02-17 14:19:19 +0200 |
commit | 2662b59306ef0cd495fa6e2edf7129e58a11393a (patch) | |
tree | bfe39951a73e906579ab819bf5198ad8f3a64a36 /innobase/row/row0undo.c | |
parent | 66de55a56bdcf2f7a9c0c4f8e19b3e761475e202 (diff) | |
download | mariadb-git-2662b59306ef0cd495fa6e2edf7129e58a11393a.tar.gz |
Added Innobase to source distribution
Docs/manual.texi:
Added Innobase documentation
configure.in:
Incremented version
include/my_base.h:
Added option for Innobase
myisam/mi_check.c:
cleanup
mysql-test/t/bdb.test:
cleanup
mysql-test/t/innobase.test:
Extended with new tests from bdb.test
mysql-test/t/merge.test:
Added test of SHOW create
mysys/my_init.c:
Fix for UNIXWARE 7
scripts/mysql_install_db.sh:
Always write how to start mysqld
scripts/safe_mysqld.sh:
Fixed type
sql/ha_innobase.cc:
Update to new version
sql/ha_innobase.h:
Update to new version
sql/handler.h:
Added 'update_table_comment()' and 'append_create_info()'
sql/sql_delete.cc:
Fixes for Innobase
sql/sql_select.cc:
Fixes for Innobase
sql/sql_show.cc:
Append create information (for MERGE tables)
sql/sql_update.cc:
Fixes for Innobase
Diffstat (limited to 'innobase/row/row0undo.c')
-rw-r--r-- | innobase/row/row0undo.c | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c new file mode 100644 index 00000000000..6dc032f7e13 --- /dev/null +++ b/innobase/row/row0undo.c @@ -0,0 +1,313 @@ +/****************************************************** +Row undo + +(c) 1997 Innobase Oy + +Created 1/8/1997 Heikki Tuuri +*******************************************************/ + +#include "row0undo.h" + +#ifdef UNIV_NONINL +#include "row0undo.ic" +#endif + +#include "fsp0fsp.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0roll.h" +#include "trx0undo.h" +#include "trx0purge.h" +#include "trx0rec.h" +#include "que0que.h" +#include "row0row.h" +#include "row0uins.h" +#include "row0umod.h" +#include "srv0srv.h" + +/* How to undo row operations? +(1) For an insert, we have stored a prefix of the clustered index record +in the undo log. Using it, we look for the clustered record, and using +that we look for the records in the secondary indexes. The insert operation +may have been left incomplete, if the database crashed, for example. +We may have look at the trx id and roll ptr to make sure the record in the +clustered index is really the one for which the undo log record was +written. We can use the framework we get from the original insert op. +(2) Delete marking: We can use the framework we get from the original +delete mark op. We only have to check the trx id. +(3) Update: This may be the most complicated. We have to use the framework +we get from the original update op. + +What if the same trx repeatedly deletes and inserts an identical row. +Then the row id changes and also roll ptr. What if the row id was not +part of the ordering fields in the clustered index? Maybe we have to write +it to undo log. Well, maybe not, because if we order the row id and trx id +in descending order, then the only undeleted copy is the first in the +index. Our searches in row operations always position the cursor before +the first record in the result set. But, if there is no key defined for +a table, then it would be desirable that row id is in ascending order. +So, lets store row id in descending order only if it is not an ordering +field in the clustered index. + +NOTE: Deletes and inserts may lead to situation where there are identical +records in a secondary index. Is that a problem in the B-tree? Yes. +Also updates can lead to this, unless trx id and roll ptr are included in +ord fields. +(1) Fix in clustered indexes: include row id, trx id, and roll ptr +in node pointers of B-tree. +(2) Fix in secondary indexes: include all fields in node pointers, and +if an entry is inserted, check if it is equal to the right neighbor, +in which case update the right neighbor: the neighbor must be delete +marked, set it unmarked and write the trx id of the current transaction. + +What if the same trx repeatedly updates the same row, updating a secondary +index field or not? Updating a clustered index ordering field? + +(1) If it does not update the secondary index and not the clustered index +ord field. Then the secondary index record stays unchanged, but the +trx id in the secondary index record may be smaller than in the clustered +index record. This is no problem? +(2) If it updates secondary index ord field but not clustered: then in +secondary index there are delete marked records, which differ in an +ord field. No problem. +(3) Updates clustered ord field but not secondary, and secondary index +is unique. Then the record in secondary index is just updated at the +clustered ord field. +(4) + +Problem with duplicate records: +Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a +bigger trx id has inserted and delete marked a similar row, our trx inserts +again a similar row, and a trx with an even bigger id delete marks it. Then +the position of the row should change in the index if the trx id affects +the alphabetical ordering. + +Fix 2: If an insert encounters a similar row marked deleted, we turn the +insert into an 'update' of the row marked deleted. Then we must write undo +info on the update. A problem: what if a purge operation tries to remove +the delete marked row? + +We can think of the database row versions as a linked list which starts +from the record in the clustered index, and is linked by roll ptrs +through undo logs. The secondary index records are references which tell +what kinds of records can be found in this linked list for a record +in the clustered index. + +How to do the purge? A record can be removed from the clustered index +if its linked list becomes empty, i.e., the row has been marked deleted +and its roll ptr points to the record in the undo log we are going through, +doing the purge. Similarly, during a rollback, a record can be removed +if the stored roll ptr in the undo log points to a trx already (being) purged, +or if the roll ptr is NULL, i.e., it was a fresh insert. */ + +/************************************************************************ +Creates a row undo node to a query graph. */ + +undo_node_t* +row_undo_node_create( +/*=================*/ + /* out, own: undo node */ + trx_t* trx, /* in: transaction */ + que_thr_t* parent, /* in: parent node, i.e., a thr node */ + mem_heap_t* heap) /* in: memory heap where created */ +{ + undo_node_t* undo; + + ut_ad(trx && parent && heap); + + undo = mem_heap_alloc(heap, sizeof(undo_node_t)); + + undo->common.type = QUE_NODE_UNDO; + undo->common.parent = parent; + + undo->state = UNDO_NODE_FETCH_NEXT; + undo->trx = trx; + + undo->heap = mem_heap_create(256); + + return(undo); +} + +/*************************************************************** +Looks for the clustered index record when node has the row reference. +The pcur in node is used in the search. If found, stores the row to node, +and stores the position of pcur, and detaches it. The pcur must be closed +by the caller in any case. */ + +ibool +row_undo_search_clust_to_pcur( +/*==========================*/ + /* out: TRUE if found; NOTE the node->pcur + must be closed by the caller, regardless of + the return value */ + undo_node_t* node, /* in: row undo node */ + que_thr_t* thr) /* in: query thread */ +{ + dict_index_t* clust_index; + ibool found; + mtr_t mtr; + ibool ret; + rec_t* rec; + + UT_NOT_USED(thr); + + mtr_start(&mtr); + + clust_index = dict_table_get_first_index(node->table); + + found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF, + node->table, node->ref, &mtr); + + rec = btr_pcur_get_rec(&(node->pcur)); + + if (!found || 0 != ut_dulint_cmp(node->roll_ptr, + row_get_rec_roll_ptr(rec, clust_index))) { + + /* We must remove the reservation on the undo log record + BEFORE releasing the latch on the clustered index page: this + is to make sure that some thread will eventually undo the + modification corresponding to node->roll_ptr. */ + + /* printf("--------------------undoing a previous version\n"); + */ + trx_undo_rec_release(node->trx, node->undo_no); + + ret = FALSE; + } else { + node->row = row_build(ROW_COPY_DATA, clust_index, rec, + node->heap); + btr_pcur_store_position(&(node->pcur), &mtr); + + ret = TRUE; + } + + btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); + + return(ret); +} + +/*************************************************************** +Fetches an undo log record and does the undo for the recorded operation. +If none left, or a partial rollback completed, returns control to the +parent node, which is always a query thread node. */ +static +ulint +row_undo( +/*=====*/ + /* out: DB_SUCCESS if operation successfully + completed, else error code */ + undo_node_t* node, /* in: row undo node */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + trx_t* trx; + dulint roll_ptr; + + ut_ad(node && thr); + + trx = node->trx; + + if (node->state == UNDO_NODE_FETCH_NEXT) { + + /* The call below also starts &mtr */ + node->undo_rec = trx_roll_pop_top_rec_of_trx(trx, + trx->roll_limit, + &roll_ptr, + node->heap); + if (!node->undo_rec) { + /* Rollback completed for this query thread */ + + thr->run_node = que_node_get_parent(node); + + return(DB_SUCCESS); + } + + node->roll_ptr = roll_ptr; + node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); + + if (trx_undo_roll_ptr_is_insert(roll_ptr)) { + + node->state = UNDO_NODE_INSERT; + } else { + node->state = UNDO_NODE_MODIFY; + } + + } else if (node->state == UNDO_NODE_PREV_VERS) { + + /* Undo should be done to the same clustered index record + again in this same rollback, restoring the previous version */ + + roll_ptr = node->new_roll_ptr; + + node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr, + node->heap); + node->roll_ptr = roll_ptr; + node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); + + if (trx_undo_roll_ptr_is_insert(roll_ptr)) { + + node->state = UNDO_NODE_INSERT; + } else { + node->state = UNDO_NODE_MODIFY; + } + } + + if (node->state == UNDO_NODE_INSERT) { + + err = row_undo_ins(node, thr); + + node->state = UNDO_NODE_FETCH_NEXT; + } else { + ut_ad(node->state == UNDO_NODE_MODIFY); + err = row_undo_mod(node, thr); + } + + /* Do some cleanup */ + btr_pcur_close(&(node->pcur)); + + mem_heap_empty(node->heap); + + thr->run_node = node; + + return(err); +} + +/*************************************************************** +Undoes a row operation in a table. This is a high-level function used +in SQL execution graphs. */ + +que_thr_t* +row_undo_step( +/*==========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + undo_node_t* node; + trx_t* trx; + + ut_ad(thr); + + srv_activity_count++; + + trx = thr_get_trx(thr); + + node = thr->run_node; + + ut_ad(que_node_get_type(node) == QUE_NODE_UNDO); + + err = row_undo(node, thr); + + trx->error_state = err; + + if (err != DB_SUCCESS) { + /* SQL error detected */ + + ut_a(0); + + return(NULL); + } + + return(thr); +} |