summaryrefslogtreecommitdiff
path: root/storage/innobase/read/read0read.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/read/read0read.c')
-rw-r--r--storage/innobase/read/read0read.c222
1 files changed, 174 insertions, 48 deletions
diff --git a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
index 06349c1fd39..1d6809ae5b7 100644
--- a/storage/innobase/read/read0read.c
+++ b/storage/innobase/read/read0read.c
@@ -15,6 +15,111 @@ Created 2/16/1997 Heikki Tuuri
#include "srv0srv.h"
#include "trx0sys.h"
+/*
+-------------------------------------------------------------------------------
+FACT A: Cursor read view on a secondary index sees only committed versions
+-------
+of the records in the secondary index or those versions of rows created
+by transaction which created a cursor before cursor was created even
+if transaction which created the cursor has changed that clustered index page.
+
+PROOF: We must show that read goes always to the clustered index record
+to see that record is visible in the cursor read view. Consider e.g.
+following table and SQL-clauses:
+
+create table t1(a int not null, b int, primary key(a), index(b));
+insert into t1 values (1,1),(2,2);
+commit;
+
+Now consider that we have a cursor for a query
+
+select b from t1 where b >= 1;
+
+This query will use secondary key on the table t1. Now after the first fetch
+on this cursor if we do a update:
+
+update t1 set b = 5 where b = 2;
+
+Now second fetch of the cursor should not see record (2,5) instead it should
+see record (2,2).
+
+We also should show that if we have delete t1 where b = 5; we still
+can see record (2,2).
+
+When we access a secondary key record maximum transaction id is fetched
+from this record and this trx_id is compared to up_limit_id in the view.
+If trx_id in the record is greater or equal than up_limit_id in the view
+cluster record is accessed. Because trx_id of the creating
+transaction is stored when this view was created to the list of
+trx_ids not seen by this read view previous version of the
+record is requested to be built. This is build using clustered record.
+If the secondary key record is delete marked it's corresponding
+clustered record can be already be purged only if records
+trx_id < low_limit_no. Purge can't remove any record deleted by a
+transaction which was active when cursor was created. But, we still
+may have a deleted secondary key record but no clustered record. But,
+this is not a problem because this case is handled in
+row_sel_get_clust_rec() function which is called
+whenever we note that this read view does not see trx_id in the
+record. Thus, we see correct version. Q. E. D.
+
+-------------------------------------------------------------------------------
+FACT B: Cursor read view on a clustered index sees only committed versions
+-------
+of the records in the clustered index or those versions of rows created
+by transaction which created a cursor before cursor was created even
+if transaction which created the cursor has changed that clustered index page.
+
+PROOF: Consider e.g.following table and SQL-clauses:
+
+create table t1(a int not null, b int, primary key(a));
+insert into t1 values (1),(2);
+commit;
+
+Now consider that we have a cursor for a query
+
+select a from t1 where a >= 1;
+
+This query will use clustered key on the table t1. Now after the first fetch
+on this cursor if we do a update:
+
+update t1 set a = 5 where a = 2;
+
+Now second fetch of the cursor should not see record (5) instead it should
+see record (2).
+
+We also should show that if we have execute delete t1 where a = 5; after
+the cursor is opened we still can see record (2).
+
+When accessing clustered record we always check if this read view sees
+trx_id stored to clustered record. By default we don't see any changes
+if record trx_id >= low_limit_id i.e. change was made transaction
+which started after transaction which created the cursor. If row
+was changed by the future transaction a previous version of the
+clustered record is created. Thus we see only committed version in
+this case. We see all changes made by committed transactions i.e.
+record trx_id < up_limit_id. In this case we don't need to do anything,
+we already see correct version of the record. We don't see any changes
+made by active transaction except creating transaction. We have stored
+trx_id of creating transaction to list of trx_ids when this view was
+created. Thus we can easily see if this record was changed by the
+creating transaction. Because we already have clustered record we can
+access roll_ptr. Using this roll_ptr we can fetch undo record.
+We can now check that undo_no of the undo record is less than undo_no of the
+trancaction which created a view when cursor was created. We see this
+clustered record only in case when record undo_no is less than undo_no
+in the view. If this is not true we build based on undo_rec previous
+version of the record. This record is found because purge can't remove
+records accessed by active transaction. Thus we see correct version. Q. E. D.
+-------------------------------------------------------------------------------
+FACT C: Purge does not remove any delete marked row that is visible
+-------
+to cursor view.
+
+TODO: proof this
+
+*/
+
/*************************************************************************
Creates a read view object. */
UNIV_INLINE
@@ -44,9 +149,11 @@ with ..._close. This is used in purge. */
read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
- /* out, own: read view struct */
- trx_t* cr_trx, /* in: creating transaction, or NULL */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ /* out, own: read view struct */
+ dulint cr_trx_id, /* in: trx_id of creating
+ transaction, or (0, 0) used in purge*/
+ mem_heap_t* heap) /* in: memory heap from which
+ allocated */
{
read_view_t* old_view;
read_view_t* view_copy;
@@ -62,32 +169,33 @@ read_view_oldest_copy_or_open_new(
if (old_view == NULL) {
- return(read_view_open_now(cr_trx, heap));
+ return(read_view_open_now(cr_trx_id, heap));
}
n = old_view->n_trx_ids;
- if (old_view->creator) {
+ if (ut_dulint_cmp(old_view->creator_trx_id,
+ ut_dulint_create(0,0)) != 0) {
n++;
} else {
needs_insert = FALSE;
}
view_copy = read_view_create_low(n, heap);
-
+
/* Insert the id of the creator in the right place of the descending
array of ids, if needs_insert is TRUE: */
i = 0;
while (i < n) {
if (needs_insert
- && (i >= old_view->n_trx_ids
- || ut_dulint_cmp(old_view->creator->id,
+ && (i >= old_view->n_trx_ids
+ || ut_dulint_cmp(old_view->creator_trx_id,
read_view_get_nth_trx_id(old_view, i))
> 0)) {
read_view_set_nth_trx_id(view_copy, i,
- old_view->creator->id);
+ old_view->creator_trx_id);
needs_insert = FALSE;
insert_done = 1;
} else {
@@ -99,9 +207,9 @@ read_view_oldest_copy_or_open_new(
i++;
}
- view_copy->creator = cr_trx;
-
- view_copy->low_limit_no = old_view->low_limit_no;
+ view_copy->creator_trx_id = cr_trx_id;
+
+ view_copy->low_limit_no = old_view->low_limit_no;
view_copy->low_limit_id = old_view->low_limit_id;
view_copy->can_be_too_old = FALSE;
@@ -126,9 +234,12 @@ point in time are seen in the view. */
read_view_t*
read_view_open_now(
/*===============*/
- /* out, own: read view struct */
- trx_t* cr_trx, /* in: creating transaction, or NULL */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ /* out, own: read view struct */
+ dulint cr_trx_id, /* in: trx_id of creating
+ transaction, or (0, 0) used in
+ purge */
+ mem_heap_t* heap) /* in: memory heap from which
+ allocated */
{
read_view_t* view;
trx_t* trx;
@@ -138,11 +249,13 @@ read_view_open_now(
#endif /* UNIV_SYNC_DEBUG */
view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);
- view->creator = cr_trx;
+ view->creator_trx_id = cr_trx_id;
+ view->type = VIEW_NORMAL;
+ view->undo_no = ut_dulint_create(0, 0);
/* No future transactions should be visible in the view */
- view->low_limit_no = trx_sys->max_trx_id;
+ view->low_limit_no = trx_sys->max_trx_id;
view->low_limit_id = view->low_limit_no;
view->can_be_too_old = FALSE;
@@ -153,8 +266,9 @@ read_view_open_now(
/* No active transaction should be visible, except cr_trx */
while (trx) {
- if (trx != cr_trx && (trx->conc_state == TRX_ACTIVE ||
- trx->conc_state == TRX_PREPARED)) {
+ if (ut_dulint_cmp(trx->id, cr_trx_id) != 0
+ && (trx->conc_state == TRX_ACTIVE
+ || trx->conc_state == TRX_PREPARED)) {
read_view_set_nth_trx_id(view, n, trx->id);
@@ -169,13 +283,13 @@ read_view_open_now(
if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
view->low_limit_no = trx->no;
- }
+ }
}
trx = UT_LIST_GET_NEXT(trx_list, trx);
}
- view->n_trx_ids = n;
+ view->n_trx_ids = n;
if (n > 0) {
/* The last active transaction has the smallest id: */
@@ -184,8 +298,9 @@ read_view_open_now(
view->up_limit_id = view->low_limit_id;
}
+
UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
+
return(view);
}
@@ -201,7 +316,7 @@ read_view_close(
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-}
+}
/*************************************************************************
Closes a consistent read view for MySQL. This function is called at an SQL
@@ -225,7 +340,7 @@ read_view_close_for_mysql(
mutex_exit(&kernel_mutex);
}
-
+
/*************************************************************************
Prints a read view to stderr. */
@@ -236,14 +351,23 @@ read_view_print(
{
ulint n_ids;
ulint i;
-
+
+ if (view->type == VIEW_HIGH_GRANULARITY) {
+ fprintf(stderr,
+ "High-granularity read view undo_n:o %lu %lu\n",
+ (ulong) ut_dulint_get_high(view->undo_no),
+ (ulong) ut_dulint_get_low(view->undo_no));
+ } else {
+ fprintf(stderr, "Normal read view\n");
+ }
+
fprintf(stderr, "Read view low limit trx n:o %lu %lu\n",
(ulong) ut_dulint_get_high(view->low_limit_no),
(ulong) ut_dulint_get_low(view->low_limit_no));
fprintf(stderr, "Read view up limit trx id %lu %lu\n",
(ulong) ut_dulint_get_high(view->up_limit_id),
- (ulong) ut_dulint_get_low(view->up_limit_id));
+ (ulong) ut_dulint_get_low(view->up_limit_id));
fprintf(stderr, "Read view low limit trx id %lu %lu\n",
(ulong) ut_dulint_get_high(view->low_limit_id),
@@ -261,9 +385,10 @@ read_view_print(
}
/*************************************************************************
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
+Create a high-granularity consistent cursor view for mysql to be used
+in cursors. In this consistent read view modifications done by the
+creating transaction after the cursor is created or future transactions
+are not visible. */
cursor_view_t*
read_cursor_view_create_for_mysql(
@@ -278,7 +403,7 @@ read_cursor_view_create_for_mysql(
ut_a(cr_trx);
- /* Use larger heap than in trx_create when creating a read_view
+ /* Use larger heap than in trx_create when creating a read_view
because cursors are quite long. */
heap = mem_heap_create(512);
@@ -286,23 +411,25 @@ read_cursor_view_create_for_mysql(
curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t));
curview->heap = heap;
- /* Drop cursor tables from consideration when evaluating the need of
- auto-commit */
+ /* Drop cursor tables from consideration when evaluating the need of
+ auto-commit */
curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
cr_trx->n_mysql_tables_in_use = 0;
mutex_enter(&kernel_mutex);
curview->read_view = read_view_create_low(
- UT_LIST_GET_LEN(trx_sys->trx_list),
+ UT_LIST_GET_LEN(trx_sys->trx_list),
curview->heap);
view = curview->read_view;
- view->creator = cr_trx;
+ view->creator_trx_id = cr_trx->id;
+ view->type = VIEW_HIGH_GRANULARITY;
+ view->undo_no = cr_trx->undo_no;
/* No future transactions should be visible in the view */
- view->low_limit_no = trx_sys->max_trx_id;
+ view->low_limit_no = trx_sys->max_trx_id;
view->low_limit_id = view->low_limit_no;
view->can_be_too_old = FALSE;
@@ -310,13 +437,12 @@ read_cursor_view_create_for_mysql(
n = 0;
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
- /* No active transaction should be visible, except cr_trx.
- This is quick fix for a bug 12456 and needs to be fixed when
- semi-consistent high-granularity read view is implemented. */
+ /* No active transaction should be visible */
while (trx) {
- if (trx != cr_trx && (trx->conc_state == TRX_ACTIVE ||
- trx->conc_state == TRX_PREPARED)) {
+
+ if (trx->conc_state == TRX_ACTIVE
+ || trx->conc_state == TRX_PREPARED) {
read_view_set_nth_trx_id(view, n, trx->id);
@@ -331,13 +457,13 @@ read_cursor_view_create_for_mysql(
if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
view->low_limit_no = trx->no;
- }
+ }
}
trx = UT_LIST_GET_NEXT(trx_list, trx);
}
- view->n_trx_ids = n;
+ view->n_trx_ids = n;
if (n > 0) {
/* The last active transaction has the smallest id: */
@@ -347,7 +473,7 @@ read_cursor_view_create_for_mysql(
}
UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
+
mutex_exit(&kernel_mutex);
return(curview);
@@ -364,11 +490,11 @@ read_cursor_view_close_for_mysql(
cursor_view_t* curview)/* in: cursor view to be closed */
{
ut_a(curview);
- ut_a(curview->read_view);
+ ut_a(curview->read_view);
ut_a(curview->heap);
- /* Add cursor's tables to the global count of active tables that
- belong to this transaction */
+ /* Add cursor's tables to the global count of active tables that
+ belong to this transaction */
trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
mutex_enter(&kernel_mutex);
@@ -380,13 +506,13 @@ read_cursor_view_close_for_mysql(
mem_heap_free(curview->heap);
}
-
+
/*************************************************************************
This function sets a given consistent cursor view to a transaction
read view if given consistent cursor view is not NULL. Otherwise, function
restores a global read view to a transaction read view. */
-void
+void
read_cursor_set_for_mysql(
/*======================*/
trx_t* trx, /* in: transaction where cursor is set */