summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@oracle.com>2010-06-29 16:19:07 +0300
committerMarko Mäkelä <marko.makela@oracle.com>2010-06-29 16:19:07 +0300
commit7c718cdb01ec8ba347f5c5feb3078e37cf840ced (patch)
tree6288f16b7174e4a25687f28321e2deeb596e4b67 /storage
parent142e8417dcc36f45682f45ad88785d013941bd48 (diff)
downloadmariadb-git-7c718cdb01ec8ba347f5c5feb3078e37cf840ced.tar.gz
Merge Bug#54358 fix from mysql-5.1-innodb:
------------------------------------------------------------ revno: 3529 revision-id: marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j parent: jimmy.yang@oracle.com-20100629024137-690sacm5sogruzvb committer: Marko Mäkelä <marko.makela@oracle.com> branch nick: 5.1-innodb timestamp: Tue 2010-06-29 15:55:18 +0300 message: Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED columns When the server crashes after a record stub has been inserted and before all its off-page columns have been written, the record will contain incomplete off-page columns after crash recovery. Such records may only be accessed at the READ UNCOMMITTED isolation level or when rolling back a recovered transaction in recv_recovery_rollback_active(). Skip these records at the READ UNCOMMITTED isolation level. TODO: Add assertions for checking the above assumptions hold when an incomplete BLOB is encountered. btr_rec_copy_externally_stored_field(): Return NULL if the field is incomplete. row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs. row_sel_store_mysql_rec(): Return FALSE if not all columns could be retrieved. Previously this function always returned TRUE. Assert that the record is not delete-marked. row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns could be retrieved. row_search_for_mysql(): Skip records containing incomplete off-page columns. Assert that the transaction isolation level is READ UNCOMMITTED. rb://380 approved by Jimmy Yang
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/btr/btr0cur.c24
-rw-r--r--storage/innobase/include/btr0cur.h2
-rw-r--r--storage/innobase/include/row0mysql.h6
-rw-r--r--storage/innobase/row/row0merge.c5
-rw-r--r--storage/innobase/row/row0sel.c113
5 files changed, 133 insertions, 17 deletions
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index f1d8bb52825..90dfba7e151 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -4934,7 +4934,7 @@ btr_copy_externally_stored_field(
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
@@ -4964,6 +4964,28 @@ btr_rec_copy_externally_stored_field(
data = rec_get_nth_field(rec, offsets, no, &local_len);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ if (UNIV_UNLIKELY
+ (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+ /* The externally stored field was not written
+ yet. This is only a valid condition when the server
+ crashed after the time a record stub was freshly
+ inserted but before all its columns were written. This
+ record should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED transactions. */
+
+ /* TODO: assert that there is an owner_trx with
+ owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
+
+ /* TODO: assert that for the current transaction trx,
+ either (trx == owner_trx && trx_is_recv(trx)) or
+ trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
+ return(NULL);
+ }
+
return(btr_copy_externally_stored_field(len, data,
zip_size, local_len, heap));
}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 136d2d068a1..757477838ee 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -558,7 +558,7 @@ btr_copy_externally_stored_field_prefix(
ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index f97d90adb2b..d9c26a2ee3b 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -633,7 +633,11 @@ struct row_prebuilt_struct {
the secondary index, then this is
set to TRUE */
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- BLOB column(s) */
+ a column with DATA_BLOB ==
+ get_innobase_type_from_mysql_type();
+ not to be confused with InnoDB
+ externally stored columns
+ (VARCHAR can be off-page too) */
mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
diff --git a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
index 3fdad6d423b..417e125375a 100644
--- a/storage/innobase/row/row0merge.c
+++ b/storage/innobase/row/row0merge.c
@@ -1779,6 +1779,11 @@ row_merge_copy_blobs(
(below). */
data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap);
+ /* Because we have locked the table, any records
+ written by incomplete transactions must have been
+ rolled back already. There must not be any incomplete
+ BLOB columns. */
+ ut_a(data);
dfield_set_data(field, data, len);
}
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index 4bd61016b21..13c867dc4c3 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -414,7 +414,7 @@ row_sel_fetch_columns(
field_no))) {
/* Copy an externally stored field to the
- temporary heap */
+ temporary heap, if possible. */
heap = mem_heap_create(1);
@@ -423,6 +423,21 @@ row_sel_fetch_columns(
dict_table_zip_size(index->table),
field_no, &len, heap);
+ /* data == NULL means that the
+ externally stored field was not
+ written yet. This is only a valid
+ condition when the server crashed
+ after the time a record stub was
+ freshly inserted but before all its
+ columns were written. This record
+ should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED
+ transactions. The InnoDB SQL parser
+ (the sole caller of this function)
+ does not implement READ UNCOMMITTED,
+ and it is not involved during rollback. */
+ ut_a(data);
ut_a(len != UNIV_SQL_NULL);
needs_copy = TRUE;
@@ -924,6 +939,7 @@ row_sel_get_clust_rec(
when plan->clust_pcur was positioned. The latch will not be
released until mtr_commit(mtr). */
+ ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
row_sel_fetch_columns(index, clust_rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
*out_rec = clust_rec;
@@ -1626,6 +1642,13 @@ skip_lock:
}
if (old_vers == NULL) {
+ /* The record does not exist
+ in our read view. Skip it, but
+ first attempt to determine
+ whether the index segment we
+ are searching through has been
+ exhausted. */
+
offsets = rec_get_offsets(
rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -2639,9 +2662,8 @@ Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
ibool
row_sel_store_mysql_rec(
/*====================*/
@@ -2664,6 +2686,7 @@ row_sel_store_mysql_rec(
ut_ad(prebuilt->mysql_template);
ut_ad(prebuilt->default_rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
mem_heap_free(prebuilt->blob_heap);
@@ -2711,6 +2734,26 @@ row_sel_store_mysql_rec(
dict_table_zip_size(prebuilt->table),
templ->rec_field_no, &len, heap);
+ if (UNIV_UNLIKELY(!data)) {
+ /* The externally stored field
+ was not written yet. This is
+ only a valid condition when
+ the server crashed after the
+ time a record stub was freshly
+ inserted but before all its
+ columns were written. This
+ record should only be seen by
+ recv_recovery_rollback_active()
+ or any TRX_ISO_READ_UNCOMMITTED
+ transactions. */
+
+ if (extern_field_heap) {
+ mem_heap_free(extern_field_heap);
+ }
+
+ return(FALSE);
+ }
+
ut_a(len != UNIV_SQL_NULL);
} else {
/* Field is stored in the row. */
@@ -3128,9 +3171,10 @@ row_sel_pop_cached_row_for_mysql(
}
/********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
row_sel_push_cache_row_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
@@ -3172,10 +3216,11 @@ row_sel_push_cache_row_for_mysql(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
prebuilt, rec, offsets))) {
- ut_error;
+ return(FALSE);
}
prebuilt->n_fetch_cached++;
+ return(TRUE);
}
/*********************************************************************//**
@@ -3570,6 +3615,24 @@ row_search_for_mysql(
if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) {
+ /* Only fresh inserts at
+ server crash time may contain
+ incomplete externally stored
+ columns. Pretend that such
+ records do not exist. Such
+ records may only be accessed
+ at the READ UNCOMMITTED
+ isolation level or when
+ rolling back a recovered
+ transaction. Rollback happens
+ at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is
+ an owner_trx with
+ owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+
err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the
@@ -4349,9 +4412,20 @@ requires_clust_rec:
not cache rows because there the cursor is a scrollable
cursor. */
- row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
- if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+ if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+ offsets)) {
+ /* Only fresh inserts at server crash time may contain
+ incomplete externally stored columns. Pretend that
+ such records do not exist. Such records may only be
+ accessed at the READ UNCOMMITTED isolation level or
+ when rolling back a recovered transaction. Rollback
+ happens at a lower level, not here. */
+ ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is an owner_trx
+ with owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+ } else if (prebuilt->n_fetch_cached
+ == MYSQL_FETCH_CACHE_SIZE) {
goto got_row;
}
@@ -4367,9 +4441,20 @@ requires_clust_rec:
} else {
if (!row_sel_store_mysql_rec(buf, prebuilt,
result_rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- goto lock_wait_or_error;
+ /* Only fresh inserts at server crash
+ time may contain incomplete externally
+ stored columns. Pretend that such
+ records do not exist. Such records may
+ only be accessed at the READ UNCOMMITTED
+ isolation level or when rolling back a
+ recovered transaction. Rollback happens
+ at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is an owner_trx
+ with owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+ goto next_rec;
}
}