summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@oracle.com> 2010-10-19 08:58:53 +0300
committer: Marko Mäkelä <marko.makela@oracle.com> 2010-10-19 08:58:53 +0300
commit: f2d39c9eafd31304c1aaead7cd47209fb2451033 (patch)
tree: f61385777042c14e1cef4bbcc8036bde15062c72 /storage
parent: 50c69f9301840ac8b0c1ede75c1ddba4f53bf39f (diff)
download: mariadb-git-f2d39c9eafd31304c1aaead7cd47209fb2451033.tar.gz
Bug #56680 wrong InnoDB results from a case-insensitive covering index
row_search_for_mysql(): When a secondary index record might not be visible in the current transaction's read view and we consult the clustered index and optionally some undo log records, return the relevant columns of the clustered index record to MySQL instead of the secondary index record.

REC_INFO_DELETED_FLAG: Move the definition from rem0rec.ic to rem0rec.h.

ibuf_insert_to_index_page_low(): New function, refactored from ibuf_insert_to_index_page().

ibuf_insert_to_index_page(): When we are inserting a record in place of a delete-marked record and some fields of the record differ, update that record just like row_ins_sec_index_entry_by_modify() would do.

mysql_row_templ_t: Add clust_rec_field_no.

row_sel_store_mysql_rec(), row_sel_push_cache_row_for_mysql(): Add the flag rec_clust, for returning data at clust_rec_field_no instead of rec_field_no. Resurrect the debug assertion that the record not be marked for deletion. (Bug #55626)

buf_LRU_free_block(): Refactored from buf_LRU_search_and_free_block(). This is needed for the innodb_change_buffering_debug diagnostics.

[UNIV_DEBUG || UNIV_IBUF_DEBUG] ibuf_debug, buf_page_get_gen(), buf_flush_page_try(): Implement innodb_change_buffering_debug=1 for evicting pages from the buffer pool, so that change buffering will be attempted more frequently.
Diffstat (limited to 'storage')
-rw-r--r-- storage/innobase/buf/buf0buf.c 24
-rw-r--r-- storage/innobase/buf/buf0flu.c 76
-rw-r--r-- storage/innobase/buf/buf0lru.c 101
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 28
-rw-r--r-- storage/innobase/ibuf/ibuf0ibuf.c 183
-rw-r--r-- storage/innobase/include/buf0flu.h 14
-rw-r--r-- storage/innobase/include/buf0lru.h 8
-rw-r--r-- storage/innobase/include/ibuf0ibuf.h 5
-rw-r--r-- storage/innobase/include/rem0rec.h 3
-rw-r--r-- storage/innobase/include/rem0rec.ic 3
-rw-r--r-- storage/innobase/include/row0mysql.h 4
-rw-r--r-- storage/innobase/include/row0upd.h 8
-rw-r--r-- storage/innobase/row/row0mysql.c 2
-rw-r--r-- storage/innobase/row/row0sel.c 84
-rw-r--r-- storage/innobase/row/row0upd.c 8
15 files changed, 407 insertions, 144 deletions
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 45867388a61..500088c3901 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -1270,6 +1270,30 @@ loop:
buf_awe_map_page_to_frame(block, TRUE);
}
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (mode == BUF_GET_IF_IN_POOL && ibuf_debug) {
+ /* Try to evict the block from the buffer pool, to use the
+ insert buffer as much as possible. */
+
+ if (buf_LRU_free_block(block)) {
+ mutex_exit(&buf_pool->mutex);
+ mutex_exit(&block->mutex);
+ fprintf(stderr,
+ "innodb_change_buffering_debug evict %u %u\n",
+ (unsigned) space, (unsigned) offset);
+ return(NULL);
+ } else if (buf_flush_page_try(block)) {
+ fprintf(stderr,
+ "innodb_change_buffering_debug flush %u %u\n",
+ (unsigned) space, (unsigned) offset);
+ guess = block->frame;
+ goto loop;
+ }
+
+ /* Failed to evict the page; change it directly */
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
#ifdef UNIV_SYNC_DEBUG
buf_block_buf_fix_inc_debug(block, file, line);
#else
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
index 24fa306c127..7dd6bfd9198 100644
--- a/storage/innobase/buf/buf0flu.c
+++ b/storage/innobase/buf/buf0flu.c
@@ -723,6 +723,82 @@ buf_flush_try_page(
return(0);
}
+# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/**********************************************************************
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: buf_pool->mutex and block->mutex must be held upon entering this
+function. They are released, before the write, only if TRUE is returned.
+This is loosely based on buf_flush_batch() and buf_flush_try_page(). */
+
+ibool
+buf_flush_page_try(
+/*===============*/
+ /* out: TRUE if flushed and mutexes
+ released; FALSE if nothing was done */
+ buf_block_t* block) /*!< in/out: buffer control block */
+{
+ ut_ad(mutex_own(&buf_pool->mutex));
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(mutex_own(&block->mutex));
+
+ if (!buf_flush_ready_for_flush(block, BUF_FLUSH_LRU)) {
+ return(FALSE);
+ }
+
+ if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
+ || buf_pool->init_flush[BUF_FLUSH_LRU]) {
+ /* There is already a flush batch of the same type running */
+ return(FALSE);
+ }
+
+ buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
+
+ block->io_fix = BUF_IO_WRITE;
+ block->flush_type = BUF_FLUSH_LRU;
+
+ if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
+
+ os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
+ }
+
+ /* VERY IMPORTANT:
+ Because any thread may call the LRU flush, even when owning
+ locks on pages, to avoid deadlocks, we must make sure that the
+ s-lock is acquired on the page without waiting: this is
+ accomplished because buf_flush_ready_for_flush() must hold,
+ and that requires the page not to be buffer-fixed. */
+
+ rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
+
+ /* Note that the s-latch is acquired before releasing the
+ buf_pool mutex: this ensures that the latch is acquired
+ immediately. */
+
+ mutex_exit(&block->mutex);
+ mutex_exit(&buf_pool->mutex);
+
+ /* Even though block is not protected by any mutex at this
+ point, it is safe to access block, because it is io_fixed and
+ oldest_modification != 0. Thus, it cannot be relocated in the
+ buffer pool or removed from flush_list or LRU_list. */
+
+ buf_flush_write_block_low(block);
+
+ mutex_enter(&buf_pool->mutex);
+ buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
+
+ if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
+ /* The running flush batch has ended */
+ os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
+ }
+
+ mutex_exit(&buf_pool->mutex);
+ buf_flush_buffered_writes();
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/***************************************************************
Flushes to disk all flushable pages within the flush area. */
static
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index d3c787d1578..1dc3efd1464 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -321,6 +321,60 @@ buf_LRU_get_recent_limit(void)
}
/**********************************************************************
+Try to put a block from the LRU list to the free list, if replaceable. */
+
+ibool
+buf_LRU_free_block(
+/*===============*/
+ /* out: TRUE if freed; FALSE if not replaceable */
+ buf_block_t* block) /* in/out: block to be freed; mutex held by caller */
+{
+ if (!buf_flush_ready_for_replace(block)) {
+ return(FALSE);
+ }
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Putting space %lu page %lu"
+ " to free list\n",
+ (ulong) block->space,
+ (ulong) block->offset);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_LRU_block_remove_hashed_page(block);
+
+ mutex_exit(&(buf_pool->mutex));
+ mutex_exit(&block->mutex);
+
+ /* With both mutexes released, remove possible adaptive
+ hash index built on the page; in the case of AWE the
+ block may not have a frame at all */
+
+ if (block->frame) {
+ /* The page was declared uninitialized
+ by buf_LRU_block_remove_hashed_page().
+ We need to flag the contents of the
+ page valid (which it still is) in
+ order to avoid bogus Valgrind
+ warnings. */
+ UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
+ btr_search_drop_page_hash_index(block->frame);
+ UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+ }
+
+ ut_a(block->buf_fix_count == 0);
+
+ mutex_enter(&(buf_pool->mutex));
+ mutex_enter(&block->mutex);
+
+ buf_LRU_block_free_hashed_page(block);
+
+ return(TRUE);
+}
+
+/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
@@ -348,54 +402,13 @@ buf_LRU_search_and_free_block(
ut_a(block->in_LRU_list);
mutex_enter(&block->mutex);
+ freed = buf_LRU_free_block(block);
+ mutex_exit(&block->mutex);
- if (buf_flush_ready_for_replace(block)) {
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Putting space %lu page %lu"
- " to free list\n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_LRU_block_remove_hashed_page(block);
-
- mutex_exit(&(buf_pool->mutex));
- mutex_exit(&block->mutex);
-
- /* Remove possible adaptive hash index built on the
- page; in the case of AWE the block may not have a
- frame at all */
-
- if (block->frame) {
- /* The page was declared uninitialized
- by buf_LRU_block_remove_hashed_page().
- We need to flag the contents of the
- page valid (which it still is) in
- order to avoid bogus Valgrind
- warnings. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- btr_search_drop_page_hash_index(block->frame);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
- }
-
- ut_a(block->buf_fix_count == 0);
-
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
-
- buf_LRU_block_free_hashed_page(block);
- freed = TRUE;
- mutex_exit(&block->mutex);
-
+ if (freed) {
break;
}
- mutex_exit(&block->mutex);
-
block = UT_LIST_GET_PREV(LRU, block);
distance++;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 5a8f5479223..4c52326a58a 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -79,6 +79,7 @@ extern "C" {
#include "../storage/innobase/include/dict0crea.h"
#include "../storage/innobase/include/btr0cur.h"
#include "../storage/innobase/include/btr0btr.h"
+#include "../storage/innobase/include/ibuf0ibuf.h"
#include "../storage/innobase/include/fsp0fsp.h"
#include "../storage/innobase/include/sync0sync.h"
#include "../storage/innobase/include/fil0fil.h"
@@ -3723,17 +3724,18 @@ include_field:
n_requested_fields++;
templ->col_no = i;
+ templ->clust_rec_field_no = dict_col_get_clust_pos_noninline(
+ &index->table->cols[i], clust_index);
+ ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED);
if (index == clust_index) {
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
- &index->table->cols[i], index);
+ templ->rec_field_no = templ->clust_rec_field_no;
} else {
templ->rec_field_no = dict_index_get_nth_col_pos(
index, i);
- }
-
- if (templ->rec_field_no == ULINT_UNDEFINED) {
- prebuilt->need_to_access_clustered = TRUE;
+ if (templ->rec_field_no == ULINT_UNDEFINED) {
+ prebuilt->need_to_access_clustered = TRUE;
+ }
}
if (field->null_ptr) {
@@ -3785,9 +3787,7 @@ skip_field:
for (i = 0; i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i;
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
- &index->table->cols[templ->col_no],
- clust_index);
+ templ->rec_field_no = templ->clust_rec_field_no;
}
}
}
@@ -8990,6 +8990,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
AUTOINC_NO_LOCKING, 0); /* Maximum value */
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
+ PLUGIN_VAR_RQCMDARG,
+ "Debug flags for InnoDB change buffering (0=none)",
+ NULL, NULL, 0, 0, 1, 0);
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment),
@@ -9031,6 +9038,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(thread_concurrency),
MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode),
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ MYSQL_SYSVAR(change_buffering_debug),
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
NULL
};
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index d54a3378993..71ecc7ec49f 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -22,6 +22,7 @@ Created 7/19/1997 Heikki Tuuri
#include "btr0cur.h"
#include "btr0pcur.h"
#include "btr0btr.h"
+#include "row0upd.h"
#include "sync0sync.h"
#include "dict0boot.h"
#include "fut0lst.h"
@@ -137,6 +138,11 @@ access order rules. */
/* Buffer pool size per the maximum insert buffer size */
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/* Flag to control insert buffer debugging (change_buffering_debug). */
+uint ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/* The insert buffer control structure */
ibuf_t* ibuf = NULL;
@@ -2824,6 +2830,72 @@ During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */
static
void
+ibuf_insert_to_index_page_low(
+/*==========================*/
+ dtuple_t* entry, /* in: buffered entry to insert */
+ page_t* page, /* in: index page where the buffered entry
+ should be placed */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr, /* in: mtr */
+ page_cur_t* page_cur)/* in/out: cursor on the record after which
+ to insert; searched again after reorganizing */
+{
+ ulint space;
+ ulint page_no;
+ page_t* bitmap_page;
+ ulint old_bits;
+
+ if (UNIV_LIKELY
+ (page_cur_tuple_insert(page_cur, entry, index, mtr) != NULL)) {
+ return;
+ }
+
+ /* If the record did not fit, reorganize */
+
+ btr_page_reorganize(page, index, mtr);
+
+ page_cur_search(page, index, entry, PAGE_CUR_LE, page_cur);
+
+ /* This time the record must fit; if not, report an error */
+
+ if (UNIV_LIKELY
+ (page_cur_tuple_insert(page_cur, entry, index, mtr) != NULL)) {
+ return;
+ }
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+ " InnoDB: Error: Insert buffer insert fails;"
+ " page free %lu, dtuple size %lu\n",
+ (ulong) page_get_max_insert_size(page, 1),
+ (ulong) rec_get_converted_size(index, entry));
+ fputs("InnoDB: Cannot insert index record ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\nInnoDB: The table where this index record belongs\n"
+ "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
+ "InnoDB: that table.\n", stderr);
+
+ space = buf_frame_get_space_id(page);
+ page_no = buf_frame_get_page_no(page);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
+ old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+ IBUF_BITMAP_FREE, mtr);
+
+ fprintf(stderr,
+ "InnoDB: space %lu, page %lu, bitmap bits %lu\n",
+ (ulong) space, (ulong) page_no, (ulong) old_bits);
+
+ fputs("InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n", stderr);
+}
+
+/************************************************************************
+During merge, inserts to an index page a secondary index entry extracted
+from the insert buffer. */
+static
+void
ibuf_insert_to_index_page(
/*======================*/
dtuple_t* entry, /* in: buffered entry to insert */
@@ -2835,11 +2907,10 @@ ibuf_insert_to_index_page(
page_cur_t page_cur;
ulint low_match;
rec_t* rec;
- page_t* bitmap_page;
- ulint old_bits;
ut_ad(ibuf_inside());
ut_ad(dtuple_check_typed(entry));
+ ut_ad(!buf_block_align(page)->is_hashed);
if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
!= (ibool)!!page_is_comp(page))) {
@@ -2877,61 +2948,79 @@ dump:
low_match = page_cur_search(page, index, entry,
PAGE_CUR_LE, &page_cur);
- if (low_match == dtuple_get_n_fields(entry)) {
+ if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
+ mem_heap_t* heap;
+ upd_t* update;
+ ulint* offsets;
+
rec = page_cur_get_rec(&page_cur);
- btr_cur_del_unmark_for_ibuf(rec, mtr);
- } else {
- rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
+ /* This is based on
+ row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
+ ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
- if (rec == NULL) {
- /* If the record did not fit, reorganize */
+ heap = mem_heap_create(1024);
- btr_page_reorganize(page, index, mtr);
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
+ &heap);
+ update = row_upd_build_sec_rec_difference_binary(
+ index, entry, rec, NULL, heap);
- page_cur_search(page, index, entry,
- PAGE_CUR_LE, &page_cur);
+ if (update->n_fields == 0) {
+ /* The records only differ in the delete-mark.
+ Clear the delete-mark, like we did before
+ Bug #56680 was fixed. */
+ btr_cur_del_unmark_for_ibuf(rec, mtr);
+updated_in_place:
+ mem_heap_free(heap);
+ return;
+ }
- /* This time the record must fit */
- if (UNIV_UNLIKELY(!page_cur_tuple_insert(
- &page_cur, entry, index,
- mtr))) {
+ /* Copy the info bits. Clear the delete-mark. */
+ update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
+ update->info_bits &= ~REC_INFO_DELETED_FLAG;
+
+ /* We cannot invoke btr_cur_optimistic_update() here,
+ because we do not have a btr_cur_t or que_thr_t,
+ as the insert buffer merge occurs at a very low level. */
+ if (!row_upd_changes_field_size_or_external(index, offsets,
+ update)) {
+ /* This is the easy case. Do something similar
+ to btr_cur_update_in_place(). */
+ row_upd_rec_in_place(rec, offsets, update);
+ goto updated_in_place;
+ }
- ut_print_timestamp(stderr);
+ /* A collation may identify values that differ in
+ storage length.
+ Some examples (1 or 2 bytes):
+ utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
+ utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
+ utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
- fprintf(stderr,
- " InnoDB: Error: Insert buffer insert"
- " fails; page free %lu,"
- " dtuple size %lu\n",
- (ulong) page_get_max_insert_size(
- page, 1),
- (ulong) rec_get_converted_size(
- index, entry));
- fputs("InnoDB: Cannot insert index record ",
- stderr);
- dtuple_print(stderr, entry);
- fputs("\nInnoDB: The table where"
- " this index record belongs\n"
- "InnoDB: is now probably corrupt."
- " Please run CHECK TABLE on\n"
- "InnoDB: that table.\n", stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- buf_frame_get_space_id(page),
- buf_frame_get_page_no(page),
- mtr);
- old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page,
- buf_frame_get_page_no(page),
- IBUF_BITMAP_FREE, mtr);
+ latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
- fprintf(stderr, "InnoDB: Bitmap bits %lu\n",
- (ulong) old_bits);
+ Examples of a character (3-byte UTF-8 sequence)
+ identified with 2 or 4 characters (1-byte UTF-8 sequences):
- fputs("InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- }
- }
+ utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
+ utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
+ */
+
+ /* Delete the different-length record, and insert the
+ buffered one. */
+
+ lock_rec_store_on_page_infimum(page, rec);
+ page_cur_delete_rec(&page_cur, index, offsets, mtr);
+ page_cur_move_to_prev(&page_cur);
+ mem_heap_free(heap);
+
+ ibuf_insert_to_index_page_low(entry, page, index, mtr,
+ &page_cur);
+ lock_rec_restore_from_page_infimum(rec, page);
+ } else {
+ ibuf_insert_to_index_page_low(entry, page, index, mtr,
+ &page_cur);
}
}
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 322848509f4..e52c22b8f57 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -38,6 +38,20 @@ buf_flush_init_for_writing(
dulint newest_lsn, /* in: newest modification lsn to the page */
ulint space, /* in: space id */
ulint page_no); /* in: page number */
+# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/**********************************************************************
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: buf_pool->mutex and block->mutex must be held upon entering this
+function. They are released, before the write, only if TRUE is returned.
+This is loosely based on buf_flush_batch() and buf_flush_try_page(). */
+
+ibool
+buf_flush_page_try(
+/*===============*/
+ /* out: TRUE if flushed and mutexes
+ released; FALSE if nothing was done */
+ buf_block_t* block); /*!< in/out: buffer control block */
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 6d26fd4d3b2..777e55a350d 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -66,6 +66,14 @@ buf_LRU_get_recent_limit(void);
/*==========================*/
/* out: the limit; zero if could not determine it */
/**********************************************************************
+Try to put a block from the LRU list to the free list, if replaceable. */
+
+ibool
+buf_LRU_free_block(
+/*===============*/
+ /* out: TRUE if freed; FALSE if not replaceable */
+ buf_block_t* block); /* in/out: block to be freed; mutex held by caller */
+/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 77fefe2020b..d6d7a918b62 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -18,6 +18,11 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0types.h"
#include "fsp0fsp.h"
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/* Flag to control insert buffer debugging (change_buffering_debug). */
+extern uint ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
extern ibuf_t* ibuf;
/**********************************************************************
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index abc204bb583..58762fc3111 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -19,6 +19,9 @@ if and only if the record is the first user record on a non-leaf
B-tree page that is the leftmost page on its level
(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
#define REC_INFO_MIN_REC_FLAG 0x10UL
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
+ record has been delete marked */
/* Number of extra bytes in an old-style record,
in addition to the data and the offsets */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index d91fb4c4391..df66bb13aeb 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -98,9 +98,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
- record has been delete marked */
/* The following masks are used to filter the SQL null bit from
one-byte and two-byte offsets */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 488177791a4..52b13838cc7 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -485,6 +485,10 @@ struct mysql_row_templ_struct {
Innobase record in the current index;
not defined if template_type is
ROW_MYSQL_WHOLE_ROW */
+ ulint clust_rec_field_no; /* field number of the column in an
+ Innobase record in the clustered index;
+ not defined if template_type is
+ ROW_MYSQL_WHOLE_ROW */
ulint mysql_col_offset; /* offset of the column in the MySQL
row format */
ulint mysql_col_len; /* length of the column in the MySQL
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index efbc6d6facf..034b1dafb17 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -129,9 +129,11 @@ row_upd_changes_field_size_or_external(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update);/* in: update vector */
/***************************************************************
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
+Replaces the new column values stored in the update vector to the
+record given. No field size changes are allowed. This function is
+usually invoked on a clustered index. The only use case for a
+secondary index is row_ins_sec_index_entry_by_modify() or its
+counterpart in ibuf_insert_to_index_page(). */
void
row_upd_rec_in_place(
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index a0f54f7288e..99738115cc7 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -400,7 +400,7 @@ row_mysql_convert_row_to_innobase(
row is used, as row may contain
pointers to this record! */
{
- mysql_row_templ_t* templ;
+ const mysql_row_templ_t*templ;
dfield_t* dfield;
ulint i;
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index ad15d0798a2..e03d3d79768 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -2601,20 +2601,21 @@ row_sel_store_mysql_rec(
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
rec_t* rec, /* in: Innobase record in the index
which was described in prebuilt's
- template */
+ template, or in the clustered index;
+ must be protected by a page latch */
+ ibool rec_clust, /* in: TRUE if rec is in the clustered
+ index instead of prebuilt->index */
const ulint* offsets) /* in: array returned by
- rec_get_offsets() */
+ rec_get_offsets(rec) */
{
- mysql_row_templ_t* templ;
mem_heap_t* extern_field_heap = NULL;
mem_heap_t* heap;
- byte* data;
- ulint len;
ulint i;
ut_ad(prebuilt->mysql_template);
ut_ad(prebuilt->default_rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
mem_heap_free(prebuilt->blob_heap);
@@ -2623,10 +2624,15 @@ row_sel_store_mysql_rec(
for (i = 0; i < prebuilt->n_template; i++) {
- templ = prebuilt->mysql_template + i;
+ const mysql_row_templ_t*templ = prebuilt->mysql_template + i;
+ byte* data;
+ ulint len;
+ ulint field_no;
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- templ->rec_field_no))) {
+ field_no = rec_clust
+ ? templ->clust_rec_field_no : templ->rec_field_no;
+
+ if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
/* Copy an externally stored field to the temporary
heap */
@@ -2652,15 +2658,13 @@ row_sel_store_mysql_rec(
causes an assert */
data = btr_rec_copy_externally_stored_field(
- rec, offsets, templ->rec_field_no,
- &len, heap);
+ rec, offsets, field_no, &len, heap);
ut_a(len != UNIV_SQL_NULL);
} else {
/* Field is stored in the row. */
- data = rec_get_nth_field(rec, offsets,
- templ->rec_field_no, &len);
+ data = rec_get_nth_field(rec, offsets, field_no, &len);
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
&& len != UNIV_SQL_NULL) {
@@ -3019,7 +3023,7 @@ row_sel_pop_cached_row_for_mysql(
row_prebuilt_t* prebuilt) /* in: prebuilt struct */
{
ulint i;
- mysql_row_templ_t* templ;
+ const mysql_row_templ_t*templ;
byte* cached_rec;
ut_ad(prebuilt->n_fetch_cached > 0);
ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
@@ -3075,14 +3079,19 @@ void
row_sel_push_cache_row_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record to push */
- const ulint* offsets) /* in: rec_get_offsets() */
+ rec_t* rec, /* in: Innobase record in the index
+ which was described in prebuilt's
+ template, or in the clustered index */
+ ibool rec_clust, /* in: TRUE if rec is in the clustered
+ index instead of prebuilt->index */
+ const ulint* offsets) /* in: rec_get_offsets(rec) */
{
byte* buf;
ulint i;
ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
ut_a(!prebuilt->templ_contains_blob);
if (prebuilt->fetch_cache[0] == NULL) {
@@ -3111,7 +3120,7 @@ row_sel_push_cache_row_for_mysql(
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
- prebuilt, rec, offsets))) {
+ prebuilt, rec, rec_clust, offsets))) {
ut_error;
}
@@ -3500,7 +3509,8 @@ row_search_for_mysql(
rec, offsets));
#endif
if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, offsets)) {
+ rec, FALSE,
+ offsets)) {
err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the
@@ -4233,19 +4243,8 @@ requires_clust_rec:
goto next_rec;
}
- if (prebuilt->need_to_access_clustered) {
-
- result_rec = clust_rec;
-
- ut_ad(rec_offs_validate(result_rec, clust_index,
- offsets));
- } else {
- /* We used 'offsets' for the clust rec, recalculate
- them for 'rec' */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- result_rec = rec;
- }
+ result_rec = clust_rec;
+ ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
} else {
result_rec = rec;
}
@@ -4256,6 +4255,7 @@ requires_clust_rec:
ut_ad(rec_offs_validate(result_rec,
result_rec != rec ? clust_index : index,
offsets));
+ ut_ad(!rec_get_deleted_flag(result_rec, comp));
if ((match_mode == ROW_SEL_EXACT
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
@@ -4276,7 +4276,7 @@ requires_clust_rec:
cursor. */
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
+ result_rec != rec, offsets);
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
goto got_row;
@@ -4284,15 +4284,31 @@ requires_clust_rec:
goto next_rec;
} else {
- if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
+ if (UNIV_UNLIKELY
+ (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
+ /* CHECK TABLE: fetch the row */
+
+ if (result_rec != rec
+ && !prebuilt->need_to_access_clustered) {
+ /* We used 'offsets' for the clust
+ rec, recalculate them for 'rec' */
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED,
+ &heap);
+ result_rec = rec;
+ }
+
memcpy(buf + 4, result_rec
- rec_offs_extra_size(offsets),
rec_offs_size(offsets));
mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4);
} else {
- if (!row_sel_store_mysql_rec(buf, prebuilt,
- result_rec, offsets)) {
+ /* Returning a row to MySQL */
+
+ if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec,
+ result_rec != rec,
+ offsets)) {
err = DB_TOO_BIG_RECORD;
goto lock_wait_or_error;
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 034b7010410..0790cfe02e2 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -430,9 +430,11 @@ row_upd_changes_field_size_or_external(
}
/***************************************************************
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
+Replaces the new column values stored in the update vector to the
+record given. No field size changes are allowed. This function is
+usually invoked on a clustered index. The only use case for a
+secondary index is row_ins_sec_index_entry_by_modify() or its
+counterpart in ibuf_insert_to_index_page(). */
void
row_upd_rec_in_place(