diff options
author | unknown <knielsen@knielsen-hq.org> | 2011-04-29 16:16:42 +0200 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2011-04-29 16:16:42 +0200 |
commit | 8b046db411b963f48b06716fab764a35456c43d7 (patch) | |
tree | 3dff6535f6a0cc5b3ab3c2cc8ea32443d7010b79 /storage | |
parent | 3a3a91ff051abe859589e33fab4b5588e325d55b (diff) | |
parent | fecd255a16fc3be6206e03ad5e70d8e90118acc6 (diff) | |
download | mariadb-git-8b046db411b963f48b06716fab764a35456c43d7.tar.gz |
Merge XtraDB from Percona Server 5.1.56-12.7 into MariaDB-5.1.
Diffstat (limited to 'storage')
58 files changed, 1233 insertions, 875 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index bf003b810d2..102db3d7824 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,61 @@ +2011-01-31 The InnoDB Team + + * btr/btr0cur.c, include/row0upd.h, + row/row0purge.c, row/row0umod.c, row/row0upd.c: + Bug#59230 assert 0 row_upd_changes_ord_field_binary() + in post-crash rollback or purge + +2011-01-27 The InnoDB Team + + * btr/btr0cur.c: + Bug#59465 btr_estimate_number_of_different_key_vals use + incorrect offset for external_size + +2011-01-27 The InnoDB Team + + * include/trx0trx.h, trx/trx0trx.c: + Bug#59440 Race condition in XA ROLLBACK and XA COMMIT + after server restart + +2011-01-25 The InnoDB Team + + * row/row0upd.c: + Bug#59585 Fix 58912 introduces compiler warning + due to potentially uninitialized variable + +2011-01-25 The InnoDB Team + + * mtr/mtr0log.c: + Bug#59486 Incorrect usage of UNIV_UNLIKELY() in mlog_parse_string() + +2011-01-25 The InnoDB Team + + * row/row0vers.c: + Fix Bug#59464 Race condition in row_vers_build_for_semi_consistent_read + +2011-01-25 The InnoDB Team + + * btr/btr0btr.c, btr/btr0cur.c, btr/btr0sea.c, + buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c, + include/buf0buf.h, include/buf0buf.ic, include/buf0lru.h, + mem/mem0mem.c, page/page0zip.c: + Fix Bug#59707 Unused compression-related parameters + in buffer pool functions + +2011-01-18 The InnoDB Team + + * include/sync0rw.h, sync/sync0arr.c, sync/sync0rw.c: + Fix Bug#59579 rw_lock_debug_print outputs to stderr, not to + SHOW ENGINE INNODB STATUS + +2011-01-14 The InnoDB Team + * btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc, + include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h, + include/rem0cmp.ic, include/srv0srv.h, rem/rem0cmp.c, + srv/srv0srv.c, innodb_bug30423.test: + Fix Bug#30423 InnoDBs treatment of NULL in index stats causes + bad "rows examined" estimates + 2011-01-06 The InnoDB Team * row/row0merge.c: Fix Bug#59312 Examine MAX_FULL_NAME_LEN in InnoDB to address diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 55204691400..2fb14b06a7b 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -1009,7 +1009,7 @@ btr_page_reorganize_low( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); #else /* !UNIV_HOTBACKUP */ ut_ad(block == back_block1); temp_block = back_block2; diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 321504a2b25..9b306ea2864 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -100,6 +100,18 @@ can be released by page reorganize, then it is reorganized */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ + +/** Estimated table level stats from sampled value. +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ + (((value) * (ib_int64_t) index->stat_n_leaf_pages \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -174,7 +186,7 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ #endif /* !UNIV_HOTBACKUP */ @@ -961,108 +973,6 @@ btr_cur_open_at_rnd_pos_func( } } -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree -after the given path -@return TRUE if the position is at the first page, and cursor must point - the first record for used by the caller.*/ -UNIV_INTERN -ibool -btr_cur_open_at_rnd_pos_after_path( -/*====================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_path_t* first_rec_path, - btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t* page_cursor; - btr_path_t* slot; - ibool is_first_rec = TRUE; - ulint page_no; - ulint space; - ulint zip_size; - ulint height; - rec_t* node_ptr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - if (latch_mode == BTR_MODIFY_TREE) { - mtr_x_lock(dict_index_get_lock(index), mtr); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - slot = first_rec_path; - - for (;;) { - buf_block_t* block; - page_t* page; - - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, mtr); - page = buf_block_get_frame(block); - ut_ad(0 == ut_dulint_cmp(index->id, - btr_page_get_index_id(page))); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - } - - if (height == 0) { - btr_cur_latch_leaves(page, space, zip_size, page_no, - latch_mode, cursor, mtr); - } - - if (is_first_rec && slot->nth_rec != ULINT_UNDEFINED) { - if (height == 0) { - /* must open the first rec */ - page_cur_open_on_nth_user_rec(block, page_cursor, slot->nth_rec); - } else { - is_first_rec = page_cur_open_on_rnd_user_rec_after_nth(block, - page_cursor, slot->nth_rec); - } - } else { - is_first_rec = FALSE; - page_cur_open_on_rnd_user_rec(block, page_cursor); - } - - if (height == 0) { - break; - } - - ut_ad(height > 0); - - height--; - slot++; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return (is_first_rec); -} - /*==================== B-TREE INSERT =========================*/ /*************************************************************//** @@ -1933,8 +1843,8 @@ btr_cur_update_in_place( NOT call it if index is secondary */ if (!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(NULL, NULL, - index, update)) { + || row_upd_changes_ord_field_binary(index, update, thr, + NULL, NULL)) { /* Remove possible hash index pointer to this record */ btr_search_update_hash_on_delete(cursor); @@ -3383,149 +3293,43 @@ btr_estimate_n_rows_in_range( } /*******************************************************************//** -Estimates the number of pages which have not null value of the key of n_cols. -@return estimated number of pages */ -UNIV_INTERN -ulint -btr_estimate_n_pages_not_null( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - ulint n_cols, /*!< in: The cols should be not null */ - btr_path_t* path1) /*!< in: path1[BTR_PATH_ARRAY_N_SLOTS] */ +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /*!< in: physical record */ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_int64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ { - dtuple_t* tuple1; - btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS]; - btr_cur_t cursor; - btr_path_t* slot1; - btr_path_t* slot2; - ibool diverged; - ibool diverged_lot; - ulint divergence_level; - ulint n_pages; - ulint i; - mtr_t mtr; - mem_heap_t* heap; - - heap = mem_heap_create(n_cols * sizeof(dfield_t) - + sizeof(dtuple_t)); + ulint i; - /* make tuple1 (NULL,NULL,,,) from n_cols */ - tuple1 = dtuple_create(heap, n_cols); - dict_index_copy_types(tuple1, index, n_cols); + ut_ad(rec_offs_n_fields(offsets) >= n_unique); - for (i = 0; i < n_cols; i++) { - dfield_set_null(dtuple_get_nth_field(tuple1, i)); + if (n_not_null == NULL) { + return; } - mtr_start(&mtr); - - cursor.path_arr = path1; - - btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, __FILE__, __LINE__, &mtr); - - mtr_commit(&mtr); - - - - mtr_start(&mtr); - - cursor.path_arr = path2; - - btr_cur_open_at_index_side(FALSE, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, &mtr); - - mtr_commit(&mtr); - - mem_heap_free(heap); - - /* We have the path information for the range in path1 and path2 */ - - n_pages = 1; - diverged = FALSE; /* This becomes true when the path is not - the same any more */ - diverged_lot = FALSE; /* This becomes true when the paths are - not the same or adjacent any more */ - divergence_level = 1000000; /* This is the level where paths diverged - a lot */ - for (i = 0; ; i++) { - ut_ad(i < BTR_PATH_ARRAY_N_SLOTS); - - slot1 = path1 + i; - slot2 = path2 + i; - - if ((slot1 + 1)->nth_rec == ULINT_UNDEFINED - || (slot2 + 1)->nth_rec == ULINT_UNDEFINED) { - - if (i > divergence_level + 1) { - /* In trees whose height is > 1 our algorithm - tends to underestimate: multiply the estimate - by 2: */ - - n_pages = n_pages * 2; - } - - /* Do not estimate the number of rows in the range - to over 1 / 2 of the estimated rows in the whole - table */ - - if (n_pages > index->stat_n_leaf_pages / 2) { - n_pages = index->stat_n_leaf_pages / 2; + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; - /* If there are just 0 or 1 rows in the table, - then we estimate all rows are in the range */ + field = rec_get_nth_field(rec, offsets, i, &rec_len); - if (n_pages == 0) { - n_pages = index->stat_n_leaf_pages; - } - } - - return(n_pages); - } - - if (!diverged && slot1->nth_rec != slot2->nth_rec) { - - diverged = TRUE; - - if (slot1->nth_rec < slot2->nth_rec) { - n_pages = slot2->nth_rec - slot1->nth_rec; - - if (n_pages > 1) { - diverged_lot = TRUE; - divergence_level = i; - } - } else { - /* Maybe the tree has changed between - searches */ - - return(10); - } - - } else if (diverged && !diverged_lot) { - - if (slot1->nth_rec < slot1->n_recs - || slot2->nth_rec > 1) { - - diverged_lot = TRUE; - divergence_level = i; - - n_pages = 0; - - if (slot1->nth_rec < slot1->n_recs) { - n_pages += slot1->n_recs - - slot1->nth_rec; - } - - if (slot2->nth_rec > 1) { - n_pages += slot2->nth_rec - 1; - } - } - } else if (diverged_lot) { - - n_pages = (n_pages * (slot1->n_recs + slot2->n_recs)) - / 2; + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; } } } @@ -3533,7 +3337,10 @@ btr_estimate_n_pages_not_null( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -3547,6 +3354,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_int64_t* n_diff; + ib_int64_t* n_not_null; + ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; @@ -3555,42 +3364,49 @@ btr_estimate_number_of_different_key_vals( ullint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - ulint stats_method = srv_stats_method; - btr_path_t first_rec_path[BTR_PATH_ARRAY_N_SLOTS]; - ulint effective_pages; /* effective leaf pages */ - rec_offs_init(offsets_rec_); - rec_offs_init(offsets_next_rec_); + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - /* estimate effective pages and path for the first effective record */ - /* TODO: make it work also for n_cols > 1. */ - effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; - if (!effective_pages) { - for (j = 0; j <= n_cols; j++) { - index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; - } - return; - } else if (effective_pages > index->stat_n_leaf_pages) { - effective_pages = index->stat_n_leaf_pages; - } - } else { - effective_pages = index->stat_n_leaf_pages; - } + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; - n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); + default: + ut_error; + } /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ - if (srv_stats_sample_pages > effective_pages) { - if (effective_pages > 0) { - n_sample_pages = effective_pages; + if (srv_stats_sample_pages > index->stat_index_size) { + if (index->stat_index_size > 0) { + n_sample_pages = index->stat_index_size; } else { n_sample_pages = 1; } @@ -3601,16 +3417,9 @@ btr_estimate_number_of_different_key_vals( /* We sample some pages in the index to get an estimate */ for (i = 0; i < n_sample_pages; i++) { - rec_t* supremum; - ibool is_first_page = TRUE; mtr_start(&mtr); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - is_first_page = btr_cur_open_at_rnd_pos_after_path(index, BTR_SEARCH_LEAF, - first_rec_path, &cursor, &mtr); - } else { btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - } /* Count the number of different key values for each prefix of the key on this index page. If the prefix does not determine @@ -3625,25 +3434,25 @@ btr_estimate_number_of_different_key_vals( } ut_a(page); - supremum = page_get_supremum_rec(page); - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) { - /* the cursor should be the first record of the page. */ - /* Counting should be started from here. */ - rec = btr_cur_get_rec(&cursor); - } else { rec = page_rec_get_next(page_get_infimum_rec(page)); - } - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { - rec_t* next_rec; - next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + while (!page_rec_is_supremum(rec)) { + rec_t* next_rec = page_rec_get_next(rec); + if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); break; } @@ -3651,15 +3460,14 @@ btr_estimate_number_of_different_key_vals( matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, - n_cols, &heap); + ULINT_UNDEFINED, + &heap); cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, - &matched_bytes, - (stats_method==SRV_STATS_METHOD_NULLS_NOT_EQUAL) ? - SRV_STATS_METHOD_NULLS_NOT_EQUAL : - SRV_STATS_METHOD_NULLS_EQUAL); + index, stats_null_not_equal, + &matched_fields, + &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { /* We add one if this index record has @@ -3668,6 +3476,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -3702,10 +3516,6 @@ btr_estimate_number_of_different_key_vals( } } - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - total_external_size += btr_rec_get_externally_stored_len( - rec, offsets_rec); mtr_commit(&mtr); } @@ -3719,13 +3529,9 @@ btr_estimate_number_of_different_key_vals( for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_int64_t)effective_pages - + n_sample_pages - 1 - + total_external_size - + not_empty_flag) - / (n_sample_pages - + total_external_size)); + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * n_sample_pages + total_external_size, then @@ -3735,7 +3541,7 @@ btr_estimate_number_of_different_key_vals( different key values, or even more. Let us try to approximate that: */ - add_on = effective_pages + add_on = index->stat_n_leaf_pages / (10 * (n_sample_pages + total_external_size)); @@ -3745,53 +3551,80 @@ btr_estimate_number_of_different_key_vals( index->stat_n_diff_key_vals[j] += add_on; - if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { - /* index->stat_n_diff_key_vals[k] is used for calc rec_per_key, - as "stats.records / index->stat_n_diff_key_vals[x]". - So it should be adjusted to the value which is based on whole of the index. */ - index->stat_n_diff_key_vals[j] = - index->stat_n_diff_key_vals[j] * (ib_int64_t)index->stat_n_leaf_pages - / (ib_int64_t)effective_pages; + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); } } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ /***********************************************************//** +Gets the offset of the pointer to the externally stored part of a field. +@return offset of the pointer to the externally stored part */ +static +ulint +btr_rec_get_field_ref_offs( +/*=======================*/ + const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ + ulint n) /*!< in: index of the external field */ +{ + ulint field_ref_offs; + ulint local_len; + + ut_a(rec_offs_nth_extern(offsets, n)); + field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len); + ut_a(local_len != UNIV_SQL_NULL); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE); +} + +/** Gets a pointer to the externally stored part of a field. +@param rec record +@param offsets rec_get_offsets(rec) +@param n index of the externally stored field +@return pointer to the externally stored part */ +#define btr_rec_get_field_ref(rec, offsets, n) \ + ((rec) + btr_rec_get_field_ref_offs(offsets, n)) + +/***********************************************************//** Gets the externally stored size of a record, in units of a database page. @return externally stored part, in units of a database page */ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ { ulint n_fields; - byte* data; - ulint local_len; - ulint extern_len; ulint total_extern_len = 0; ulint i; ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + + if (!rec_offs_any_extern(offsets)) { + return(0); + } + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, offsets, i, &local_len); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - extern_len = mach_read_from_4(data + local_len - + BTR_EXTERN_LEN + 4); + ulint extern_len = mach_read_from_4( + btr_rec_get_field_ref(rec, offsets, i) + + BTR_EXTERN_LEN + 4); total_extern_len += ut_calc_align(extern_len, UNIV_PAGE_SIZE); @@ -3821,7 +3654,7 @@ btr_cur_set_ownership_of_extern_field( ulint byte_val; data = rec_get_nth_field(rec, offsets, i, &local_len); - + ut_ad(rec_offs_nth_extern(offsets, i)); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -3831,6 +3664,9 @@ btr_cur_set_ownership_of_extern_field( if (val) { byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); } else { +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; } @@ -4048,8 +3884,7 @@ btr_blob_free( && buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all, NULL, TRUE) - != BUF_LRU_FREED + if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED && all && block->page.zip.data /* Now, buf_LRU_free_block() may release mutex temporarily */ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE @@ -4058,7 +3893,7 @@ btr_blob_free( /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ - buf_LRU_free_block(&block->page, FALSE, NULL, TRUE); + buf_LRU_free_block(&block->page, FALSE, TRUE); } } @@ -4075,8 +3910,8 @@ file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -4085,11 +3920,17 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr - containing the latch to rec and to the - tree */ + { ulint rec_page_no; byte* field_ref; @@ -4107,6 +3948,7 @@ btr_store_big_rec_extern_fields( z_stream c_stream; ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(rec_offs_any_extern(offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -4138,21 +3980,37 @@ btr_store_big_rec_extern_fields( ut_a(err == Z_OK); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must either be zero or they must be pointers to inherited + columns, owned by this record or an earlier record version. */ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + /* Either this must be an update in place, + or the BLOB must be inherited, or the BLOB pointer + must be zero (will be written in this function). */ + ut_a(update_in_place + || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) + || !memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ for (i = 0; i < big_rec_vec->n_fields; i++) { - ut_ad(rec_offs_nth_extern(offsets, - big_rec_vec->fields[i].field_no)); - { - ulint local_len; - field_ref = rec_get_nth_field( - rec, offsets, big_rec_vec->fields[i].field_no, - &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - field_ref += local_len; - } + field_ref = btr_rec_get_field_ref( + rec, offsets, big_rec_vec->fields[i].field_no); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* A zero BLOB pointer should have been initially inserted. */ + ut_a(!memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ extern_len = big_rec_vec->fields[i].len; UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, extern_len); @@ -4434,6 +4292,23 @@ next_zip_page: mem_heap_free(heap); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must be valid. */ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + /* The pointer must not be zero. */ + ut_a(0 != memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The column must not be disowned by this record. */ + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ return(DB_SUCCESS); } @@ -4456,6 +4331,7 @@ btr_check_blob_fil_page_type( if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) { ulint flags = fil_space_get_flags(space_id); +#ifndef UNIV_DEBUG /* Improve debug test coverage */ if (UNIV_LIKELY ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) { /* Old versions of InnoDB did not initialize @@ -4464,6 +4340,7 @@ btr_check_blob_fil_page_type( a BLOB page that is in Antelope format.*/ return; } +#endif /* !UNIV_DEBUG */ ut_print_timestamp(stderr); fprintf(stderr, @@ -4513,23 +4390,13 @@ btr_free_externally_stored_field( ulint page_no; ulint next_page_no; mtr_t mtr; -#ifdef UNIV_DEBUG + ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains_page(local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX)); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); - - if (rec) { - ulint local_len; - const byte* f = rec_get_nth_field(rec, offsets, - i, &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - f += local_len; - ut_ad(f == field_ref); - } -#endif /* UNIV_DEBUG */ + ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i)); if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { @@ -4694,13 +4561,8 @@ btr_rec_free_externally_stored_fields( for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - ulint len; - byte* data - = rec_get_nth_field(rec, offsets, i, &len); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - btr_free_externally_stored_field( - index, data + len - BTR_EXTERN_FIELD_REF_SIZE, + index, btr_rec_get_field_ref(rec, offsets, i), rec, offsets, page_zip, i, rb_ctx, mtr); } } diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c index c78f791480c..3b38e2799c2 100644 --- a/storage/xtradb/btr/btr0sea.c +++ b/storage/xtradb/btr/btr0sea.c @@ -141,7 +141,7 @@ btr_search_check_free_space_in_heap(void) be enough free space in the hash table. */ if (heap->free_block == NULL) { - buf_block_t* block = buf_block_alloc(0); + buf_block_t* block = buf_block_alloc(); rw_lock_x_lock(&btr_search_latch); @@ -1186,7 +1186,7 @@ btr_search_drop_page_hash_index_on_index( /*=====================================*/ dict_index_t* index) /* in: record descriptor */ { - buf_page_t* bpage; + hash_table_t* table; buf_block_t* block; ulint n_fields; @@ -1202,96 +1202,143 @@ btr_search_drop_page_hash_index_on_index( ulint i; mem_heap_t* heap = NULL; ulint* offsets; + ibool released_search_latch; - rw_lock_x_lock(&btr_search_latch); - mutex_enter(&LRU_list_mutex); + rw_lock_s_lock(&btr_search_latch); table = btr_search_sys->hash_index; - bpage = UT_LIST_GET_LAST(buf_pool->LRU); + do { + buf_chunk_t* chunks = buf_pool->chunks; + buf_chunk_t* chunk = chunks + buf_pool->n_chunks; + + released_search_latch = FALSE; + + while (--chunk >= chunks) { + block = chunk->blocks; + i = chunk->size; + +retry: + for (; i--; block++) { + if (buf_block_get_state(block) + != BUF_BLOCK_FILE_PAGE + || block->index != index + || !block->is_hashed) { + continue; + } + + page = block->frame; + + /* from btr_search_drop_page_hash_index() */ + n_fields = block->curr_n_fields; + n_bytes = block->curr_n_bytes; + - while (bpage != NULL) { - block = (buf_block_t*) bpage; - if (block->index == index && block->is_hashed) { - page = block->frame; + /* keeping latch order */ + rw_lock_s_unlock(&btr_search_latch); + released_search_latch = TRUE; + rw_lock_x_lock(&block->lock); - /* from btr_search_drop_page_hash_index() */ - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - ut_a(n_fields + n_bytes > 0); + ut_a(n_fields + n_bytes > 0); - n_recs = page_get_n_recs(page); + n_recs = page_get_n_recs(page); - /* Calculate and cache fold values into an array for fast deletion - from the hash index */ + /* Calculate and cache fold values into an array for fast deletion + from the hash index */ - folds = mem_alloc(n_recs * sizeof(ulint)); + folds = mem_alloc(n_recs * sizeof(ulint)); - n_cached = 0; + n_cached = 0; - rec = page_get_infimum_rec(page); - rec = page_rec_get_next_low(rec, page_is_comp(page)); + rec = page_get_infimum_rec(page); + rec = page_rec_get_next_low(rec, page_is_comp(page)); - index_id = btr_page_get_index_id(page); + index_id = btr_page_get_index_id(page); - ut_a(0 == ut_dulint_cmp(index_id, index->id)); + ut_a(0 == ut_dulint_cmp(index_id, index->id)); - prev_fold = 0; + prev_fold = 0; - offsets = NULL; + offsets = NULL; - while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets(rec, index, offsets, + n_fields + (n_bytes > 0), &heap); + ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); + fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - if (fold == prev_fold && prev_fold != 0) { + if (fold == prev_fold && prev_fold != 0) { - goto next_rec; - } + goto next_rec; + } - /* Remove all hash nodes pointing to this page from the - hash chain */ + /* Remove all hash nodes pointing to this page from the + hash chain */ - folds[n_cached] = fold; - n_cached++; + folds[n_cached] = fold; + n_cached++; next_rec: - rec = page_rec_get_next_low(rec, page_rec_is_comp(rec)); - prev_fold = fold; - } + rec = page_rec_get_next_low(rec, page_rec_is_comp(rec)); + prev_fold = fold; + } - for (i = 0; i < n_cached; i++) { + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_empty(heap); + } - ha_remove_all_nodes_to_page(table, folds[i], page); - } + rw_lock_x_lock(&btr_search_latch); - ut_a(index->search_info->ref_count > 0); - index->search_info->ref_count--; + if (UNIV_UNLIKELY(!block->is_hashed)) { + goto cleanup; + } - block->is_hashed = FALSE; - block->index = NULL; - + ut_a(block->index == index); + + if (UNIV_UNLIKELY(block->curr_n_fields != n_fields) + || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) { + rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(&block->lock); + + mem_free(folds); + + rw_lock_s_lock(&btr_search_latch); + goto retry; + } + + for (i = 0; i < n_cached; i++) { + + ha_remove_all_nodes_to_page(table, folds[i], page); + } + + ut_a(index->search_info->ref_count > 0); + index->search_info->ref_count--; + + block->is_hashed = FALSE; + block->index = NULL; + +cleanup: #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (UNIV_UNLIKELY(block->n_pointers)) { - /* Corruption */ - ut_print_timestamp(stderr); - fprintf(stderr, + if (UNIV_UNLIKELY(block->n_pointers)) { + /* Corruption */ + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Corruption of adaptive hash index. After dropping\n" "InnoDB: the hash index to a page of %s, still %lu hash nodes remain.\n", - index->name, (ulong) block->n_pointers); - } + index->name, (ulong) block->n_pointers); + } #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(&block->lock); - mem_free(folds); - } + mem_free(folds); - bpage = UT_LIST_GET_PREV(LRU, bpage); - } + rw_lock_s_lock(&btr_search_latch); + } + } + } while (released_search_latch); - mutex_exit(&LRU_list_mutex); - rw_lock_x_unlock(&btr_search_latch); + rw_lock_s_unlock(&btr_search_latch); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c index e6b80bcda55..12f7ad51aac 100644 --- a/storage/xtradb/buf/buf0buddy.c +++ b/storage/xtradb/buf/buf0buddy.c @@ -346,7 +346,7 @@ buf_buddy_alloc_low( if (have_page_hash_mutex) { rw_lock_x_unlock(&page_hash_latch); } - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); *lru = TRUE; //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -477,6 +477,7 @@ buf_buddy_relocate( if (size >= PAGE_ZIP_MIN_SIZE) { /* This is a compressed page. */ mutex_t* mutex; + ulint space, page_no; if (!have_page_hash_mutex) { mutex_exit(&zip_free_mutex); diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index 452e5b0f526..020896b5739 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -754,9 +754,9 @@ buf_block_init( block->modify_clock = 0; -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ block->check_index_page_at_flush = FALSE; block->index = NULL; @@ -839,11 +839,13 @@ buf_chunk_init( ulint zip_hash_mem_size = 0; hash_table_t* zip_hash_tmp = NULL; ulint i; + ulint size_target; buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + size_target = (mem_size / UNIV_PAGE_SIZE) - 1; srv_buffer_pool_shm_is_reused = FALSE; @@ -1044,6 +1046,10 @@ init_again: chunk->size = size; } + if (chunk->size > size_target) { + chunk->size = size_target; + } + if (shm_info && !(shm_info->is_new)) { /* convert the shared memory segment for reuse */ ptrdiff_t phys_offset; @@ -1830,7 +1836,7 @@ shrink_again: buf_LRU_make_block_old(&block->page); dirty++; - } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE) + } else if (buf_LRU_free_block(&block->page, TRUE, FALSE) != BUF_LRU_FREED) { nonfree++; } @@ -2177,7 +2183,7 @@ buf_page_peek_if_search_hashed( return(is_hashed); } -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the @@ -2199,6 +2205,8 @@ buf_page_set_file_page_was_freed( bpage = buf_page_hash_get(space, offset); if (bpage) { + /* bpage->file_page_was_freed can already hold + when this code is invoked from dict_drop_index_tree() */ bpage->file_page_was_freed = TRUE; } @@ -2237,7 +2245,7 @@ buf_page_reset_file_page_was_freed( return(bpage); } -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Get read access to a compressed page (usually of type @@ -2333,8 +2341,7 @@ err_exit: ut_a(block_mutex == &((buf_block_t*) bpage)->mutex); /* Discard the uncompressed page frame if possible. */ - if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) { mutex_exit(block_mutex); goto lookup; @@ -2358,7 +2365,7 @@ got_block: buf_page_set_accessed_make_young(bpage, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); #endif @@ -2821,7 +2828,7 @@ wait_until_unfixed: //mutex_exit(&buf_pool_zip_mutex); mutex_exit(block_mutex); - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_a(block); block_mutex = &block->mutex; @@ -2974,8 +2981,7 @@ wait_until_unfixed: /* Try to evict the block from the buffer pool, to use the insert buffer as much as possible. */ - if (buf_LRU_free_block(&block->page, TRUE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) { buf_pool_mutex_exit(); mutex_exit(&block->mutex); fprintf(stderr, @@ -3007,7 +3013,7 @@ wait_until_unfixed: buf_page_set_accessed_make_young(&block->page, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!block->page.file_page_was_freed); #endif @@ -3183,7 +3189,7 @@ buf_page_optimistic_get( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif if (innobase_get_slow_log()) { @@ -3303,7 +3309,7 @@ buf_page_get_known_nowait( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif @@ -3394,9 +3400,9 @@ buf_page_try_get_func( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_pool->stat.n_page_gets++; @@ -3426,9 +3432,9 @@ buf_page_init_low( bpage->oldest_modification = 0; HASH_INVALIDATE(bpage, hash); bpage->is_corrupt = FALSE; -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /********************************************************************//** @@ -3556,7 +3562,7 @@ buf_page_init_for_read( && UNIV_LIKELY(!recv_recovery_is_on())) { block = NULL; } else { - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_ad(block); } @@ -3682,6 +3688,7 @@ err_exit: bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->space = space; bpage->offset = offset; + bpage->space_was_being_deleted = FALSE; #ifdef UNIV_DEBUG bpage->in_page_hash = FALSE; @@ -3750,7 +3757,7 @@ buf_page_create( ut_ad(mtr->state == MTR_ACTIVE); ut_ad(space || !zip_size); - free_block = buf_LRU_get_free_block(0); + free_block = buf_LRU_get_free_block(); //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -3762,9 +3769,9 @@ buf_page_create( #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(space, offset) == 0); #endif -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /* Page can be found in buf_pool */ //buf_pool_mutex_exit(); diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 5db6f816aab..cda8d3b170e 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -367,7 +367,7 @@ buf_flush_ready_for_replace( if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) { - return(bpage->oldest_modification == 0 + return((bpage->oldest_modification == 0 || bpage->space_was_being_deleted) && buf_page_get_io_fix(bpage) == BUF_IO_NONE && bpage->buf_fix_count == 0); } @@ -406,6 +406,13 @@ buf_flush_ready_for_flush( && buf_page_get_io_fix(bpage) == BUF_IO_NONE) { ut_ad(bpage->in_flush_list); + if (bpage->space_was_being_deleted) { + /* should be removed from flush_list here */ + /* because buf_flush_try_neighbors() cannot flush without fil_space_get_size(space) */ + buf_flush_remove(bpage); + return(FALSE); + } + if (flush_type != BUF_FLUSH_LRU) { return(TRUE); diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 92a645ef2f5..583eec9bd9c 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -530,6 +530,30 @@ next_page_no_mutex: } } +/******************************************************************//** +*/ +UNIV_INTERN +void +buf_LRU_mark_space_was_deleted( +/*===========================*/ + ulint id) /*!< in: space id */ +{ + buf_page_t* bpage; + + mutex_enter(&LRU_list_mutex); + + bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + + while (bpage != NULL) { + if (buf_page_get_space(bpage) == id) { + bpage->space_was_being_deleted = TRUE; + } + bpage = UT_LIST_GET_NEXT(LRU, bpage); + } + + mutex_exit(&LRU_list_mutex); +} + /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -618,7 +642,7 @@ restart: ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); - freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex); + freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex); mutex_exit(&block->mutex); switch (freed) { @@ -690,7 +714,7 @@ restart: ut_ad(bpage->in_LRU_list); accessed = buf_page_is_accessed(bpage); - freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex); + freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex); mutex_exit(block_mutex); switch (freed) { @@ -876,10 +900,8 @@ LRU list to the free list. @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ { buf_block_t* block = NULL; ibool freed; @@ -955,28 +977,10 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(); - if (block) { - -#ifdef UNIV_DEBUG - block->page.zip.m_start = -#endif /* UNIV_DEBUG */ - block->page.zip.m_end = - block->page.zip.m_nonempty = - block->page.zip.n_blobs = 0; - - if (UNIV_UNLIKELY(zip_size)) { - ibool lru; - page_zip_set_size(&block->page.zip, zip_size); - mutex_enter(&LRU_list_mutex); - block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE); - mutex_exit(&LRU_list_mutex); - UNIV_MEM_DESC(block->page.zip.data, zip_size, block); - } else { - page_zip_set_size(&block->page.zip, 0); - block->page.zip.data = NULL; - } + //buf_pool_mutex_exit(); - //buf_pool_mutex_exit(); + if (block) { + memset(&block->page.zip, 0, sizeof block->page.zip); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; @@ -988,8 +992,6 @@ loop: /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ - //buf_pool_mutex_exit(); - freed = buf_LRU_search_and_free_block(n_iterations); if (freed > 0) { @@ -1471,10 +1473,6 @@ buf_LRU_free_block( buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released, - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ ibool have_LRU_mutex) { buf_page_t* b = NULL; @@ -1498,6 +1496,10 @@ buf_LRU_free_block( return(BUF_LRU_NOT_FREED); } + if (bpage->space_was_being_deleted && bpage->oldest_modification != 0) { + buf_flush_remove(bpage); + } + #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); #endif /* UNIV_IBUF_COUNT_DEBUG */ @@ -1685,10 +1687,6 @@ not_freed: b->io_fix = BUF_IO_READ; } - if (buf_pool_mutex_released) { - *buf_pool_mutex_released = TRUE; - } - //buf_pool_mutex_exit(); mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); @@ -2369,8 +2367,7 @@ buf_LRU_file_restore(void) continue; } - if (fil_area_is_exist(space_id, zip_size, page_no, 0, - zip_size ? zip_size : UNIV_PAGE_SIZE)) { + if (fil_is_exist(space_id, page_no)) { tablespace_version = fil_space_get_version(space_id); diff --git a/storage/xtradb/dict/dict0boot.c b/storage/xtradb/dict/dict0boot.c index 43cfced65a0..2b6a208321d 100644 --- a/storage/xtradb/dict/dict0boot.c +++ b/storage/xtradb/dict/dict0boot.c @@ -465,17 +465,21 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); table->n_mysql_handles_opened = 1; /* for pin */ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ #if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 #error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" #endif +#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 +#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" +#endif table->id = DICT_STATS_ID; dict_table_add_to_cache(table, heap); diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index c63ff57be97..c6e7d588e72 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -529,7 +529,7 @@ dict_create_sys_stats_tuple( sys_stats = dict_sys->sys_stats; - entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + entry = dtuple_create(heap, 4 + DATA_N_SYS_COLS); dict_table_copy_types(entry, sys_stats); @@ -548,6 +548,11 @@ dict_create_sys_stats_tuple( ptr = mem_heap_alloc(heap, 8); mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ dfield_set_data(dfield, ptr, 8); + /* 5: NON_NULL_VALS ------------------*/ + dfield = dtuple_get_nth_field(entry, 3/*NON_NULL_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); return(entry); } diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 638ab55703b..18880a5c72c 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -1732,6 +1732,12 @@ undo_size_ok: new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_int64_t)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -4331,15 +4337,18 @@ dict_reload_statistics( ulint key_cols; ulint n_cols; const rec_t* rec; + ulint n_fields; const byte* field; ulint len; ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; byte* buf; ulint i; mtr_t mtr; n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); sys_stats = dict_sys->sys_stats; sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); @@ -4375,9 +4384,13 @@ dict_reload_statistics( } if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; goto next_rec; } + n_fields = rec_get_n_fields_old(rec); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); @@ -4389,6 +4402,21 @@ dict_reload_statistics( ut_a(len == 8); stat_n_diff_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_non_null_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); + } else { + /* not enough fields: should be older */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. " + "Please execute ANALYZE TABLE for it.\n", + index->table_name, index->name, i, n_cols); + + stat_n_non_null_key_vals_tmp[i] = ((ib_int64_t)(-1)); + } next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } @@ -4398,6 +4426,12 @@ next_rec: for (i = 0; i <= n_cols; i++) { index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + if (stat_n_non_null_key_vals_tmp[i] == ((ib_int64_t)(-1))) { + /* approximate value */ + index->stat_n_non_null_key_vals[i] = stat_n_diff_key_vals_tmp[n_cols]; + } else { + index->stat_n_non_null_key_vals[i] = stat_n_non_null_key_vals_tmp[i]; + } } } /*===========================================*/ @@ -4442,18 +4476,22 @@ dict_store_statistics( ulint n_cols; ulint rests; const rec_t* rec; + ulint n_fields; const byte* field; ulint len; ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; byte* buf; ulint i; mtr_t mtr; n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); for (i = 0; i <= n_cols; i++) { stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + stat_n_non_null_key_vals_tmp[i] = index->stat_n_non_null_key_vals[i]; } sys_stats = dict_sys->sys_stats; @@ -4481,12 +4519,24 @@ dict_store_statistics( || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), index->id)) { /* not found */ - btr_pcur_close(&pcur); - mtr_commit(&mtr); + + break; } if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + if (n_fields <= DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + /* not update for the older smaller format */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. Please ANALYZE TABLE it.\n", + index->table_name, index->name, i, n_cols); goto next_rec; } @@ -4503,6 +4553,14 @@ dict_store_statistics( (ulint) stat_n_diff_key_vals_tmp[key_cols] & 0xFFFFFFFF), &mtr); + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_dulint((byte*)field, + ut_dulint_create((ulint) (stat_n_non_null_key_vals_tmp[key_cols] >> 32), + (ulint) stat_n_non_null_key_vals_tmp[key_cols] & 0xFFFFFFFF), + &mtr); + rests--; next_rec: @@ -4635,6 +4693,10 @@ dict_update_statistics( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); @@ -4662,6 +4724,78 @@ end: dict_table_stats_unlock(table, RW_X_LATCH); } +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index) +{ + mem_heap_t* heap; + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + ulint n_fields; + ulint len; + byte* buf; + mtr_t mtr; + + heap = mem_heap_create(100); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + +next_rec: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + /* no statistics == not older statistics */ + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + return(FALSE); + } else { + return(TRUE); + } +} + /**********************************************************************//** Prints info of a foreign key constraint. */ static diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 2ab0dd6267a..8121130f681 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -242,6 +242,7 @@ the ib_logfiles form a 'space' and it is handled here */ struct fil_system_struct { #ifndef UNIV_HOTBACKUP mutex_t mutex; /*!< The mutex protecting the cache */ + mutex_t file_extend_mutex; #endif /* !UNIV_HOTBACKUP */ hash_table_t* spaces; /*!< The hash table of spaces in the system; they are hashed on the space @@ -690,7 +691,7 @@ fil_node_open_file( ut_a(space->purpose != FIL_LOG); ut_a(!trx_sys_sys_space(space->id)); - if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Error: the size of single-table" " tablespace file %s\n" @@ -816,7 +817,7 @@ fil_node_close_file( ut_ad(node && system); ut_ad(mutex_own(&(system->mutex))); ut_a(node->open); - ut_a(node->n_pending == 0); + ut_a(node->n_pending == 0 || srv_lazy_drop_table); ut_a(node->n_pending_flushes == 0); ut_a(node->modification_counter == node->flush_counter); @@ -1028,7 +1029,7 @@ fil_node_free( ut_ad(node && system && space); ut_ad(mutex_own(&(system->mutex))); ut_a(node->magic_n == FIL_NODE_MAGIC_N); - ut_a(node->n_pending == 0); + ut_a(node->n_pending == 0 || srv_lazy_drop_table); if (node->open) { /* We fool the assertion in fil_node_close_file() to think @@ -1549,6 +1550,7 @@ fil_init( fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); + mutex_create(&fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH); fil_system->spaces = hash_create(hash_size); fil_system->name_hash = hash_create(hash_size); @@ -2295,7 +2297,11 @@ try_again: completely and permanently. The flag is_being_deleted also prevents fil_flush() from being applied to this tablespace. */ + if (srv_lazy_drop_table) { + buf_LRU_mark_space_was_deleted(id); + } else { buf_LRU_invalidate_tablespace(id); + } #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ @@ -3095,8 +3101,8 @@ fil_open_single_table_tablespace( space_id = fsp_header_get_space_id(page); space_flags = fsp_header_get_flags(page); - if (srv_expand_import - && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { + if (srv_expand_import) { + ibool file_is_corrupt = FALSE; byte* buf3; byte* descr_page; @@ -3167,8 +3173,10 @@ fil_open_single_table_tablespace( if (size_bytes < free_limit_bytes) { free_limit_bytes = size_bytes; - fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath); - file_is_corrupt = TRUE; + if (size_bytes >= (ib_int64_t) (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE)) { + fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath); + file_is_corrupt = TRUE; + } } /* get cruster index information */ @@ -3314,7 +3322,7 @@ skip_info: file_is_corrupt = TRUE; descr_is_corrupt = TRUE; } else { - ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES); + descr_is_corrupt = FALSE; } @@ -3778,7 +3786,7 @@ fil_load_single_table_tablespace( size = (((ib_uint64_t)size_high) << 32) + (ib_uint64_t)size_low; #ifndef UNIV_HOTBACKUP - if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size < FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Error: the size of single-table tablespace" " file %s\n" @@ -3798,7 +3806,7 @@ fil_load_single_table_tablespace( /* Align the memory for file i/o if we might have O_DIRECT set */ page = ut_align(buf2, UNIV_PAGE_SIZE); - if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + if (size >= FIL_IBD_FILE_INITIAL_SIZE * (lint)UNIV_PAGE_SIZE) { success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); /* We have to read the tablespace id from the file */ @@ -4348,6 +4356,10 @@ fil_extend_space_to_desired_size( ulint page_size; ibool success = TRUE; + /* file_extend_mutex is for http://bugs.mysql.com/56433 */ + /* to protect from the other fil_extend_space_to_desired_size() */ + /* during temprary releasing &fil_system->mutex */ + mutex_enter(&fil_system->file_extend_mutex); fil_mutex_enter_and_prepare_for_io(space_id); space = fil_space_get_by_id(space_id); @@ -4359,6 +4371,7 @@ fil_extend_space_to_desired_size( *actual_size = space->size; mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); return(TRUE); } @@ -4391,6 +4404,8 @@ fil_extend_space_to_desired_size( offset_low = ((start_page_no - file_start_page_no) % (4096 * ((1024 * 1024) / page_size))) * page_size; + + mutex_exit(&fil_system->mutex); #ifdef UNIV_HOTBACKUP success = os_file_write(node->name, node->handle, buf, offset_low, offset_high, @@ -4400,8 +4415,10 @@ fil_extend_space_to_desired_size( node->name, node->handle, buf, offset_low, offset_high, page_size * n_pages, - NULL, NULL, NULL); + NULL, NULL, space_id, NULL); #endif + mutex_enter(&fil_system->mutex); + if (success) { node->size += n_pages; space->size += n_pages; @@ -4447,6 +4464,7 @@ fil_extend_space_to_desired_size( printf("Extended %s to %lu, actual size %lu pages\n", space->name, size_after_extend, *actual_size); */ mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); fil_flush(space_id); @@ -4811,6 +4829,22 @@ _fil_io( srv_data_written+= len; } + /* if the table space was already deleted, space might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + if (mode == OS_AIO_NORMAL) { + buf_page_io_complete(message, trx); + return(DB_SUCCESS); /*fake*/ + } + if (type == OS_FILE_READ) { + return(DB_TABLESPACE_DELETED); + } else { + return(DB_SUCCESS); /*fake*/ + } + } + /* Reserve the fil_system mutex and make sure that we can open at least one file while holding it, if the file is not already open */ @@ -4940,10 +4974,24 @@ _fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message, trx); + offset_low, offset_high, len, node, message, space_id, trx); #endif } /**/ + /* if the table space was already deleted, space might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + if (mode == OS_AIO_SYNC) { + if (type == OS_FILE_READ) { + return(DB_TABLESPACE_DELETED); + } else { + return(DB_SUCCESS); /*fake*/ + } + } + } + ut_a(ret); if (mode == OS_AIO_SYNC) { @@ -4966,21 +5014,10 @@ _fil_io( Confirm whether the parameters are valid or not */ UNIV_INTERN ibool -fil_area_is_exist( +fil_is_exist( /*==============*/ ulint space_id, /*!< in: space id */ - ulint zip_size __attribute__((unused)), - /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset __attribute__((unused)), - /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len __attribute__((unused))) - /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ + ulint block_offset) /*!< in: offset in number of blocks */ { fil_space_t* space; fil_node_t* node; @@ -5054,6 +5091,7 @@ fil_aio_wait( fil_node_t* fil_node; void* message; ulint type; + ulint space_id = 0; ut_ad(fil_validate()); @@ -5061,7 +5099,7 @@ fil_aio_wait( srv_set_io_thread_op_info(segment, "native aio handle"); #ifdef WIN_ASYNC_IO ret = os_aio_windows_handle(segment, 0, &fil_node, - &message, &type); + &message, &type, &space_id); #else ret = 0; /* Eliminate compiler warning */ ut_error; @@ -5070,7 +5108,22 @@ fil_aio_wait( srv_set_io_thread_op_info(segment, "simulated aio handle"); ret = os_aio_simulated_handle(segment, &fil_node, - &message, &type); + &message, &type, &space_id); + } + + /* if the table space was already deleted, fil_node might not exist already. */ + if (message + && space_id < SRV_LOG_SPACE_FIRST_ID + && ((buf_page_t*)message)->space_was_being_deleted) { + + /* intended not to be uncompress read page */ + ut_a(buf_page_get_io_fix(message) == BUF_IO_WRITE + || !buf_page_get_zip_size(message) + || buf_page_get_state(message) != BUF_BLOCK_FILE_PAGE); + + srv_set_io_thread_op_info(segment, "complete io for buf page"); + buf_page_io_complete(message, NULL); + return; } ut_a(ret); diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c index 44ebe6819b7..c8e4f8e269c 100644 --- a/storage/xtradb/fsp/fsp0fsp.c +++ b/storage/xtradb/fsp/fsp0fsp.c @@ -657,13 +657,18 @@ xdes_calc_descriptor_page( 0 for uncompressed pages */ ulint offset) /*!< in: page offset */ { -#ifndef DOXYGEN /* Doxygen gets confused of these */ -# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \ - + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE -# error -# endif -#endif /* !DOXYGEN */ +//#ifndef DOXYGEN /* Doxygen gets confused of these */ +//# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET +// + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE +//# error +//# endif +//# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET +// + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE +//# error +//# endif +//#endif /* !DOXYGEN */ ut_a(UNIV_PAGE_SIZE > XDES_ARR_OFFSET + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE); + ut_a(PAGE_ZIP_MIN_SIZE > XDES_ARR_OFFSET + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE); ut_ad(ut_is_2pow(zip_size)); if (!zip_size) { @@ -3473,9 +3478,9 @@ fseg_free_page( fseg_free_page_low(seg_inode, space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(space, page); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /**********************************************************************//** @@ -3542,13 +3547,13 @@ fseg_free_extent( fsp_free_extent(space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG for (i = 0; i < FSP_EXTENT_SIZE; i++) { buf_page_set_file_page_was_freed(space, first_page_in_extent + i); } -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /**********************************************************************//** diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 2a6689eb38f..1daea3cd070 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -201,6 +201,25 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +/** Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/** Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -5860,6 +5879,11 @@ ha_innobase::index_read( case DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_KEY_NOT_FOUND; @@ -6084,7 +6108,7 @@ ha_innobase::general_fetch( table->status = 0; #ifdef EXTENDED_FOR_USERSTAT rows_read++; - if (active_index >= 0 && active_index < MAX_KEY) + if (active_index < MAX_KEY) index_rows_read[active_index]++; #endif break; @@ -7956,6 +7980,65 @@ innobase_get_mysql_key_number_for_index( return(0); } + +/*********************************************************************//** +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_int64_t num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -8014,6 +8097,10 @@ ha_innobase::info_low( for (index = dict_table_get_first_index(ib_table); index != NULL; index = dict_table_get_next_index(index)) { + if (dict_is_older_statistics(index)) { + row_delete_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } row_insert_stats_for_mysql(index, prebuilt->trx); innobase_commit_low(prebuilt->trx); } @@ -8206,13 +8293,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -11368,25 +11450,6 @@ static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_sample_pages, "The number of index pages to sample when calculating statistics (default 8)", NULL, NULL, 8, 1, ~0ULL, 0); -const char *innobase_stats_method_names[]= -{ - "nulls_equal", - "nulls_unequal", - "nulls_ignored", - NullS -}; -TYPELIB innobase_stats_method_typelib= -{ - array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib", - innobase_stats_method_names, NULL -}; -static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method, - PLUGIN_VAR_RQCMDARG, - "Specifies how InnoDB index statistics collection code should threat NULLs. " - "Possible values of name are same to for 'myisam_stats_method'. " - "This is startup parameter.", - NULL, NULL, 0, &innobase_stats_method_typelib); - static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update, PLUGIN_VAR_RQCMDARG, "Enable/Disable InnoDB's auto update statistics of indexes. " @@ -11575,6 +11638,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, innodb_change_buffering_validate, innodb_change_buffering_update, "inserts"); +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -11713,6 +11783,12 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, "except for the deletion.", NULL, NULL, 0, 0, 1, 0); +static MYSQL_SYSVAR_ULONG(lazy_drop_table, srv_lazy_drop_table, + PLUGIN_VAR_RQCMDARG, + "At deleting tablespace, only miminum needed processes at the time are done. " + "e.g. for http://bugs.mysql.com/51325", + NULL, NULL, 0, 0, 1, 0); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(log_block_size), @@ -11762,12 +11838,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(overwrite_relay_log_info), MYSQL_SYSVAR(rollback_on_timeout), MYSQL_SYSVAR(stats_on_metadata), - MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(strict_mode), @@ -11804,6 +11880,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(auto_lru_dump), MYSQL_SYSVAR(use_purge_thread), MYSQL_SYSVAR(pass_corrupt_table), + MYSQL_SYSVAR(lazy_drop_table), NULL }; diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index ca22375baba..11b4a4b4d60 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -3280,6 +3280,14 @@ static ST_FIELD_INFO i_s_innodb_sys_stats_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + {STRUCT_FLD(field_name, "NON_NULL_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; @@ -3548,6 +3556,9 @@ copy_sys_stats_rec( { int status; int field; + ulint n_fields; + + n_fields = rec_get_n_fields_old(rec); /* INDEX_ID */ field = dict_index_get_nth_col_pos(index, 0); @@ -3567,6 +3578,16 @@ copy_sys_stats_rec( if (status) { return status; } + /* NON_NULL_VALS */ + if (n_fields < 6) { + table->field[3]->set_null(); + } else { + field = dict_index_get_nth_col_pos(index, 3); + status = copy_id_field(table, 3, rec, field); + if (status) { + return status; + } + } return 0; } diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c index 12dbc29be23..3f741da60bb 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.c +++ b/storage/xtradb/ibuf/ibuf0ibuf.c @@ -1881,9 +1881,9 @@ ibuf_remove_free_page(void) fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_SPACE_ID, page_no, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ ibuf_enter(); @@ -1925,9 +1925,9 @@ ibuf_remove_free_page(void) ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ mtr_commit(&mtr); mutex_exit(&ibuf_mutex); diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index b477ad0320a..ece3621fa97 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -478,7 +478,10 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -509,8 +512,8 @@ file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -519,10 +522,42 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr); /*!< in: mtr containing the latch to - rec and to the tree */ + __attribute__((nonnull)); + +/** Stores the fields in big_rec_vec to the tablespace and puts pointers to +them in rec. The extern flags in rec will have to be set beforehand. +The fields are stored on pages allocated from leaf node +file segment of the index tree. +@param index in: clustered index; MUST be X-latched by mtr +@param b in/out: block containing rec; MUST be X-latched by mtr +@param rec in/out: clustered index record +@param offsets in: rec_get_offsets(rec, index); + the "external storage" flags in offsets will not be adjusted +@param mtr in: mini-transaction that holds x-latch on index and b +@param upd in: TRUE if the record is updated in place (not delete+insert) +@param big in: vector containing fields to be stored externally +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +#ifdef UNIV_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) +#elif defined UNIV_BLOB_LIGHT_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) +#else +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) +#endif + /*******************************************************************//** Frees the space in an externally stored field to the file space management if the field in data is owned the externally stored field, diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index e06927f42f0..bc0e9170281 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -166,10 +166,8 @@ Allocates a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void); +/*=================*/ /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE @@ -371,7 +369,7 @@ buf_reset_check_index_page_at_flush( /*================================*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: page number */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the @@ -396,7 +394,7 @@ buf_page_reset_file_page_was_freed( /*===============================*/ ulint space, /*!< in: space id */ ulint offset); /*!< in: page number */ -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Reads the freed_page_clock of a buffer block. @return freed_page_clock */ @@ -1157,12 +1155,13 @@ struct buf_page_struct{ 0 if the block was never accessed in the buffer pool */ /* @} */ + ibool space_was_being_deleted; ibool is_corrupt; -# ifdef UNIV_DEBUG_FILE_ACCESSES +# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ibool file_page_was_freed; /*!< this is set to TRUE when fsp frees a page in buffer pool */ -# endif /* UNIV_DEBUG_FILE_ACCESSES */ +# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ }; diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 93cc68e7fc9..2cb0d8ef497 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -384,6 +384,7 @@ buf_block_set_file_page( buf_block_set_state(block, BUF_BLOCK_FILE_PAGE); block->page.space = space; block->page.offset = page_no; + block->page.space_was_being_deleted = FALSE; } /*********************************************************************//** @@ -757,14 +758,12 @@ Allocates a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void) +/*=================*/ { buf_block_t* block; - block = buf_LRU_get_free_block(zip_size); + block = buf_LRU_get_free_block(); buf_block_set_state(block, BUF_BLOCK_MEMORY); diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index d3b59e8b579..fe7c067dfb7 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -84,6 +84,13 @@ void buf_LRU_invalidate_tablespace( /*==========================*/ ulint id); /*!< in: space id */ +/******************************************************************//** +*/ +UNIV_INTERN +void +buf_LRU_mark_space_was_deleted( +/*===========================*/ + ulint id); /*!< in: space id */ /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -112,11 +119,8 @@ buf_LRU_free_block( buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released, - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ - ibool have_LRU_mutex); + ibool have_LRU_mutex) + __attribute__((nonnull)); /******************************************************************//** Try to free a replaceable block. @return TRUE if found and freed */ @@ -147,10 +151,9 @@ LRU list to the free list. @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ + __attribute__((warn_unused_result)); /******************************************************************//** Puts a block back to the free list. */ diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h index 507f1543bbb..ce3e5ecc9c5 100644 --- a/storage/xtradb/include/buf0types.h +++ b/storage/xtradb/include/buf0types.h @@ -76,7 +76,7 @@ enum buf_io_fix { /** twice the maximum block size of the buddy system; the underlying memory is aligned by this amount: this must be equal to UNIV_PAGE_SIZE */ -#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) +#define BUF_BUDDY_HIGH ((ulint)BUF_BUDDY_LOW << BUF_BUDDY_SIZES) /* @} */ #endif diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 9239e031a7f..a57c5127323 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -146,6 +146,7 @@ clustered index */ #define DICT_SYS_INDEXES_NAME_FIELD 4 #define DICT_SYS_STATS_DIFF_VALS_FIELD 4 +#define DICT_SYS_STATS_NON_NULL_VALS_FIELD 5 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 7baacdd6055..2baecdc958a 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1062,6 +1062,13 @@ dict_update_statistics( not been initialized yet, otherwise do nothing */ ibool sync); +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 6736c2a3a36..f47293bedf6 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -321,6 +321,12 @@ struct dict_index_struct{ dict_get_n_unique(index); we periodically calculate new estimates */ + ib_int64_t* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /*!< approximate index size in database pages */ diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h index 7ad69193cc9..f14b59a19d4 100644 --- a/storage/xtradb/include/dict0types.h +++ b/storage/xtradb/include/dict0types.h @@ -33,11 +33,6 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_foreign_struct dict_foreign_t; -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index a262ec8f9cc..fbf8ca20db3 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -648,18 +648,10 @@ _fil_io( Confirm whether the parameters are valid or not */ UNIV_INTERN ibool -fil_area_is_exist( +fil_is_exist( /*==============*/ ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len); /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ + ulint block_offset); /*!< in: offset in number of blocks */ /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index cbbec2cf55e..732e930517b 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -657,6 +657,7 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint space_id, trx_t* trx); /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in @@ -717,7 +718,8 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id); #endif /**********************************************************************//** @@ -739,7 +741,8 @@ os_aio_simulated_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id); /**********************************************************************//** Validates the consistency of the aio system. @return TRUE if ok */ diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h index 6b444b3dd96..1544b0abe1c 100644 --- a/storage/xtradb/include/page0cur.h +++ b/storage/xtradb/include/page0cur.h @@ -293,22 +293,6 @@ page_cur_open_on_rnd_user_rec( /*==========================*/ buf_block_t* block, /*!< in: page */ page_cur_t* cursor);/*!< out: page cursor */ - -UNIV_INTERN -void -page_cur_open_on_nth_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth); - -UNIV_INTERN -ibool -page_cur_open_on_rnd_user_rec_after_nth( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth); #endif /* !UNIV_HOTBACKUP */ /***********************************************************//** Parses a log record of a record insert on a page. diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h index fcea62ad486..a908521c9f7 100644 --- a/storage/xtradb/include/rem0cmp.h +++ b/storage/xtradb/include/rem0cmp.h @@ -165,15 +165,18 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes, /*!< in/out: number of already matched + ulint* matched_bytes);/*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ - ulint stats_method); /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared. diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic index d5185ec94af..63415fe7837 100644 --- a/storage/xtradb/include/rem0cmp.ic +++ b/storage/xtradb/include/rem0cmp.ic @@ -87,5 +87,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b, 0)); + FALSE, &match_f, &match_b)); } diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 141f4beb81e..4acfd2e793b 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -384,6 +384,14 @@ row_insert_stats_for_mysql( dict_index_t* index, trx_t* trx); /*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h index b61e6b6dca1..97b7ec49a17 100644 --- a/storage/xtradb/include/row0upd.h +++ b/storage/xtradb/include/row0upd.h @@ -280,19 +280,29 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - const row_ext_t*ext, /*!< NULL, or prefixes of the externally + const row_ext_t*ext) /*!< NULL, or prefixes of the externally stored columns in the old row */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update) /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ - __attribute__((nonnull(3,4), warn_unused_result)); + __attribute__((nonnull(1,2), warn_unused_result)); +#ifdef UNIV_DEBUG +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,thr,row,ext) +#else /* UNIV_DEBUG */ +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,row,ext) +#endif /* UNIV_DEBUG */ /***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index f4c9704741c..d4329d16a62 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -177,6 +177,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ #define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -208,10 +213,6 @@ extern ulint srv_fast_shutdown; /* If this is 1, do not do a extern ibool srv_innodb_status; extern unsigned long long srv_stats_sample_pages; -extern ulong srv_stats_method; -#define SRV_STATS_METHOD_NULLS_EQUAL 0 -#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1 -#define SRV_STATS_METHOD_IGNORE_NULLS 2 extern ulong srv_stats_auto_update; extern ulint srv_stats_update_need_lock; extern ibool srv_use_sys_stats_table; @@ -242,6 +243,8 @@ extern ulint srv_pass_corrupt_table; extern ulong srv_extra_rsegments; extern ulong srv_dict_size_limit; + +extern ulint srv_lazy_drop_table; /*-------------------------------------------*/ extern ulint srv_n_rows_inserted; @@ -413,6 +416,19 @@ enum { in connection with recovery */ }; +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. */ enum srv_thread_type { diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 4edf93f4042..22de1bfdd93 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -113,14 +113,14 @@ is necessary only if the memory block containing it is freed. */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG # define rw_lock_create(L, level) \ - rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) + rw_lock_create_func((L), (level), __FILE__, __LINE__, #L) # else /* UNIV_SYNC_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), #L, __FILE__, __LINE__) + rw_lock_create_func((L), __FILE__, __LINE__, #L) # endif /* UNIV_SYNC_DEBUG */ #else /* UNIV_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), #L, NULL, 0) + rw_lock_create_func((L), #L) #endif /* UNIV_DEBUG */ /******************************************************************//** @@ -137,10 +137,10 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name); /*!< in: mutex name */ /******************************************************************//** Calling this function is obligatory only if the memory buffer containing the rw-lock is freed. Removes an rw-lock object from the global list. The @@ -490,6 +490,7 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info); /*!< in: debug struct */ #endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index a500cf1da45..f2ff83101ab 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -73,14 +73,14 @@ necessary only if the memory block containing it is freed. */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG # define mutex_create(M, level) \ - mutex_create_func((M), #M, (level), __FILE__, __LINE__) + mutex_create_func((M), (level), __FILE__, __LINE__, #M) # else # define mutex_create(M, level) \ - mutex_create_func((M), #M, __FILE__, __LINE__) + mutex_create_func((M), __FILE__, __LINE__, #M) # endif #else # define mutex_create(M, level) \ - mutex_create_func((M), #M, NULL, 0) + mutex_create_func((M), #M) #endif /******************************************************************//** @@ -93,14 +93,14 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ - const char* cmutex_name, /*!< in: mutex name */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name); /*!< in: mutex name */ #undef mutex_free /* Fix for MacOS X */ @@ -496,6 +496,7 @@ or row lock! */ #define SYNC_BUF_POOL 150 #define SYNC_BUF_FLUSH_LIST 149 #define SYNC_DOUBLEWRITE 140 +#define SYNC_OUTER_ANY_LATCH 136 #define SYNC_ANY_LATCH 135 #define SYNC_THR_LOCAL 133 #define SYNC_MEM_HASH 131 diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h index 303188f09f2..ea9956cd143 100644 --- a/storage/xtradb/include/trx0rseg.h +++ b/storage/xtradb/include/trx0rseg.h @@ -149,9 +149,7 @@ struct trx_rseg_struct{ ulint id; /*!< rollback segment id == the index of its slot in the trx system file copy */ mutex_t mutex; /*!< mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ + struct except id, which is constant */ ulint space; /*!< space where the rollback segment is header is placed */ ulint zip_size;/* compressed page size of space diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 4c0ce392bcd..173c63918d3 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -214,12 +214,12 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t * trx_get_trx_by_xid( /*===============*/ - XID* xid); /*!< in: X/Open XA transaction identification */ + const XID* xid); /*!< in: X/Open XA transaction identifier */ /**********************************************************************//** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 111bda1a3c5..7b11a16dae9 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 #define INNODB_VERSION_BUGFIX 15 -#define PERCONA_INNODB_VERSION 12.5 +#define PERCONA_INNODB_VERSION 12.7 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -180,14 +180,15 @@ command. Not tested on Windows. */ debugging without UNIV_DEBUG */ #define UNIV_BUF_DEBUG /* Enable buffer pool debugging without UNIV_DEBUG */ +#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column + debugging without UNIV_DEBUG */ #define UNIV_DEBUG /* Enable ut_ad() assertions and disable UNIV_INLINE */ #define UNIV_DEBUG_LOCK_VALIDATE /* Enable ut_ad(lock_rec_validate_page()) assertions. */ -#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access - (field file_page_was_freed - in buf_page_t) */ +#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access + debugging without UNIV_DEBUG */ #define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ #define UNIV_HASH_DEBUG /* debug HASH_ macros */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ @@ -433,7 +434,7 @@ it is read or written. */ /* Use sun_prefetch when compile with Sun Studio */ # define UNIV_EXPECT(expr,value) (expr) # define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) +# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) # define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) #else /* Dummy versions of the macros */ diff --git a/storage/xtradb/mem/mem0mem.c b/storage/xtradb/mem/mem0mem.c index 1dd4db30841..86100b04fd6 100644 --- a/storage/xtradb/mem/mem0mem.c +++ b/storage/xtradb/mem/mem0mem.c @@ -347,7 +347,7 @@ mem_heap_create_block( return(NULL); } } else { - buf_block = buf_block_alloc(0); + buf_block = buf_block_alloc(); } block = (mem_block_t*) buf_block->frame; diff --git a/storage/xtradb/mtr/mtr0log.c b/storage/xtradb/mtr/mtr0log.c index d22015a575f..3349036b5b3 100644 --- a/storage/xtradb/mtr/mtr0log.c +++ b/storage/xtradb/mtr/mtr0log.c @@ -408,7 +408,7 @@ mlog_parse_string( ptr += 2; if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { + || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { recv_sys->found_corrupt_log = TRUE; return(NULL); diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 7d47203e992..31dec031af3 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -142,6 +142,7 @@ struct os_aio_slot_struct{ // made and only the slot message // needs to be passed to the caller // of os_aio_simulated_handle */ + ulint space_id; fil_node_t* message1; /*!< message which is given by the */ void* message2; /*!< the requester of an aio operation and which can be used to identify @@ -3390,7 +3391,8 @@ os_aio_array_reserve_slot( offset */ ulint offset_high, /*!< in: most significant 32 bits of offset */ - ulint len) /*!< in: length of the block to read or write */ + ulint len, /*!< in: length of the block to read or write */ + ulint space_id) { os_aio_slot_t* slot; ulint i; @@ -3472,6 +3474,7 @@ found: slot->offset_high = offset_high; // slot->io_already_done = FALSE; slot->status = OS_AIO_NOT_ISSUED; + slot->space_id = space_id; #ifdef WIN_ASYNC_IO control = &(slot->control); @@ -3680,6 +3683,7 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint space_id, trx_t* trx) { os_aio_array_t* array; @@ -3762,7 +3766,7 @@ try_again: trx->io_read += n; } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n); + name, buf, offset, offset_high, n, space_id); if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO @@ -3872,7 +3876,8 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id) { ulint orig_seg = segment; os_aio_array_t* array; @@ -3926,6 +3931,7 @@ os_aio_windows_handle( *message2 = slot->message2; *type = slot->type; + *space_id = slot->space_id; if (ret && len == slot->len) { ret_val = TRUE; @@ -4009,7 +4015,8 @@ os_aio_simulated_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* space_id) { os_aio_array_t* array; ulint segment; @@ -4300,6 +4307,7 @@ slot_io_done: *message2 = slot->message2; *type = slot->type; + *space_id = slot->space_id; os_mutex_exit(array->mutex); diff --git a/storage/xtradb/page/page0cur.c b/storage/xtradb/page/page0cur.c index fa3d2532deb..f10f16a7dd9 100644 --- a/storage/xtradb/page/page0cur.c +++ b/storage/xtradb/page/page0cur.c @@ -564,74 +564,6 @@ page_cur_open_on_rnd_user_rec( } while (rnd--); } -UNIV_INTERN -void -page_cur_open_on_nth_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth) -{ - ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); - - page_cur_set_before_first(block, cursor); - - if (UNIV_UNLIKELY(n_recs == 0)) { - - return; - } - - nth--; - - if (nth >= n_recs) { - nth = n_recs - 1; - } - - do { - page_cur_move_to_next(cursor); - } while (nth--); -} - -UNIV_INTERN -ibool -page_cur_open_on_rnd_user_rec_after_nth( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor, /*!< out: page cursor */ - ulint nth) -{ - ulint rnd; - ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); - ibool ret; - - page_cur_set_before_first(block, cursor); - - if (UNIV_UNLIKELY(n_recs == 0)) { - - return (FALSE); - } - - nth--; - - if (nth >= n_recs) { - nth = n_recs - 1; - } - - rnd = (ulint) (nth + page_cur_lcg_prng() % (n_recs - nth)); - - if (rnd == nth) { - ret = TRUE; - } else { - ret = FALSE; - } - - do { - page_cur_move_to_next(cursor); - } while (rnd--); - - return (ret); -} - /***********************************************************//** Writes the log record of a record insert on a page. */ static diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c index a94d2d54417..5b4f5d3b76a 100644 --- a/storage/xtradb/page/page0zip.c +++ b/storage/xtradb/page/page0zip.c @@ -4443,7 +4443,7 @@ page_zip_reorganize( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); btr_search_drop_page_hash_index(block); block->check_index_page_at_flush = TRUE; #else /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/rem/rem0cmp.c b/storage/xtradb/rem/rem0cmp.c index 8ee434f85da..04d2c15437b 100644 --- a/storage/xtradb/rem/rem0cmp.c +++ b/storage/xtradb/rem/rem0cmp.c @@ -862,15 +862,18 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes, /*!< in/out: number of already matched + ulint* matched_bytes) /*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ - ulint stats_method) { ulint rec1_n_fields; /* the number of fields in rec */ ulint rec1_f_len; /* length of current field in rec */ @@ -962,13 +965,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) { - goto next_field; - } else { + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { ret = -1; + } else { + goto next_field; } - } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index 3372e1480b5..efcea62f212 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -2026,6 +2026,8 @@ row_ins_index_entry_low( } #ifdef UNIV_DEBUG + if (!srv_use_sys_stats_table + || index != UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { page_t* page = btr_cur_get_page(&cursor); rec_t* first_rec = page_rec_get_next( @@ -2136,7 +2138,7 @@ function_exit: err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr); + rec, offsets, &mtr, FALSE, big_rec); if (modify) { dtuple_big_rec_free(big_rec); diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index 205aead8efe..1b97cbb0009 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -52,6 +52,7 @@ Created 9/17/2000 Heikki Tuuri #include "btr0sea.h" #include "fil0fil.h" #include "ibuf0ibuf.h" +#include "ha_prototypes.h" /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; @@ -2096,6 +2097,32 @@ row_insert_stats_for_mysql( } /*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=============================*/ + dict_index_t* index, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + trx->op_info = "delete rows from SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + pars_info_add_dulint_literal(info, "indexid", index->id); + + return((int) que_eval_sql(info, + "PROCEDURE DELETE_STATISTICS_PROC () IS\n" + "BEGIN\n" + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" + "END;\n" + , TRUE, trx)); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. diff --git a/storage/xtradb/row/row0purge.c b/storage/xtradb/row/row0purge.c index 8bf2ae0f458..752a2ec9e83 100644 --- a/storage/xtradb/row/row0purge.c +++ b/storage/xtradb/row/row0purge.c @@ -387,8 +387,11 @@ Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void -row_purge_upd_exist_or_extern( -/*==========================*/ +row_purge_upd_exist_or_extern_func( +/*===============================*/ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; @@ -413,8 +416,8 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(NULL, NULL, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); @@ -496,6 +499,14 @@ skip_secondaries: } } +#ifdef UNIV_DEBUG +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(thr,node) +#else /* UNIV_DEBUG */ +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(node) +#endif /* UNIV_DEBUG */ + /***********************************************************//** Parses the row reference and other info in a modify undo log record. @return TRUE if purge operation required: NOTE that then the CALLER @@ -602,47 +613,32 @@ err_exit: /***********************************************************//** Fetches an undo log record and does the purge for the recorded operation. If none left, or the current purge completed, returns the control to the -parent node, which is always a query thread node. -@return DB_SUCCESS if operation successfully completed, else error code */ -static -ulint +parent node, which is always a query thread node. */ +static __attribute__((nonnull)) +void row_purge( /*======*/ purge_node_t* node, /*!< in: row purge node */ que_thr_t* thr) /*!< in: query thread */ { - roll_ptr_t roll_ptr; - ibool purge_needed; ibool updated_extern; - trx_t* trx; - ut_ad(node && thr); - - trx = thr_get_trx(thr); + ut_ad(node); + ut_ad(thr); - node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, - &(node->reservation), + node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr, + &node->reservation, node->heap); if (!node->undo_rec) { /* Purge completed for this query thread */ thr->run_node = que_node_get_parent(node); - return(DB_SUCCESS); - } - - node->roll_ptr = roll_ptr; - - if (node->undo_rec == &trx_purge_dummy_rec) { - purge_needed = FALSE; - } else { - purge_needed = row_purge_parse_undo_rec(node, &updated_extern, - thr); - /* If purge_needed == TRUE, we must also remember to unfreeze - data dictionary! */ + return; } - if (purge_needed) { + if (node->undo_rec != &trx_purge_dummy_rec + && row_purge_parse_undo_rec(node, &updated_extern, thr)) { node->found_clust = FALSE; node->index = dict_table_get_next_index( @@ -654,14 +650,14 @@ row_purge( } else if (updated_extern || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - row_purge_upd_exist_or_extern(node); + row_purge_upd_exist_or_extern(thr, node); } if (node->found_clust) { btr_pcur_close(&(node->pcur)); } - row_mysql_unfreeze_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); } /* Do some cleanup */ @@ -669,8 +665,6 @@ row_purge( mem_heap_empty(node->heap); thr->run_node = node; - - return(DB_SUCCESS); } /***********************************************************//** @@ -684,9 +678,6 @@ row_purge_step( que_thr_t* thr) /*!< in: query thread */ { purge_node_t* node; -#ifdef UNIV_DEBUG - ulint err; -#endif /* UNIV_DEBUG */ ut_ad(thr); @@ -694,12 +685,7 @@ row_purge_step( ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); -#ifdef UNIV_DEBUG - err = -#endif /* UNIV_DEBUG */ row_purge(node, thr); - ut_ad(err == DB_SUCCESS); - return(thr); } diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c index 8e806a14a98..0783d482f76 100644 --- a/storage/xtradb/row/row0row.c +++ b/storage/xtradb/row/row0row.c @@ -347,6 +347,14 @@ row_rec_to_index_entry_low( rec_len = rec_offs_n_fields(offsets); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + if (rec_len < dict_index_get_n_fields(index)) { + /* the new record should be extended */ + rec_len = dict_index_get_n_fields(index); + } + } + entry = dtuple_create(heap, rec_len); dtuple_set_n_fields_cmp(entry, @@ -358,6 +366,14 @@ row_rec_to_index_entry_low( for (i = 0; i < rec_len; i++) { dfield = dtuple_get_nth_field(entry, i); + + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield_set_null(dfield); + continue; + } + field = rec_get_nth_field(rec, offsets, i, &len); dfield_set_data(dfield, field, len); diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c index 562f8093c38..5202a498eed 100644 --- a/storage/xtradb/row/row0umod.c +++ b/storage/xtradb/row/row0umod.c @@ -173,40 +173,26 @@ row_undo_mod_remove_clust_low( mtr_t* mtr, /*!< in: mtr */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { - btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; - ibool success; ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - pcur = &(node->pcur); - btr_cur = btr_pcur_get_btr_cur(pcur); - success = btr_pcur_restore_position(mode, pcur, mtr); + /* Find out if the record has been purged already + or if we can remove it. */ - if (!success) { + if (!btr_pcur_restore_position(mode, &node->pcur, mtr) + || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { return(DB_SUCCESS); } - /* Find out if we can remove the whole clustered index record */ - - if (node->rec_type == TRX_UNDO_UPD_DEL_REC - && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { - - /* Ok, we can remove */ - } else { - return(DB_SUCCESS); - } + btr_cur = btr_pcur_get_btr_cur(&node->pcur); if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, mtr); - - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } + err = btr_cur_optimistic_delete(btr_cur, mtr) + ? DB_SUCCESS + : DB_FAIL; } else { ut_ad(mode == BTR_MODIFY_TREE); @@ -668,8 +654,9 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary( - node->row, node->ext, node->index, node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, + node->row, node->ext)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index e1c78949603..a6fb266c4ed 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -439,6 +439,12 @@ row_upd_changes_field_size_or_external( 0); } + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && upd_field->field_no >= rec_offs_n_fields(offsets)) { + return(TRUE); + } + old_len = rec_offs_nth_size(offsets, upd_field->field_no); if (rec_offs_comp(offsets) @@ -844,6 +850,18 @@ row_upd_build_difference_binary( for (i = 0; i < dtuple_get_n_fields(entry); i++) { + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield = dtuple_get_nth_field(entry, i); + + upd_field = upd_get_nth_field(update, n_diff); + dfield_copy(&(upd_field->new_val), dfield); + upd_field_set_field_no(upd_field, i, index, trx); + n_diff++; + goto skip_compare; + } + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -1192,25 +1210,31 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - const row_ext_t*ext, /*!< NULL, or prefixes of the externally + const row_ext_t*ext) /*!< NULL, or prefixes of the externally stored columns in the old row */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update) /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ { ulint n_unique; ulint i; const dict_index_t* clust_index; - ut_ad(update); ut_ad(index); + ut_ad(update); + ut_ad(thr); + ut_ad(thr->graph); + ut_ad(thr->graph->trx); n_unique = dict_index_get_n_unique(index); @@ -1252,6 +1276,10 @@ row_upd_changes_ord_field_binary( || dfield_is_null(dfield)) { /* do nothing special */ } else if (UNIV_LIKELY_NULL(ext)) { + /* Silence a compiler warning without + silencing a Valgrind error. */ + dfield_len = 0; + UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len); /* See if the column is stored externally. */ buf = row_ext_lookup(ext, col_no, &dfield_len); @@ -1259,9 +1287,14 @@ row_upd_changes_ord_field_binary( if (UNIV_LIKELY_NULL(buf)) { if (UNIV_UNLIKELY(buf == field_ref_zero)) { - /* This should never happen, but - we try to fail safe here. */ - ut_ad(0); + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active(), + when the server had crashed before + storing the field. */ + ut_ad(thr->graph->trx->is_recovered); + ut_ad(trx_is_recv(thr->graph->trx)); return(TRUE); } @@ -1608,8 +1641,8 @@ row_upd_sec_step( ut_ad(!dict_index_is_clust(node->index)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->ext, - node->index, node->update)) { + || row_upd_changes_ord_field_binary(node->index, node->update, + thr, node->row, node->ext)) { return(row_upd_sec_index_entry(node, thr)); } @@ -1937,7 +1970,7 @@ row_upd_clust_rec( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr); + mtr, TRUE, big_rec); mtr_commit(mtr); } @@ -2136,8 +2169,8 @@ exit_func: row_upd_store_row(node); - if (row_upd_changes_ord_field_binary(node->row, node->ext, index, - node->update)) { + if (row_upd_changes_ord_field_binary(index, node->update, thr, + node->row, node->ext)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform diff --git a/storage/xtradb/row/row0vers.c b/storage/xtradb/row/row0vers.c index b6d35363f08..d4fde0b939b 100644 --- a/storage/xtradb/row/row0vers.c +++ b/storage/xtradb/row/row0vers.c @@ -669,11 +669,15 @@ row_vers_build_for_semi_consistent_read( mutex_enter(&kernel_mutex); version_trx = trx_get_on_id(version_trx_id); + if (version_trx + && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY + || version_trx->conc_state == TRX_NOT_STARTED)) { + + version_trx = NULL; + } mutex_exit(&kernel_mutex); - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) { + if (!version_trx) { /* We found a version that belongs to a committed transaction: return it. */ diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 3184308f573..f39d1b8a758 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -269,6 +269,11 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ UNIV_INTERN ulint srv_data_written = 0; @@ -388,7 +393,6 @@ UNIV_INTERN ibool srv_innodb_status = FALSE; /* When estimating number of different key values in an index, sample this many index pages */ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; -UNIV_INTERN ulong srv_stats_method = 0; UNIV_INTERN ulong srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; @@ -419,6 +423,8 @@ UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ UNIV_INTERN ulong srv_extra_rsegments = 0; /* extra rseg for users */ UNIV_INTERN ulong srv_dict_size_limit = 0; + +UNIV_INTERN ulint srv_lazy_drop_table = 0; /*-------------------------------------------*/ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; @@ -2709,7 +2715,8 @@ srv_master_thread( unsigned space:32; unsigned offset:32; ib_uint64_t oldest_modification; - } prev_flush_info; + }; + struct t_prev_flush_info_struct prev_flush_info = {0,0,0,0}; ib_uint64_t lsn_old; @@ -2768,6 +2775,8 @@ loop: for (i = 0; i < 10; i++) { ulint cur_time = ut_time_ms(); + n_pages_flushed = 0; /* initialize */ + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; @@ -3033,7 +3042,8 @@ retry_flush_batch: if (prev_adaptive_checkpoint == 3) { lint n_flush; - lint blocks_sum, new_blocks_sum, flushed_blocks_sum; + lint blocks_sum; + ulint new_blocks_sum, flushed_blocks_sum; blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index 223e1715944..57a288089c7 100644 --- a/storage/xtradb/sync/sync0arr.c +++ b/storage/xtradb/sync/sync0arr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -714,7 +714,7 @@ print: fprintf(stderr, "rw-lock %p ", (void*) lock); sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); + rw_lock_debug_print(stderr, debug); return(TRUE); } } diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index 0bdffcb98b0..9431de15fda 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -231,10 +231,10 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name) /*!< in: mutex name */ { /* If this is the very first time a synchronization object is created, then the following call initializes the sync system. */ @@ -247,11 +247,12 @@ rw_lock_create_func( lock->mutex.cmutex_name = cmutex_name; ut_d(lock->mutex.mutex_type = 1); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ +# ifdef UNIV_DEBUG + UT_NOT_USED(cfile_name); + UT_NOT_USED(cline); +# endif #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -#if defined(INNODB_RW_LOCKS_USE_ATOMICS) || !defined(UNIV_DEBUG) - (void) cfile_name; - (void) cline; -#endif lock->lock_word = X_LOCK_DECR; lock->waiters = 0; @@ -923,7 +924,7 @@ rw_lock_list_print_info( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(file, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -971,7 +972,7 @@ rw_lock_print( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(stderr, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -983,28 +984,29 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info) /*!< in: debug struct */ { ulint rwt; rwt = info->lock_type; - fprintf(stderr, "Locked: thread %lu file %s line %lu ", + fprintf(f, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); + fputs("S-LOCK", f); } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); + fputs("X-LOCK", f); } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); + fputs("WAIT X-LOCK", f); } else { ut_error; } if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); + fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', stderr); + putc('\n', f); } /***************************************************************//** diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c index 71b444dbe54..3a80da9318b 100644 --- a/storage/xtradb/sync/sync0sync.c +++ b/storage/xtradb/sync/sync0sync.c @@ -238,14 +238,14 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ - const char* cmutex_name, /*!< in: mutex name */ #ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ + ulint cline, /*!< in: file line where created */ +#endif /* UNIV_DEBUG */ + const char* cmutex_name) /*!< in: mutex name */ { #if defined(HAVE_ATOMIC_BUILTINS) mutex_reset_lock_word(mutex); @@ -266,9 +266,6 @@ mutex_create_func( #ifdef UNIV_DEBUG mutex->cfile_name = cfile_name; mutex->cline = cline; -#else - (void) cfile_name; - (void) cline; #endif /* UNIV_DEBUG */ mutex->count_os_wait = 0; mutex->cmutex_name= cmutex_name; @@ -1164,6 +1161,7 @@ sync_thread_add_level( case SYNC_LOG: case SYNC_THR_LOCAL: case SYNC_ANY_LATCH: + case SYNC_OUTER_ANY_LATCH: case SYNC_TRX_SYS_HEADER: case SYNC_FILE_FORMAT_TAG: case SYNC_DOUBLEWRITE: diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.c index f50e10ed756..71629f01d73 100644 --- a/storage/xtradb/trx/trx0rec.c +++ b/storage/xtradb/trx/trx0rec.c @@ -665,14 +665,27 @@ trx_undo_page_report_modify( /* Save to the undo log the old values of the columns to be updated. */ if (update) { + ulint extended = 0; + if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - ptr += mach_write_compressed(ptr, upd_get_n_fields(update)); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + for (i = 0; i < upd_get_n_fields(update); i++) { + ulint pos = upd_get_nth_field(update, i)->field_no; + + if (pos >= rec_offs_n_fields(offsets)) { + extended++; + } + } + } + + ptr += mach_write_compressed(ptr, upd_get_n_fields(update) - extended); - for (i = 0; i < upd_get_n_fields(update); i++) { + for (i = 0; i < upd_get_n_fields(update) - extended; i++) { ulint pos = upd_get_nth_field(update, i)->field_no; diff --git a/storage/xtradb/trx/trx0roll.c b/storage/xtradb/trx/trx0roll.c index 1a43e419214..a4bbf7fd652 100644 --- a/storage/xtradb/trx/trx0roll.c +++ b/storage/xtradb/trx/trx0roll.c @@ -48,8 +48,8 @@ Created 3/26/1996 Heikki Tuuri rollback */ #define TRX_ROLL_TRUNC_THRESHOLD 1 -/** In crash recovery, the current trx to be rolled back */ -static trx_t* trx_roll_crash_recv_trx = NULL; +/** In crash recovery, the current trx to be rolled back; NULL otherwise */ +static const trx_t* trx_roll_crash_recv_trx = NULL; /** In crash recovery we set this to the undo n:o of the current trx to be rolled back. Then we can print how many % the rollback has progressed. */ diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index 75bbe1b342a..98bd9e4ac58 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -2106,18 +2106,18 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t* trx_get_trx_by_xid( /*===============*/ - XID* xid) /*!< in: X/Open XA transaction identification */ + const XID* xid) /*!< in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2130,10 +2130,16 @@ trx_get_trx_by_xid( of gtrid_lenght+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. */ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2142,14 +2148,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); } |