From 7fb17e42cf2f6f309f43907f2db84389d8d895e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 10 Jan 2011 15:34:45 +0200 Subject: Bug#59181 InnoDB compilation failure on the Sun Studio compiler Define UNIV_PREFETCH_R(addr) as sun_prefetch_read_many((void*) addr), because apparently some versions of the Sun library omit the const qualifier. --- storage/innodb_plugin/include/univ.i | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index bbff8ddf1e3..4425950748b 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -412,7 +412,7 @@ it is read or written. */ /* Use sun_prefetch when compile with Sun Studio */ # define UNIV_EXPECT(expr,value) (expr) # define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) +# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) # define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) #else /* Dummy versions of the macros */ -- cgit v1.2.1 From 9cd4d4984025857782e12e53d32cea5e4b7684e5 Mon Sep 17 00:00:00 2001 From: Jimmy Yang Date: Fri, 14 Jan 2011 09:02:28 -0800 Subject: Fix Bug#30423 "InnoDBs treatment of NULL in index stats causes bad "rows examined" estimates". This change implements "innodb_stats_method" with options of "nulls_equal", "nulls_unequal" and "nulls_ignored". 
rb://553 approved by Marko --- storage/innobase/btr/btr0cur.c | 146 ++++++++++++++++++++++------ storage/innobase/dict/dict0dict.c | 10 ++ storage/innobase/handler/ha_innodb.cc | 95 ++++++++++++++++-- storage/innobase/include/btr0cur.h | 5 +- storage/innobase/include/dict0mem.h | 6 ++ storage/innobase/include/rem0cmp.h | 4 + storage/innobase/include/rem0cmp.ic | 2 +- storage/innobase/include/srv0srv.h | 18 ++++ storage/innobase/rem/rem0cmp.c | 14 ++- storage/innobase/srv/srv0srv.c | 5 + storage/innodb_plugin/ChangeLog | 8 ++ storage/innodb_plugin/btr/btr0cur.c | 150 ++++++++++++++++++++++++----- storage/innodb_plugin/dict/dict0dict.c | 10 ++ storage/innodb_plugin/handler/ha_innodb.cc | 95 ++++++++++++++++-- storage/innodb_plugin/include/btr0cur.h | 5 +- storage/innodb_plugin/include/dict0mem.h | 6 ++ storage/innodb_plugin/include/rem0cmp.h | 4 + storage/innodb_plugin/include/rem0cmp.ic | 2 +- storage/innodb_plugin/include/srv0srv.h | 18 ++++ storage/innodb_plugin/rem/rem0cmp.c | 14 ++- storage/innodb_plugin/srv/srv0srv.c | 5 + 21 files changed, 545 insertions(+), 77 deletions(-) (limited to 'storage') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index a7160d74a32..9f4babfaae6 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -66,6 +66,13 @@ this many index pages */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 +/* Estimated table level stats from sampled value. */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty) \ + ((value * (ib_longlong) index->stat_n_leaf_pages \ + + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size \ + + not_empty) \ + / (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size)) + /*********************************************************************** Marks all extern fields in a record as owned by the record. 
This function should be called if the delete mark of a record is removed: a not delete @@ -2834,10 +2841,55 @@ btr_estimate_n_rows_in_range( } } +/*********************************************************************** +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /* in: physical record */ + ulint n_unique, /* in: dict_index_get_n_unique(index), + number of columns uniquely determine + an index entry */ + const ulint* offsets, /* in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_longlong* n_not_null) /* in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; + + field = rec_get_nth_field(rec, offsets, i, &rec_len); + + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; + } + } +} + /*********************************************************************** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. 
*/ void btr_estimate_number_of_different_key_vals( @@ -2851,6 +2903,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_longlong* n_diff; + ib_longlong* n_not_null; + ibool stats_null_not_equal; ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; @@ -2858,24 +2912,47 @@ btr_estimate_number_of_different_key_vals( ulint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_; - *offsets_next_rec_ - = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_; + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; - memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong)); + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + break; + + default: + ut_error; + } /* We sample some pages in the index to get an estimate */ for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) { - rec_t* supremum; 
mtr_start(&mtr); btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); @@ -2888,18 +2965,22 @@ btr_estimate_number_of_different_key_vals( page = btr_cur_get_page(&cursor); - supremum = page_get_supremum_rec(page); rec = page_rec_get_next(page_get_infimum_rec(page)); - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { + while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + if (page_rec_is_supremum(next_rec)) { break; } @@ -2911,7 +2992,8 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, + index, stats_null_not_equal, + &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -2921,6 +3003,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -2971,14 +3059,8 @@ btr_estimate_number_of_different_key_vals( included in index->stat_n_leaf_pages) */ for (j = 0; j <= n_cols; j++) { - index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_longlong)index->stat_n_leaf_pages - + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 - + total_external_size - + not_empty_flag) - / (BTR_KEY_VAL_ESTIMATE_N_PAGES - + total_external_size)); + index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then @@ -2997,12 +3079,20 @@ btr_estimate_number_of_different_key_vals( } index->stat_n_diff_key_vals[j] += add_on; - } - 
mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, + total_external_size, not_empty_flag); + } } + + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index fda6555e082..beea0a2f411 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -1358,6 +1358,12 @@ dict_index_add_to_cache( new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_longlong)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... in case we do not calculate statistics quickly enough */ @@ -3817,6 +3823,10 @@ dict_update_statistics_low( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4c52326a58a..6f58fd70fbd 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -130,6 +130,25 @@ static my_bool innobase_adaptive_hash_index = TRUE; static char* internal_innobase_data_file_path = NULL; +/* Possible values for system variable "innodb_stats_method". 
The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/* Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -6362,6 +6381,65 @@ ha_innobase::read_time( return(ranges + (double) rows / (double) total_rows * time_for_scan); } +/************************************************************************* +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + /* out: estimated record per key + value */ + dict_index_t* index, /* in: dict_index_t structure */ + ulint i, /* in: the column we are + calculating rec per key */ + ha_rows records) /* in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_longlong num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. 
+ Since this is a statistics value, the value could + have a slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + larger than that of the distinct values, we could + consider that the table consists mostly of NULL values. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /************************************************************************* Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ @@ -6568,13 +6646,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -8990,6 +9063,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ AUTOINC_NO_LOCKING, 0); /* Maximum value */ +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. 
Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -9031,6 +9111,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_on_metadata), MYSQL_SYSVAR(use_legacy_cardinality_algorithm), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(support_xa), MYSQL_SYSVAR(sync_spin_loops), diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 213dcb7f568..20235c55f22 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -404,7 +404,10 @@ btr_estimate_n_rows_in_range( /*********************************************************************** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. 
*/ void btr_estimate_number_of_different_key_vals( diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 2f2a7441478..83dbf65ea41 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -222,6 +222,12 @@ struct dict_index_struct{ for this index, for each n-column prefix where n <= dict_get_n_unique(index); we periodically calculate new estimates */ + ib_longlong* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /* approximate index size in database pages */ ulint stat_n_leaf_pages; diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h index c6a6e5de4db..22a22d13e17 100644 --- a/storage/innobase/include/rem0cmp.h +++ b/storage/innobase/include/rem0cmp.h @@ -141,6 +141,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic index 52dc7ff5dc9..45e12301a3c 100644 --- a/storage/innobase/include/rem0cmp.ic +++ b/storage/innobase/include/rem0cmp.ic @@ -72,5 +72,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); + FALSE, &match_f, &match_b)); } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 3dd4bb961f9..811074b2be8 100644 --- 
a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -91,6 +91,11 @@ extern ulint srv_lock_table_size; extern ulint srv_n_file_io_threads; +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -286,6 +291,19 @@ of lower numbers are included. */ #define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward in connection with recovery */ +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + /************************************************************************* Boots Innobase server. 
*/ diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c index ca0ec663548..2939c119e2e 100644 --- a/storage/innobase/rem/rem0cmp.c +++ b/storage/innobase/rem/rem0cmp.c @@ -720,6 +720,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -821,9 +825,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - goto next_field; - + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { + ret = -1; + } else { + goto next_field; + } } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 5b1184fb416..9c34e73109c 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -218,6 +218,11 @@ ulong srv_max_buf_pool_modified_pct = 90; /* variable counts amount of data read in total (in bytes) */ ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. 
all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ ulint srv_data_written = 0; diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 8eb63fe8c78..43ffa762ddb 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,11 @@ +2011-01-14 The InnoDB Team + * btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc, + include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h, + include/rem0cmp.ic, include/srv0srv.h, rem/rem0cmp.c, + srv/srv0srv.c, innodb_bug30423.test: + Fix Bug#30423 InnoDBs treatment of NULL in index stats causes + bad "rows examined" estimates + 2011-01-06 The InnoDB Team * handler/i_s.cc, include/trx0i_s.h, trx/trx0i_s.c: Fix Bug#55397 cannot select from innodb_trx when trx_query contains diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index c57255a25ae..1fb0bc39933 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -100,6 +100,18 @@ can be released by page reorganize, then it is reorganized */ /*--------------------------------------*/ #define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */ + +/** Estimated table level stats from sampled value. 
+@param value sampled stats +@param index index being sampled +@param sample number of sampled pages +@param ext_size externally stored data size +@param not_empty table not empty +@return estimated table wide stats from sampled value */ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ + (((value) * (ib_int64_t) index->stat_n_leaf_pages \ + + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) + /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -3200,10 +3212,55 @@ btr_estimate_n_rows_in_range( } } +/*******************************************************************//** +Record the number of non_null key values in a given index for +each n-column prefix of the index where n < dict_index_get_n_unique(index). +The estimates are eventually stored in the array: +index->stat_n_non_null_key_vals. */ +static +void +btr_record_not_null_field_in_rec( +/*=============================*/ + rec_t* rec, /*!< in: physical record */ + ulint n_unique, /*!< in: dict_index_get_n_unique(index), + number of columns that uniquely determine + an index entry */ + const ulint* offsets, /*!< in: rec_get_offsets(rec, index), + its size could be for all fields or + that of "n_unique" */ + ib_int64_t* n_not_null) /*!< in/out: array to record number of + not null rows for n-column prefix */ +{ + ulint i; + + ut_ad(rec_offs_n_fields(offsets) >= n_unique); + + if (n_not_null == NULL) { + return; + } + + for (i = 0; i < n_unique; i++) { + ulint rec_len; + byte* field; + + field = rec_get_nth_field(rec, offsets, i, &rec_len); + + if (rec_len != UNIV_SQL_NULL) { + n_not_null[i]++; + } else { + /* Break if we hit the first NULL value */ + break; + } + } +} + /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. 
*/ +The estimates are stored in the array index->stat_n_diff_key_vals. +If innodb_stats_method is "nulls_ignored", we also record the number of +non-null values for each prefix and store the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( @@ -3217,6 +3274,8 @@ btr_estimate_number_of_different_key_vals( ulint matched_fields; ulint matched_bytes; ib_int64_t* n_diff; + ib_int64_t* n_not_null; + ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; @@ -3225,16 +3284,43 @@ btr_estimate_number_of_different_key_vals( ullint add_on; mtr_t mtr; mem_heap_t* heap = NULL; - ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; - ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets_rec = offsets_rec_; - ulint* offsets_next_rec= offsets_next_rec_; - rec_offs_init(offsets_rec_); - rec_offs_init(offsets_next_rec_); + ulint* offsets_rec = NULL; + ulint* offsets_next_rec = NULL; n_cols = dict_index_get_n_unique(index); - n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); + heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) + * (n_cols + 1) + + dict_index_get_n_fields(index) + * (sizeof *offsets_rec + + sizeof *offsets_next_rec)); + + n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + n_not_null = NULL; + + /* Check srv_innodb_stats_method setting, and decide whether we + need to record non-null value and also decide if NULL is + considered equal (by setting stats_null_not_equal value) */ + switch (srv_innodb_stats_method) { + case SRV_STATS_NULLS_IGNORED: + n_not_null = mem_heap_zalloc(heap, (n_cols + 1) + * sizeof *n_not_null); + /* fall through */ + + case SRV_STATS_NULLS_UNEQUAL: + /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL + case, we will treat NULLs as unequal value */ + stats_null_not_equal = TRUE; + break; + + case SRV_STATS_NULLS_EQUAL: + stats_null_not_equal = FALSE; + 
break; + + default: + ut_error; + } /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ @@ -3251,7 +3337,6 @@ btr_estimate_number_of_different_key_vals( /* We sample some pages in the index to get an estimate */ for (i = 0; i < n_sample_pages; i++) { - rec_t* supremum; mtr_start(&mtr); btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); @@ -3264,18 +3349,22 @@ btr_estimate_number_of_different_key_vals( page = btr_cur_get_page(&cursor); - supremum = page_get_supremum_rec(page); rec = page_rec_get_next(page_get_infimum_rec(page)); - if (rec != supremum) { + if (!page_rec_is_supremum(rec)) { not_empty_flag = 1; offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + + if (n_not_null) { + btr_record_not_null_field_in_rec( + rec, n_cols, offsets_rec, n_not_null); + } } - while (rec != supremum) { + while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); - if (next_rec == supremum) { + if (page_rec_is_supremum(next_rec)) { break; } @@ -3287,7 +3376,8 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, &matched_fields, + index, stats_null_not_equal, + &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -3297,6 +3387,12 @@ btr_estimate_number_of_different_key_vals( n_diff[j]++; } + if (n_not_null) { + btr_record_not_null_field_in_rec( + next_rec, n_cols, offsets_next_rec, + n_not_null); + } + total_external_size += btr_rec_get_externally_stored_len( rec, offsets_rec); @@ -3348,13 +3444,9 @@ btr_estimate_number_of_different_key_vals( for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] - = ((n_diff[j] - * (ib_int64_t)index->stat_n_leaf_pages - + n_sample_pages - 1 - + total_external_size - + not_empty_flag) - / (n_sample_pages - + total_external_size)); + = BTR_TABLE_STATS_FROM_SAMPLE( + n_diff[j], index, n_sample_pages, + 
total_external_size, not_empty_flag); /* If the tree is small, smaller than 10 * n_sample_pages + total_external_size, then @@ -3373,12 +3465,20 @@ btr_estimate_number_of_different_key_vals( } index->stat_n_diff_key_vals[j] += add_on; - } - mem_free(n_diff); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); + /* Update the stat_n_non_null_key_vals[] with our + sampled result. stat_n_non_null_key_vals[] is created + and initialized to zero in dict_index_add_to_cache(), + along with stat_n_diff_key_vals[] array */ + if (n_not_null != NULL && (j < n_cols)) { + index->stat_n_non_null_key_vals[j] = + BTR_TABLE_STATS_FROM_SAMPLE( + n_not_null[j], index, n_sample_pages, + total_external_size, not_empty_flag); + } } + + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c index 67765555658..ff56e9cb76a 100644 --- a/storage/innodb_plugin/dict/dict0dict.c +++ b/storage/innodb_plugin/dict/dict0dict.c @@ -1669,6 +1669,12 @@ undo_size_ok: new_index->heap, (1 + dict_index_get_n_unique(new_index)) * sizeof(ib_int64_t)); + + new_index->stat_n_non_null_key_vals = mem_heap_zalloc( + new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(*new_index->stat_n_non_null_key_vals)); + /* Give some sensible values to stat_n_... 
in case we do not calculate statistics quickly enough */ @@ -4291,6 +4297,10 @@ dict_update_statistics( for (i = dict_index_get_n_unique(index); i; ) { index->stat_n_diff_key_vals[i--] = 1; } + + memset(index->stat_n_non_null_key_vals, 0, + (1 + dict_index_get_n_unique(index)) + * sizeof(*index->stat_n_non_null_key_vals)); } index = dict_table_get_next_index(index); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 86168e2bc9b..2d60c7397b0 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -174,6 +174,25 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +/** Possible values for system variable "innodb_stats_method". The values +are defined the same as its corresponding MyISAM system variable +"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ +static const char* innodb_stats_method_names[] = { + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; + +/** Used to define an enumerate type of the system variable innodb_stats_method. +This is the same as "myisam_stats_method_typelib" */ +static TYPELIB innodb_stats_method_typelib = { + array_elements(innodb_stats_method_names) - 1, + "innodb_stats_method_typelib", + innodb_stats_method_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -7507,6 +7526,65 @@ innobase_get_mysql_key_number_for_index( return(0); } + +/*********************************************************************//** +Calculate Record Per Key value. 
Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +static +ha_rows +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + ha_rows rec_per_key; + + ut_ad(i < dict_index_get_n_unique(index)); + + /* Note the stat_n_diff_key_vals[] stores the diff value with + n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */ + if (index->stat_n_diff_key_vals[i + 1] == 0) { + + rec_per_key = records; + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_int64_t num_null; + + /* Number of rows with NULL value in this + field */ + num_null = records - index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is a statistics value, the value could + have a slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + num_null = (num_null < 0) ? 0 : num_null; + + /* If the number of NULL values is the same as or + larger than that of the distinct values, we could + consider that the table consists mostly of NULL values. + Set rec_per_key to 1. */ + if (index->stat_n_diff_key_vals[i + 1] <= num_null) { + rec_per_key = 1; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key = (ha_rows)( + (records - num_null) + / (index->stat_n_diff_key_vals[i + 1] + - num_null)); + } + } else { + rec_per_key = (ha_rows) + (records / index->stat_n_diff_key_vals[i + 1]); + } + + return(rec_per_key); +} + /*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. 
*/ @@ -7737,13 +7815,8 @@ ha_innobase::info_low( break; } - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } + rec_per_key = innodb_rec_per_key( + index, j, stats.records); /* Since MySQL seems to favor table scans too much over index searches, we pretend @@ -10934,6 +11007,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, innodb_change_buffering_validate, innodb_change_buffering_update, "inserts"); +static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should " + "treat NULLs. Possible values are NULLS_EQUAL (default), " + "NULLS_UNEQUAL and NULLS_IGNORED", + NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); + #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG, @@ -10988,6 +11068,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_on_metadata), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), MYSQL_SYSVAR(strict_mode), diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index b477ad0320a..cb8cb399715 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -478,7 +478,10 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. */ +The estimates are stored in the array index->stat_n_diff_key_vals. 
+If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in +array index->stat_n_non_null_key_vals. */ UNIV_INTERN void btr_estimate_number_of_different_key_vals( diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h index 19782c2e76a..09a068ccb93 100644 --- a/storage/innodb_plugin/include/dict0mem.h +++ b/storage/innodb_plugin/include/dict0mem.h @@ -321,6 +321,12 @@ struct dict_index_struct{ dict_get_n_unique(index); we periodically calculate new estimates */ + ib_int64_t* stat_n_non_null_key_vals; + /* approximate number of non-null key values + for this index, for each column where + n < dict_get_n_unique(index); This + is used when innodb_stats_method is + "nulls_ignored". */ ulint stat_index_size; /*!< approximate index size in database pages */ diff --git a/storage/innodb_plugin/include/rem0cmp.h b/storage/innodb_plugin/include/rem0cmp.h index 2f751a38864..a908521c9f7 100644 --- a/storage/innodb_plugin/include/rem0cmp.h +++ b/storage/innodb_plugin/include/rem0cmp.h @@ -165,6 +165,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current diff --git a/storage/innodb_plugin/include/rem0cmp.ic b/storage/innodb_plugin/include/rem0cmp.ic index 39ef5f4fba3..63415fe7837 100644 --- a/storage/innodb_plugin/include/rem0cmp.ic +++ b/storage/innodb_plugin/include/rem0cmp.ic @@ -87,5 +87,5 @@ cmp_rec_rec( ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, 
&match_b)); + FALSE, &match_f, &match_b)); } diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 7aa2ce74720..91ae895040c 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -154,6 +154,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ #define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +/* The "innodb_stats_method" setting, decides how InnoDB is going +to treat NULL value when collecting statistics. It is not defined +as enum type because the configure option takes unsigned integer type. */ +extern ulong srv_innodb_stats_method; + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; @@ -363,6 +368,19 @@ enum { in connection with recovery */ }; +/* Alternatives for srv_innodb_stats_method, which could be changed by +setting innodb_stats_method */ +enum srv_stats_method_name_enum { + SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as + equal. This is the default setting + for innodb_stats_method */ + SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as + NOT equal. */ + SRV_STATS_NULLS_IGNORED /* NULL values are ignored */ +}; + +typedef enum srv_stats_method_name_enum srv_stats_method_name_t; + #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. 
*/ enum srv_thread_type { diff --git a/storage/innodb_plugin/rem/rem0cmp.c b/storage/innodb_plugin/rem/rem0cmp.c index 35b67992558..04d2c15437b 100644 --- a/storage/innodb_plugin/rem/rem0cmp.c +++ b/storage/innodb_plugin/rem/rem0cmp.c @@ -862,6 +862,10 @@ cmp_rec_rec_with_match( const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index, /*!< in: data dictionary index */ + ibool nulls_unequal, + /* in: TRUE if this is for index statistics + cardinality estimation, and innodb_stats_method + is "nulls_unequal" or "nulls_ignored" */ ulint* matched_fields, /*!< in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -961,9 +965,13 @@ cmp_rec_rec_with_match( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { - - goto next_field; - + /* This is limited to stats collection, + cannot use it for regular search */ + if (nulls_unequal) { + ret = -1; + } else { + goto next_field; + } } else if (rec2_f_len == UNIV_SQL_NULL) { /* We define the SQL null to be the diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index f7e7e351bdc..3cf17f33c40 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -243,6 +243,11 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. 
all NULL value are treated equal */ +ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + /* here we count the amount of data written in total (in bytes) */ UNIV_INTERN ulint srv_data_written = 0; -- cgit v1.2.1 From 1f3975b4f8b22eef97b2d86b8ecbc17c90c5f1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 17 Jan 2011 14:06:48 +0200 Subject: Non-functional changes. Remove the unused data type dict_cluster_t. Remove a bogus comment about latching order. --- storage/innobase/include/dict0types.h | 5 ----- storage/innobase/include/trx0rseg.h | 4 +--- storage/innodb_plugin/include/dict0types.h | 5 ----- storage/innodb_plugin/include/trx0rseg.h | 4 +--- 4 files changed, 2 insertions(+), 16 deletions(-) (limited to 'storage') diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index b90545f2105..6674b5ff397 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -16,11 +16,6 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_foreign_struct dict_foreign_t; -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 46ba010bd1d..22f8aa89181 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -121,9 +121,7 @@ struct trx_rseg_struct{ ulint id; /* rollback segment id == the index of its slot in the trx system file copy */ mutex_t mutex; /* mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ + struct except id, which is constant */ ulint space; /* space where the rollback segment is header is placed */ ulint page_no;/* page number of the 
rollback segment diff --git a/storage/innodb_plugin/include/dict0types.h b/storage/innodb_plugin/include/dict0types.h index 7ad69193cc9..f14b59a19d4 100644 --- a/storage/innodb_plugin/include/dict0types.h +++ b/storage/innodb_plugin/include/dict0types.h @@ -33,11 +33,6 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_foreign_struct dict_foreign_t; -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - typedef struct ind_node_struct ind_node_t; typedef struct tab_node_struct tab_node_t; diff --git a/storage/innodb_plugin/include/trx0rseg.h b/storage/innodb_plugin/include/trx0rseg.h index a25d84f1e84..e3674089735 100644 --- a/storage/innodb_plugin/include/trx0rseg.h +++ b/storage/innodb_plugin/include/trx0rseg.h @@ -135,9 +135,7 @@ struct trx_rseg_struct{ ulint id; /*!< rollback segment id == the index of its slot in the trx system file copy */ mutex_t mutex; /*!< mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ + struct except id, which is constant */ ulint space; /*!< space where the rollback segment is header is placed */ ulint zip_size;/* compressed page size of space -- cgit v1.2.1 From 359bddbee1a27864a38195e85fceab8a1678081d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 18 Jan 2011 12:25:13 +0200 Subject: Bug#59579 rw_lock_debug_print outputs to stderr rw_lock_debug_print(): Add parameter FILE* for specifying the output stream. rw_lock_list_print_info(): Invoke rw_lock_debug_print() on file, not stderr. 
--- storage/innobase/include/sync0rw.h | 3 ++- storage/innobase/sync/sync0arr.c | 4 ++-- storage/innobase/sync/sync0rw.c | 19 ++++++++++--------- storage/innodb_plugin/ChangeLog | 6 ++++++ storage/innodb_plugin/include/sync0rw.h | 3 ++- storage/innodb_plugin/sync/sync0arr.c | 4 ++-- storage/innodb_plugin/sync/sync0rw.c | 19 ++++++++++--------- 7 files changed, 34 insertions(+), 24 deletions(-) (limited to 'storage') diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 008df80a2c7..dd898557d6e 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -1,7 +1,7 @@ /****************************************************** The read-write lock (for threads, not for database transactions) -(c) 1995 Innobase Oy +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Created 9/11/1995 Heikki Tuuri *******************************************************/ @@ -409,6 +409,7 @@ Prints info of a debug struct. */ void rw_lock_debug_print( /*================*/ + FILE* f, /* in: output stream */ rw_lock_debug_t* info); /* in: debug struct */ #endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index 154593a9035..41d3492c8c9 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -1,7 +1,7 @@ /****************************************************** The wait array used in synchronization primitives -(c) 1995 Innobase Oy +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. 
Created 9/5/1995 Heikki Tuuri *******************************************************/ @@ -709,7 +709,7 @@ print: fprintf(stderr, "rw-lock %p ", (void*) lock); sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); + rw_lock_debug_print(stderr, debug); return(TRUE); } } diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c index 0b05fb826ac..ef4c07e8c26 100644 --- a/storage/innobase/sync/sync0rw.c +++ b/storage/innobase/sync/sync0rw.c @@ -1,7 +1,7 @@ /****************************************************** The read-write lock (for thread synchronization) -(c) 1995 Innobase Oy +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Created 9/11/1995 Heikki Tuuri *******************************************************/ @@ -830,7 +830,7 @@ rw_lock_list_print_info( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(file, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -870,7 +870,7 @@ rw_lock_print( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(stderr, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -882,28 +882,29 @@ Prints info of a debug struct. 
*/ void rw_lock_debug_print( /*================*/ + FILE* f, /* in: output stream */ rw_lock_debug_t* info) /* in: debug struct */ { ulint rwt; rwt = info->lock_type; - fprintf(stderr, "Locked: thread %lu file %s line %lu ", + fprintf(f, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); + fputs("S-LOCK", f); } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); + fputs("X-LOCK", f); } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); + fputs("WAIT X-LOCK", f); } else { ut_error; } if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); + fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', stderr); + putc('\n', f); } /******************************************************************* diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 43ffa762ddb..4d35bcff4a1 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-01-18 The InnoDB Team + + * include/sync0rw.h, sync/sync0arr.c, sync/sync0rw.c: + Fix Bug#59579 rw_lock_debug_print outputs to stderr, not to + SHOW ENGINE INNODB STATUS + 2011-01-14 The InnoDB Team * btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc, include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h, diff --git a/storage/innodb_plugin/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h index 175f3deb77c..47f7dbfe0eb 100644 --- a/storage/innodb_plugin/include/sync0rw.h +++ b/storage/innodb_plugin/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -490,6 +490,7 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info); /*!< in: debug struct */ #endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/innodb_plugin/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c index 3c825e2202b..ad29b90d344 100644 --- a/storage/innodb_plugin/sync/sync0arr.c +++ b/storage/innodb_plugin/sync/sync0arr.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -715,7 +715,7 @@ print: fprintf(stderr, "rw-lock %p ", (void*) lock); sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); + rw_lock_debug_print(stderr, debug); return(TRUE); } } diff --git a/storage/innodb_plugin/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c index 572c3690a7f..00e0324becd 100644 --- a/storage/innodb_plugin/sync/sync0rw.c +++ b/storage/innodb_plugin/sync/sync0rw.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -925,7 +925,7 @@ rw_lock_list_print_info( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(file, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -973,7 +973,7 @@ rw_lock_print( info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { - rw_lock_debug_print(info); + rw_lock_debug_print(stderr, info); info = UT_LIST_GET_NEXT(list, info); } } @@ -985,28 +985,29 @@ UNIV_INTERN void rw_lock_debug_print( /*================*/ + FILE* f, /*!< in: output stream */ rw_lock_debug_t* info) /*!< in: debug struct */ { ulint rwt; rwt = info->lock_type; - fprintf(stderr, "Locked: thread %lu file %s line %lu ", + fprintf(f, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); + fputs("S-LOCK", f); } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); + fputs("X-LOCK", f); } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); + fputs("WAIT X-LOCK", f); } else { ut_error; } if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); + fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', stderr); + putc('\n', f); } /***************************************************************//** -- cgit v1.2.1 From 60a622d1c1940f80829a4df312ff49a6feae265e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 25 Jan 2011 09:56:18 +0200 Subject: Bug#59707 Unused compression-related parameters in buffer pool functions buf_block_alloc(): ulint zip_size is always 0. buf_LRU_get_free_block(): ulint zip_size is always 0. buf_LRU_free_block(): ibool* buf_pool_mutex_released is always NULL. Remove these parameters. buf_LRU_get_free_block(): Simplify the initialization of block->page.zip and release buf_pool_mutex() earlier. 
--- storage/innodb_plugin/ChangeLog | 9 +++++++ storage/innodb_plugin/btr/btr0btr.c | 2 +- storage/innodb_plugin/btr/btr0cur.c | 5 ++-- storage/innodb_plugin/btr/btr0sea.c | 2 +- storage/innodb_plugin/buf/buf0buddy.c | 2 +- storage/innodb_plugin/buf/buf0buf.c | 14 +++++----- storage/innodb_plugin/buf/buf0lru.c | 44 ++++++-------------------------- storage/innodb_plugin/include/buf0buf.h | 6 ++--- storage/innodb_plugin/include/buf0buf.ic | 8 +++--- storage/innodb_plugin/include/buf0lru.h | 14 ++++------ storage/innodb_plugin/mem/mem0mem.c | 2 +- storage/innodb_plugin/page/page0zip.c | 2 +- 12 files changed, 40 insertions(+), 70 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 4d35bcff4a1..24cac7ac2be 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,12 @@ +2011-01-25 The InnoDB Team + + * btr/btr0btr.c, btr/btr0cur.c, btr/btr0sea.c, + buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c, + include/buf0buf.h, include/buf0buf.ic, include/buf0lru.h, + mem/mem0mem.c, page/page0zip.c: + Fix Bug#59707 Unused compression-related parameters + in buffer pool functions + 2011-01-18 The InnoDB Team * include/sync0rw.h, sync/sync0arr.c, sync/sync0rw.c: diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index 32e2caecdb8..3d8d6048603 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -979,7 +979,7 @@ btr_page_reorganize_low( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); #else /* !UNIV_HOTBACKUP */ ut_ad(block == back_block1); temp_block = back_block2; diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 1fb0bc39933..f41b125b281 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -3767,13 +3767,12 @@ btr_blob_free( && 
buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all, NULL) - != BUF_LRU_FREED + if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED && all && block->page.zip.data) { /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ - buf_LRU_free_block(&block->page, FALSE, NULL); + buf_LRU_free_block(&block->page, FALSE); } } diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index 035fdbb61d2..9835efcf712 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -141,7 +141,7 @@ btr_search_check_free_space_in_heap(void) be enough free space in the hash table. */ if (heap->free_block == NULL) { - buf_block_t* block = buf_block_alloc(0); + buf_block_t* block = buf_block_alloc(); rw_lock_x_lock(&btr_search_latch); diff --git a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c index ee5a569c3ff..63c99571510 100644 --- a/storage/innodb_plugin/buf/buf0buddy.c +++ b/storage/innodb_plugin/buf/buf0buddy.c @@ -327,7 +327,7 @@ buf_buddy_alloc_low( /* Try replacing an uncompressed page in the buffer pool. */ buf_pool_mutex_exit(); - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); *lru = TRUE; buf_pool_mutex_enter(); diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index dac416f9472..6e76e4c65be 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -1283,7 +1283,7 @@ shrink_again: buf_LRU_make_block_old(&block->page); dirty++; - } else if (buf_LRU_free_block(&block->page, TRUE, NULL) + } else if (buf_LRU_free_block(&block->page, TRUE) != BUF_LRU_FREED) { nonfree++; } @@ -1729,8 +1729,7 @@ err_exit: mutex_enter(block_mutex); /* Discard the uncompressed page frame if possible. 
*/ - if (buf_LRU_free_block(bpage, FALSE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) { mutex_exit(block_mutex); goto lookup; @@ -2165,7 +2164,7 @@ wait_until_unfixed: buf_pool_mutex_exit(); mutex_exit(&buf_pool_zip_mutex); - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_a(block); buf_pool_mutex_enter(); @@ -2291,8 +2290,7 @@ wait_until_unfixed: /* Try to evict the block from the buffer pool, to use the insert buffer as much as possible. */ - if (buf_LRU_free_block(&block->page, TRUE, NULL) - == BUF_LRU_FREED) { + if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) { buf_pool_mutex_exit(); mutex_exit(&block->mutex); fprintf(stderr, @@ -2829,7 +2827,7 @@ buf_page_init_for_read( && UNIV_LIKELY(!recv_recovery_is_on())) { block = NULL; } else { - block = buf_LRU_get_free_block(0); + block = buf_LRU_get_free_block(); ut_ad(block); } @@ -3001,7 +2999,7 @@ buf_page_create( ut_ad(mtr->state == MTR_ACTIVE); ut_ad(space || !zip_size); - free_block = buf_LRU_get_free_block(0); + free_block = buf_LRU_get_free_block(); buf_pool_mutex_enter(); diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index e4cf218bf2e..39feb06ff23 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -575,7 +575,7 @@ buf_LRU_free_from_unzip_LRU_list( ut_ad(block->page.in_LRU_list); mutex_enter(&block->mutex); - freed = buf_LRU_free_block(&block->page, FALSE, NULL); + freed = buf_LRU_free_block(&block->page, FALSE); mutex_exit(&block->mutex); switch (freed) { @@ -636,7 +636,7 @@ buf_LRU_free_from_common_LRU_list( mutex_enter(block_mutex); accessed = buf_page_is_accessed(bpage); - freed = buf_LRU_free_block(bpage, TRUE, NULL); + freed = buf_LRU_free_block(bpage, TRUE); mutex_exit(block_mutex); switch (freed) { @@ -798,10 +798,8 @@ LRU list to the free list. 
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ { buf_block_t* block = NULL; ibool freed; @@ -877,26 +875,10 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(); - if (block) { - -#ifdef UNIV_DEBUG - block->page.zip.m_start = -#endif /* UNIV_DEBUG */ - block->page.zip.m_end = - block->page.zip.m_nonempty = - block->page.zip.n_blobs = 0; - - if (UNIV_UNLIKELY(zip_size)) { - ibool lru; - page_zip_set_size(&block->page.zip, zip_size); - block->page.zip.data = buf_buddy_alloc(zip_size, &lru); - UNIV_MEM_DESC(block->page.zip.data, zip_size, block); - } else { - page_zip_set_size(&block->page.zip, 0); - block->page.zip.data = NULL; - } + buf_pool_mutex_exit(); - buf_pool_mutex_exit(); + if (block) { + memset(&block->page.zip, 0, sizeof block->page.zip); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; @@ -908,8 +890,6 @@ loop: /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ - buf_pool_mutex_exit(); - freed = buf_LRU_search_and_free_block(n_iterations); if (freed > 0) { @@ -1378,12 +1358,8 @@ enum buf_lru_free_block_status buf_LRU_free_block( /*===============*/ buf_page_t* bpage, /*!< in: block to be freed */ - ibool zip, /*!< in: TRUE if should remove also the + ibool zip) /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released) - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ { buf_page_t* b = NULL; mutex_t* block_mutex = buf_page_get_mutex(bpage); @@ -1554,10 +1530,6 @@ alloc: b->io_fix = BUF_IO_READ; } - if (buf_pool_mutex_released) { - *buf_pool_mutex_released = TRUE; - } - 
buf_pool_mutex_exit(); mutex_exit(block_mutex); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index cd4ee5906f0..d903b443920 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -165,10 +165,8 @@ Allocates a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void); +/*=================*/ /********************************************************************//** Frees a buffer block which does not contain a file page. */ UNIV_INLINE diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 23db684806c..0025bef5aac 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -719,14 +719,12 @@ Allocates a buffer block. 
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ UNIV_INLINE buf_block_t* -buf_block_alloc( -/*============*/ - ulint zip_size) /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_block_alloc(void) +/*=================*/ { buf_block_t* block; - block = buf_LRU_get_free_block(zip_size); + block = buf_LRU_get_free_block(); buf_block_set_state(block, BUF_BLOCK_MEMORY); diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h index 5a9cfd059f3..d543bce53cd 100644 --- a/storage/innodb_plugin/include/buf0lru.h +++ b/storage/innodb_plugin/include/buf0lru.h @@ -110,12 +110,9 @@ enum buf_lru_free_block_status buf_LRU_free_block( /*===============*/ buf_page_t* bpage, /*!< in: block to be freed */ - ibool zip, /*!< in: TRUE if should remove also the + ibool zip) /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released); - /*!< in: pointer to a variable that will - be assigned TRUE if buf_pool_mutex - was temporarily released, or NULL */ + __attribute__((nonnull)); /******************************************************************//** Try to free a replaceable block. @return TRUE if found and freed */ @@ -146,10 +143,9 @@ LRU list to the free list. @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - ulint zip_size); /*!< in: compressed page size in bytes, - or 0 if uncompressed tablespace */ +buf_LRU_get_free_block(void) +/*========================*/ + __attribute__((warn_unused_result)); /******************************************************************//** Puts a block back to the free list. 
*/ diff --git a/storage/innodb_plugin/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c index 1dd4db30841..86100b04fd6 100644 --- a/storage/innodb_plugin/mem/mem0mem.c +++ b/storage/innodb_plugin/mem/mem0mem.c @@ -347,7 +347,7 @@ mem_heap_create_block( return(NULL); } } else { - buf_block = buf_block_alloc(0); + buf_block = buf_block_alloc(); } block = (mem_block_t*) buf_block->frame; diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index d3b1edefc6b..bb9b0995c72 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -4439,7 +4439,7 @@ page_zip_reorganize( log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(0); + temp_block = buf_block_alloc(); btr_search_drop_page_hash_index(block); block->check_index_page_at_flush = TRUE; #else /* !UNIV_HOTBACKUP */ -- cgit v1.2.1 From 46b7ef69916635ca0575ea4898ed8980f4bf6f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 25 Jan 2011 11:54:50 +0200 Subject: Bug#59464 Race condition in row_vers_build_for_semi_consistent_read row_vers_build_for_semi_consistent_read(): Dereference version_trx before releasing kernel_mutex, but not thereafter. 
--- storage/innobase/row/row0vers.c | 10 +++++++--- storage/innodb_plugin/ChangeLog | 5 +++++ storage/innodb_plugin/row/row0vers.c | 10 +++++++--- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'storage') diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c index f4adfa855df..23aca8c3f2e 100644 --- a/storage/innobase/row/row0vers.c +++ b/storage/innobase/row/row0vers.c @@ -593,11 +593,15 @@ row_vers_build_for_semi_consistent_read( mutex_enter(&kernel_mutex); version_trx = trx_get_on_id(version_trx_id); + if (version_trx + && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY + || version_trx->conc_state == TRX_NOT_STARTED)) { + + version_trx = NULL; + } mutex_exit(&kernel_mutex); - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) { + if (!version_trx) { /* We found a version that belongs to a committed transaction: return it. */ diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 24cac7ac2be..d5e9a6bc825 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,8 @@ +2011-01-25 The InnoDB Team + + * row/row0vers.c: + Fix Bug#59464 Race condition in row_vers_build_for_semi_consistent_read + 2011-01-25 The InnoDB Team * btr/btr0btr.c, btr/btr0cur.c, btr/btr0sea.c, diff --git a/storage/innodb_plugin/row/row0vers.c b/storage/innodb_plugin/row/row0vers.c index b6d35363f08..d4fde0b939b 100644 --- a/storage/innodb_plugin/row/row0vers.c +++ b/storage/innodb_plugin/row/row0vers.c @@ -669,11 +669,15 @@ row_vers_build_for_semi_consistent_read( mutex_enter(&kernel_mutex); version_trx = trx_get_on_id(version_trx_id); + if (version_trx + && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY + || version_trx->conc_state == TRX_NOT_STARTED)) { + + version_trx = NULL; + } mutex_exit(&kernel_mutex); - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == 
TRX_COMMITTED_IN_MEMORY) { + if (!version_trx) { /* We found a version that belongs to a committed transaction: return it. */ -- cgit v1.2.1 From 896e0ba4e0304fbd1b056022d8e27f6ce146a83e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 25 Jan 2011 12:17:28 +0200 Subject: Bug#59486 Incorrect usage of UNIV_UNLIKELY() in mlog_parse_string() mlog_parse_string(): Enclose the comparison in UNIV_UNLIKELY, not the comparand. --- storage/innodb_plugin/ChangeLog | 5 +++++ storage/innodb_plugin/mtr/mtr0log.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index d5e9a6bc825..cac72fd3075 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,8 @@ +2011-01-25 The InnoDB Team + + * mtr/mtr0log.c: + Bug#59486 Incorrect usage of UNIV_UNLIKELY() in mlog_parse_string() + 2011-01-25 The InnoDB Team * row/row0vers.c: diff --git a/storage/innodb_plugin/mtr/mtr0log.c b/storage/innodb_plugin/mtr/mtr0log.c index 3f3dab36b76..3349036b5b3 100644 --- a/storage/innodb_plugin/mtr/mtr0log.c +++ b/storage/innodb_plugin/mtr/mtr0log.c @@ -408,7 +408,7 @@ mlog_parse_string( ptr += 2; if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(len + offset) > UNIV_PAGE_SIZE) { + || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { recv_sys->found_corrupt_log = TRUE; return(NULL); -- cgit v1.2.1 From e44703db76e87fccbcc2e51606f04b18b55a0544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 25 Jan 2011 15:43:08 +0200 Subject: Bug#59585 Fix 58912 introduces compiler warning due to potentially uninitialized variable row_upd_changes_ord_field_binary(): Initialize dfield_len to suppress the warning. The compiler cannot know that row_ext_lookup() does initialize dfield_len for us, as it is defined in a different module. 
--- storage/innodb_plugin/ChangeLog | 6 ++++++ storage/innodb_plugin/row/row0upd.c | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index cac72fd3075..e2fdeecfcc1 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-01-25 The InnoDB Team + + * row/row0upd.c: + Bug#59585 Fix 58912 introduces compiler warning + due to potentially uninitialized variable + 2011-01-25 The InnoDB Team * mtr/mtr0log.c: diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 4aa1474a25b..691d263e6ed 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1252,6 +1252,10 @@ row_upd_changes_ord_field_binary( || dfield_is_null(dfield)) { /* do nothing special */ } else if (UNIV_LIKELY_NULL(ext)) { + /* Silence a compiler warning without + silencing a Valgrind error. */ + dfield_len = 0; + UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len); /* See if the column is stored externally. */ buf = row_ext_lookup(ext, col_no, &dfield_len); -- cgit v1.2.1 From 786ac62c82038ed42278b3699b0661f0bb3c80ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 27 Jan 2011 13:27:29 +0200 Subject: Bug#59440 Race condition in XA ROLLBACK and XA COMMIT after server restart trx_get_trx_by_xid(): Invalidate trx->xid after a successful lookup, so that subsequent callers will not find the same transaction. The only callers of trx_get_trx_by_xid() will be invoking innobase_commit_low() or innobase_rollback_trx(), and those code paths should not depend on trx->xid. 
rb://584 approved by Jimmy Yang --- storage/innobase/include/trx0trx.h | 5 +++-- storage/innobase/trx/trx0trx.c | 26 ++++++++++++-------------- storage/innodb_plugin/ChangeLog | 6 ++++++ storage/innodb_plugin/include/trx0trx.h | 4 ++-- storage/innodb_plugin/trx/trx0trx.c | 25 +++++++++++-------------- 5 files changed, 34 insertions(+), 32 deletions(-) (limited to 'storage') diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 97a47d9f46e..4652f45892e 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -198,8 +198,9 @@ which is in the prepared state */ trx_t * trx_get_trx_by_xid( /*===============*/ - /* out: trx or NULL */ - XID* xid); /* in: X/Open XA transaction identification */ + /* out: trx or NULL; + on match, the trx->xid will be invalidated */ + const XID* xid); /* in: X/Open XA transaction identifier */ /************************************************************************** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. 
*/ diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 21f75e0818f..a82d7f452fc 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -2041,14 +2041,15 @@ which is in the prepared state */ trx_t* trx_get_trx_by_xid( /*===============*/ - /* out: trx or NULL */ - XID* xid) /* in: X/Open XA transaction identification */ + /* out: trx or NULL; + on match, the trx->xid will be invalidated */ + const XID* xid) /* in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2061,10 +2062,16 @@ trx_get_trx_by_xid( of gtrid_lenght+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. 
*/ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2073,14 +2080,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); } diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index e2fdeecfcc1..3e14b0052e7 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-01-27 The InnoDB Team + + * include/trx0trx.h, trx/trx0trx.c: + Bug#59440 Race condition in XA ROLLBACK and XA COMMIT + after server restart + 2011-01-25 The InnoDB Team * row/row0upd.c: diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h index abd175d365b..833bae4a4ff 100644 --- a/storage/innodb_plugin/include/trx0trx.h +++ b/storage/innodb_plugin/include/trx0trx.h @@ -214,12 +214,12 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t * trx_get_trx_by_xid( /*===============*/ - XID* xid); /*!< in: X/Open XA transaction identification */ + const XID* xid); /*!< in: X/Open XA transaction identifier */ /**********************************************************************//** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. 
diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c index ee744fd58b1..f0bbf220815 100644 --- a/storage/innodb_plugin/trx/trx0trx.c +++ b/storage/innodb_plugin/trx/trx0trx.c @@ -2010,18 +2010,18 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t* trx_get_trx_by_xid( /*===============*/ - XID* xid) /*!< in: X/Open XA transaction identification */ + const XID* xid) /*!< in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2034,10 +2034,16 @@ trx_get_trx_by_xid( of gtrid_lenght+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. 
*/ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2046,14 +2052,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); } -- cgit v1.2.1 From 71e8043bae2071ba875b18326504b1058b8deb98 Mon Sep 17 00:00:00 2001 From: Jimmy Yang Date: Fri, 28 Jan 2011 00:50:10 -0800 Subject: Fix Bug #59465 btr_estimate_number_of_different_key_vals use incorrect offset for external_size rb://581 approved by Marko --- storage/innobase/btr/btr0cur.c | 10 +++++----- storage/innodb_plugin/ChangeLog | 7 +++++++ storage/innodb_plugin/btr/btr0cur.c | 10 +++++----- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'storage') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 9f4babfaae6..6c0497cbd41 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -2981,6 +2981,9 @@ btr_estimate_number_of_different_key_vals( while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); break; } @@ -2988,7 +2991,8 @@ btr_estimate_number_of_different_key_vals( matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, - n_cols, &heap); + ULINT_UNDEFINED, + &heap); cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, @@ -3043,10 +3047,6 @@ btr_estimate_number_of_different_key_vals( } } - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - total_external_size += btr_rec_get_externally_stored_len( - rec, offsets_rec); mtr_commit(&mtr); } diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 3e14b0052e7..7a901fc1fa1 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-01-27 The 
InnoDB Team + + * btr/btr0cur.c: + Bug#59465 btr_estimate_number_of_different_key_vals use + incorrect offset for external_size + 2011-01-27 The InnoDB Team * include/trx0trx.h, trx/trx0trx.c: @@ -29,6 +35,7 @@ Fix Bug#59707 Unused compression-related parameters in buffer pool functions +>>>>>>> MERGE-SOURCE 2011-01-18 The InnoDB Team * include/sync0rw.h, sync/sync0arr.c, sync/sync0rw.c: diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index f41b125b281..874db3066b5 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -3365,6 +3365,9 @@ btr_estimate_number_of_different_key_vals( while (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); if (page_rec_is_supremum(next_rec)) { + total_external_size += + btr_rec_get_externally_stored_len( + rec, offsets_rec); break; } @@ -3372,7 +3375,8 @@ btr_estimate_number_of_different_key_vals( matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, - n_cols, &heap); + ULINT_UNDEFINED, + &heap); cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, @@ -3427,10 +3431,6 @@ btr_estimate_number_of_different_key_vals( } } - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - total_external_size += btr_rec_get_externally_stored_len( - rec, offsets_rec); mtr_commit(&mtr); } -- cgit v1.2.1 From e952ee1158be7611f3443bf9e1919652eb3b4602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 31 Jan 2011 09:56:51 +0200 Subject: Bug#59230 assert 0 row_upd_changes_ord_field_binary() in post-crash trx rollback or purge This patch does not relax the failing debug assertion during purge. That will be revisited once we have managed to repeat the assertion failure. row_upd_changes_ord_field_binary_func(): Renamed from row_upd_changes_ord_field_binary(). Add the parameter que_thr_t* in UNIV_DEBUG builds. 
When the off-page column cannot be retrieved, assert that the current transaction is a recovered one and that it is the one that is currently being rolled back. row_upd_changes_ord_field_binary(): A wrapper macro for row_upd_changes_ord_field_binary_func() that discards the que_thr_t* parameter unless UNIV_DEBUG is defined. row_purge_upd_exist_or_extern_func(): Renamed from row_purge_upd_exist_or_extern(). Add the parameter que_thr_t* in UNIV_DEBUG builds. row_purge_upd_exist_or_extern(): A wrapper macro for row_purge_upd_exist_or_extern_func() that discards the que_thr_t* parameter unless UNIV_DEBUG is defined. Make trx_roll_crash_recv_trx const. If there were a 'do not dereference' attribute, it would be appropriate as well. rb://588 approved by Jimmy Yang --- storage/innodb_plugin/ChangeLog | 7 ++++++ storage/innodb_plugin/btr/btr0cur.c | 4 ++-- storage/innodb_plugin/include/row0upd.h | 26 ++++++++++++++------- storage/innodb_plugin/row/row0purge.c | 21 +++++++++++++---- storage/innodb_plugin/row/row0umod.c | 5 ++-- storage/innodb_plugin/row/row0upd.c | 41 +++++++++++++++++++++------------ storage/innodb_plugin/trx/trx0roll.c | 4 ++-- 7 files changed, 74 insertions(+), 34 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 18ab48f32a5..0cbdc8ed9d2 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,10 @@ +2011-01-31 The InnoDB Team + + * btr/btr0cur.c, include/row0upd.h, + row/row0purge.c, row/row0umod.c, row/row0upd.c: + Bug#59230 assert 0 row_upd_changes_ord_field_binary() + in post-crash rollback or purge + 2011-01-27 The InnoDB Team * btr/btr0cur.c: diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 874db3066b5..143135ef24c 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -1768,8 +1768,8 @@ btr_cur_update_in_place( NOT call it if index is secondary */ if 
(!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(NULL, NULL, - index, update)) { + || row_upd_changes_ord_field_binary(index, update, thr, + NULL, NULL)) { /* Remove possible hash index pointer to this record */ btr_search_update_hash_on_delete(cursor); diff --git a/storage/innodb_plugin/include/row0upd.h b/storage/innodb_plugin/include/row0upd.h index b61e6b6dca1..97b7ec49a17 100644 --- a/storage/innodb_plugin/include/row0upd.h +++ b/storage/innodb_plugin/include/row0upd.h @@ -280,19 +280,29 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - const row_ext_t*ext, /*!< NULL, or prefixes of the externally + const row_ext_t*ext) /*!< NULL, or prefixes of the externally stored columns in the old row */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update) /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! 
*/ - __attribute__((nonnull(3,4), warn_unused_result)); + __attribute__((nonnull(1,2), warn_unused_result)); +#ifdef UNIV_DEBUG +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,thr,row,ext) +#else /* UNIV_DEBUG */ +# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ + row_upd_changes_ord_field_binary_func(index,update,row,ext) +#endif /* UNIV_DEBUG */ /***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c index 8bf2ae0f458..c91ec2e8a3b 100644 --- a/storage/innodb_plugin/row/row0purge.c +++ b/storage/innodb_plugin/row/row0purge.c @@ -387,8 +387,11 @@ Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. 
*/ static void -row_purge_upd_exist_or_extern( -/*==========================*/ +row_purge_upd_exist_or_extern_func( +/*===============================*/ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; @@ -413,8 +416,8 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(NULL, NULL, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); @@ -496,6 +499,14 @@ skip_secondaries: } } +#ifdef UNIV_DEBUG +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(thr,node) +#else /* UNIV_DEBUG */ +# define row_purge_upd_exist_or_extern(thr,node) \ + row_purge_upd_exist_or_extern_func(node) +#endif /* UNIV_DEBUG */ + /***********************************************************//** Parses the row reference and other info in a modify undo log record. 
@return TRUE if purge operation required: NOTE that then the CALLER @@ -654,7 +665,7 @@ row_purge( } else if (updated_extern || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - row_purge_upd_exist_or_extern(node); + row_purge_upd_exist_or_extern(thr, node); } if (node->found_clust) { diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index 562f8093c38..f7736935489 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -668,8 +668,9 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary( - node->row, node->ext, node->index, node->update)) { + if (row_upd_changes_ord_field_binary(node->index, node->update, + thr, + node->row, node->ext)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 691d263e6ed..b5d4aeb434e 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1192,25 +1192,31 @@ NOTE: we compare the fields as binary strings! @return TRUE if update vector changes an ordering field in the index record */ UNIV_INTERN ibool -row_upd_changes_ord_field_binary( -/*=============================*/ +row_upd_changes_ord_field_binary_func( +/*==================================*/ + dict_index_t* index, /*!< in: index of the record */ + const upd_t* update, /*!< in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! 
*/ +#ifdef UNIV_DEBUG + const que_thr_t*thr, /*!< in: query thread */ +#endif /* UNIV_DEBUG */ const dtuple_t* row, /*!< in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at compile time */ - const row_ext_t*ext, /*!< NULL, or prefixes of the externally + const row_ext_t*ext) /*!< NULL, or prefixes of the externally stored columns in the old row */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update) /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ { ulint n_unique; ulint i; const dict_index_t* clust_index; - ut_ad(update); ut_ad(index); + ut_ad(update); + ut_ad(thr); + ut_ad(thr->graph); + ut_ad(thr->graph->trx); n_unique = dict_index_get_n_unique(index); @@ -1263,9 +1269,14 @@ row_upd_changes_ord_field_binary( if (UNIV_LIKELY_NULL(buf)) { if (UNIV_UNLIKELY(buf == field_ref_zero)) { - /* This should never happen, but - we try to fail safe here. */ - ut_ad(0); + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active(), + when the server had crashed before + storing the field. 
*/ + ut_ad(thr->graph->trx->is_recovered); + ut_ad(trx_is_recv(thr->graph->trx)); return(TRUE); } @@ -1612,8 +1623,8 @@ row_upd_sec_step( ut_ad(!dict_index_is_clust(node->index)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->ext, - node->index, node->update)) { + || row_upd_changes_ord_field_binary(node->index, node->update, + thr, node->row, node->ext)) { return(row_upd_sec_index_entry(node, thr)); } @@ -2140,8 +2151,8 @@ exit_func: row_upd_store_row(node); - if (row_upd_changes_ord_field_binary(node->row, node->ext, index, - node->update)) { + if (row_upd_changes_ord_field_binary(index, node->update, thr, + node->row, node->ext)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform diff --git a/storage/innodb_plugin/trx/trx0roll.c b/storage/innodb_plugin/trx/trx0roll.c index 1a43e419214..a4bbf7fd652 100644 --- a/storage/innodb_plugin/trx/trx0roll.c +++ b/storage/innodb_plugin/trx/trx0roll.c @@ -48,8 +48,8 @@ Created 3/26/1996 Heikki Tuuri rollback */ #define TRX_ROLL_TRUNC_THRESHOLD 1 -/** In crash recovery, the current trx to be rolled back */ -static trx_t* trx_roll_crash_recv_trx = NULL; +/** In crash recovery, the current trx to be rolled back; NULL otherwise */ +static const trx_t* trx_roll_crash_recv_trx = NULL; /** In crash recovery we set this to the undo n:o of the current trx to be rolled back. Then we can print how many % the rollback has progressed. */ -- cgit v1.2.1 From 1f796c2b646824aeac2927599ba3e51594cd3534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Feb 2011 14:05:12 +0200 Subject: Bug #55284 diagnostics: Enable UNIV_DEBUG_FILE_ACCESSES by UNIV_DEBUG It was the enabling of UNIV_DEBUG_FILE_ACCESSES that caught Bug #55284 in the first place. This is a very light piece of debug code, and there really is no reason why it is not enabled in all debug builds.
rb://551 approved by Jimmy Yang --- storage/innodb_plugin/buf/buf0buf.c | 30 ++++++++++++++++-------------- storage/innodb_plugin/fsp/fsp0fsp.c | 8 ++++---- storage/innodb_plugin/ibuf/ibuf0ibuf.c | 8 ++++---- storage/innodb_plugin/include/buf0buf.h | 8 ++++---- storage/innodb_plugin/include/univ.i | 5 ++--- 5 files changed, 30 insertions(+), 29 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 6e76e4c65be..6bbd5565c58 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -657,9 +657,9 @@ buf_block_init( block->modify_clock = 0; -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ block->check_index_page_at_flush = FALSE; block->index = NULL; @@ -1600,7 +1600,7 @@ buf_page_peek_if_search_hashed( return(is_hashed); } -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. 
This function should be called when we free a file page and want the @@ -1621,6 +1621,8 @@ buf_page_set_file_page_was_freed( bpage = buf_page_hash_get(space, offset); if (bpage) { + /* bpage->file_page_was_freed can already hold + when this code is invoked from dict_drop_index_tree() */ bpage->file_page_was_freed = TRUE; } @@ -1656,7 +1658,7 @@ buf_page_reset_file_page_was_freed( return(bpage); } -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Get read access to a compressed page (usually of type @@ -1753,7 +1755,7 @@ got_block: buf_page_set_accessed_make_young(bpage, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); #endif @@ -2321,7 +2323,7 @@ wait_until_unfixed: buf_page_set_accessed_make_young(&block->page, access_time); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!block->page.file_page_was_freed); #endif @@ -2479,7 +2481,7 @@ buf_page_optimistic_get( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif if (UNIV_UNLIKELY(!access_time)) { @@ -2587,7 +2589,7 @@ buf_page_get_known_nowait( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif @@ -2670,9 +2672,9 @@ buf_page_try_get_func( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES 
|| defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_pool->stat.n_page_gets++; @@ -2701,9 +2703,9 @@ buf_page_init_low( bpage->newest_modification = 0; bpage->oldest_modification = 0; HASH_INVALIDATE(bpage, hash); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /********************************************************************//** @@ -3009,9 +3011,9 @@ buf_page_create( #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(space, offset) == 0); #endif -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /* Page can be found in buf_pool */ buf_pool_mutex_exit(); diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c index e9d24b8fdf6..d091a14c474 100644 --- a/storage/innodb_plugin/fsp/fsp0fsp.c +++ b/storage/innodb_plugin/fsp/fsp0fsp.c @@ -3444,9 +3444,9 @@ fseg_free_page( fseg_free_page_low(seg_inode, space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(space, page); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } /**********************************************************************//** @@ -3513,13 +3513,13 @@ fseg_free_extent( fsp_free_extent(space, zip_size, page, mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG for (i = 0; i < FSP_EXTENT_SIZE; i++) { buf_page_set_file_page_was_freed(space, first_page_in_extent + i); } -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || 
UNIV_DEBUG */ } /**********************************************************************//** diff --git a/storage/innodb_plugin/ibuf/ibuf0ibuf.c b/storage/innodb_plugin/ibuf/ibuf0ibuf.c index 701e8f0ef04..23981ac388e 100644 --- a/storage/innodb_plugin/ibuf/ibuf0ibuf.c +++ b/storage/innodb_plugin/ibuf/ibuf0ibuf.c @@ -1878,9 +1878,9 @@ ibuf_remove_free_page(void) fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_SPACE_ID, page_no, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ ibuf_enter(); @@ -1922,9 +1922,9 @@ ibuf_remove_free_page(void) ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ mtr_commit(&mtr); mutex_exit(&ibuf_mutex); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index d903b443920..a16de67aa3a 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -368,7 +368,7 @@ buf_reset_check_index_page_at_flush( /*================================*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: page number */ -#ifdef UNIV_DEBUG_FILE_ACCESSES +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. 
This function should be called when we free a file page and want the @@ -393,7 +393,7 @@ buf_page_reset_file_page_was_freed( /*===============================*/ ulint space, /*!< in: space id */ ulint offset); /*!< in: page number */ -#endif /* UNIV_DEBUG_FILE_ACCESSES */ +#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /********************************************************************//** Reads the freed_page_clock of a buffer block. @return freed_page_clock */ @@ -1135,11 +1135,11 @@ struct buf_page_struct{ 0 if the block was never accessed in the buffer pool */ /* @} */ -# ifdef UNIV_DEBUG_FILE_ACCESSES +# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ibool file_page_was_freed; /*!< this is set to TRUE when fsp frees a page in buffer pool */ -# endif /* UNIV_DEBUG_FILE_ACCESSES */ +# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ }; diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 4425950748b..4ba48b10896 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -182,9 +182,8 @@ command. Not tested on Windows. */ #define UNIV_DEBUG_LOCK_VALIDATE /* Enable ut_ad(lock_rec_validate_page()) assertions. */ -#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access - (field file_page_was_freed - in buf_page_t) */ +#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access + debugging without UNIV_DEBUG */ #define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ #define UNIV_HASH_DEBUG /* debug HASH_ macros */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ -- cgit v1.2.1 From f2eacde4cd905303ff906d9e33308b8ba03c17f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Feb 2011 14:10:12 +0200 Subject: Bug #55284 diagnostics: When UNIV_DEBUG, do not tolerate garbage in Antelope files in btr_check_blob_fil_page_type(). 
Unfortunately, we must keep the check in production builds, because InnoDB wrote uninitialized garbage to FIL_PAGE_TYPE until fairly recently (5.1.x). rb://546 approved by Jimmy Yang --- storage/innodb_plugin/btr/btr0cur.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'storage') diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 143135ef24c..46cd4a81ec5 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -4169,6 +4169,7 @@ btr_check_blob_fil_page_type( if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) { ulint flags = fil_space_get_flags(space_id); +#ifndef UNIV_DEBUG /* Improve debug test coverage */ if (UNIV_LIKELY ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) { /* Old versions of InnoDB did not initialize @@ -4177,6 +4178,7 @@ btr_check_blob_fil_page_type( a BLOB page that is in Antelope format.*/ return; } +#endif /* !UNIV_DEBUG */ ut_print_timestamp(stderr); fprintf(stderr, -- cgit v1.2.1 From 7c45708f5d29747da043aea144c2f8ba8c142c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Feb 2011 14:12:49 +0200 Subject: Non-functional changes (cleanup) made while narrowing down Bug #55284: row_purge(): Change the return type to void. (The return value always was DB_SUCCESS.) Remove some local variables. row_undo_mod_remove_clust_low(): Remove some local variables. 
rb://547 approved by Jimmy Yang --- storage/innodb_plugin/row/row0purge.c | 47 ++++++++--------------------------- storage/innodb_plugin/row/row0umod.c | 30 ++++++---------------- 2 files changed, 19 insertions(+), 58 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c index c91ec2e8a3b..752a2ec9e83 100644 --- a/storage/innodb_plugin/row/row0purge.c +++ b/storage/innodb_plugin/row/row0purge.c @@ -613,47 +613,32 @@ err_exit: /***********************************************************//** Fetches an undo log record and does the purge for the recorded operation. If none left, or the current purge completed, returns the control to the -parent node, which is always a query thread node. -@return DB_SUCCESS if operation successfully completed, else error code */ -static -ulint +parent node, which is always a query thread node. */ +static __attribute__((nonnull)) +void row_purge( /*======*/ purge_node_t* node, /*!< in: row purge node */ que_thr_t* thr) /*!< in: query thread */ { - roll_ptr_t roll_ptr; - ibool purge_needed; ibool updated_extern; - trx_t* trx; - - ut_ad(node && thr); - trx = thr_get_trx(thr); + ut_ad(node); + ut_ad(thr); - node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, - &(node->reservation), + node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr, + &node->reservation, node->heap); if (!node->undo_rec) { /* Purge completed for this query thread */ thr->run_node = que_node_get_parent(node); - return(DB_SUCCESS); - } - - node->roll_ptr = roll_ptr; - - if (node->undo_rec == &trx_purge_dummy_rec) { - purge_needed = FALSE; - } else { - purge_needed = row_purge_parse_undo_rec(node, &updated_extern, - thr); - /* If purge_needed == TRUE, we must also remember to unfreeze - data dictionary! 
*/ + return; } - if (purge_needed) { + if (node->undo_rec != &trx_purge_dummy_rec + && row_purge_parse_undo_rec(node, &updated_extern, thr)) { node->found_clust = FALSE; node->index = dict_table_get_next_index( @@ -672,7 +657,7 @@ row_purge( btr_pcur_close(&(node->pcur)); } - row_mysql_unfreeze_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); } /* Do some cleanup */ @@ -680,8 +665,6 @@ row_purge( mem_heap_empty(node->heap); thr->run_node = node; - - return(DB_SUCCESS); } /***********************************************************//** @@ -695,9 +678,6 @@ row_purge_step( que_thr_t* thr) /*!< in: query thread */ { purge_node_t* node; -#ifdef UNIV_DEBUG - ulint err; -#endif /* UNIV_DEBUG */ ut_ad(thr); @@ -705,12 +685,7 @@ row_purge_step( ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); -#ifdef UNIV_DEBUG - err = -#endif /* UNIV_DEBUG */ row_purge(node, thr); - ut_ad(err == DB_SUCCESS); - return(thr); } diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c index f7736935489..5202a498eed 100644 --- a/storage/innodb_plugin/row/row0umod.c +++ b/storage/innodb_plugin/row/row0umod.c @@ -173,40 +173,26 @@ row_undo_mod_remove_clust_low( mtr_t* mtr, /*!< in: mtr */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { - btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; - ibool success; ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - pcur = &(node->pcur); - btr_cur = btr_pcur_get_btr_cur(pcur); - success = btr_pcur_restore_position(mode, pcur, mtr); + /* Find out if the record has been purged already + or if we can remove it. 
*/ - if (!success) { + if (!btr_pcur_restore_position(mode, &node->pcur, mtr) + || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { return(DB_SUCCESS); } - /* Find out if we can remove the whole clustered index record */ - - if (node->rec_type == TRX_UNDO_UPD_DEL_REC - && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { - - /* Ok, we can remove */ - } else { - return(DB_SUCCESS); - } + btr_cur = btr_pcur_get_btr_cur(&node->pcur); if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, mtr); - - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } + err = btr_cur_optimistic_delete(btr_cur, mtr) + ? DB_SUCCESS + : DB_FAIL; } else { ut_ad(mode == BTR_MODIFY_TREE); -- cgit v1.2.1 From 5adf2313f713821841d979472eef0d7b07658965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Feb 2011 15:51:08 +0200 Subject: Bug #55284 diagnostics: Introduce UNIV_BLOB_LIGHT_DEBUG, enabled by UNIV_DEBUG btr_rec_get_field_ref_offs(), btr_rec_get_field_ref(): New functions. Get the pointer to an externally stored field. btr_cur_set_ownership_of_extern_field(): Assert that the BLOB has not already been disowned. btr_store_big_rec_extern_fields(): Rename to btr_store_big_rec_extern_fields_func() and add the debug parameter update_in_place. All pointers to externally stored columns in the record must either be zero or they must be pointers to inherited columns, owned by this record or an earlier record version. For any BLOB that is stored, the BLOB pointer must previously have been zero. When the function completes, all BLOB pointers must be nonzero and owned by the record. 
rb://549 approved by Jimmy Yang --- storage/innodb_plugin/btr/btr0cur.c | 150 ++++++++++++++++++++++---------- storage/innodb_plugin/include/btr0cur.h | 42 +++++++-- storage/innodb_plugin/include/univ.i | 2 + storage/innodb_plugin/row/row0ins.c | 2 +- storage/innodb_plugin/row/row0upd.c | 2 +- 5 files changed, 144 insertions(+), 54 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 46cd4a81ec5..704cc606a5f 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -186,7 +186,7 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ #endif /* !UNIV_HOTBACKUP */ @@ -3483,6 +3483,35 @@ btr_estimate_number_of_different_key_vals( /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ +/***********************************************************//** +Gets the offset of the pointer to the externally stored part of a field. +@return offset of the pointer to the externally stored part */ +static +ulint +btr_rec_get_field_ref_offs( +/*=======================*/ + const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ + ulint n) /*!< in: index of the external field */ +{ + ulint field_ref_offs; + ulint local_len; + + ut_a(rec_offs_nth_extern(offsets, n)); + field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len); + ut_a(local_len != UNIV_SQL_NULL); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE); +} + +/** Gets a pointer to the externally stored part of a field. 
+@param rec record +@param offsets rec_get_offsets(rec) +@param n index of the externally stored field +@return pointer to the externally stored part */ +#define btr_rec_get_field_ref(rec, offsets, n) \ + ((rec) + btr_rec_get_field_ref_offs(offsets, n)) + /***********************************************************//** Gets the externally stored size of a record, in units of a database page. @return externally stored part, in units of a database page */ @@ -3490,28 +3519,27 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - rec_t* rec, /*!< in: record */ + const rec_t* rec, /*!< in: record */ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ { ulint n_fields; - byte* data; - ulint local_len; - ulint extern_len; ulint total_extern_len = 0; ulint i; ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + + if (!rec_offs_any_extern(offsets)) { + return(0); + } + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, offsets, i, &local_len); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - extern_len = mach_read_from_4(data + local_len - + BTR_EXTERN_LEN + 4); + ulint extern_len = mach_read_from_4( + btr_rec_get_field_ref(rec, offsets, i) + + BTR_EXTERN_LEN + 4); total_extern_len += ut_calc_align(extern_len, UNIV_PAGE_SIZE); @@ -3541,7 +3569,7 @@ btr_cur_set_ownership_of_extern_field( ulint byte_val; data = rec_get_nth_field(rec, offsets, i, &local_len); - + ut_ad(rec_offs_nth_extern(offsets, i)); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -3551,6 +3579,9 @@ btr_cur_set_ownership_of_extern_field( if (val) { byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); } else { +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; } @@ -3788,8 
+3819,8 @@ file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -3798,11 +3829,17 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr - containing the latch to rec and to the - tree */ + { ulint rec_page_no; byte* field_ref; @@ -3820,6 +3857,7 @@ btr_store_big_rec_extern_fields( z_stream c_stream; ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(rec_offs_any_extern(offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -3851,21 +3889,37 @@ btr_store_big_rec_extern_fields( ut_a(err == Z_OK); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must either be zero or they must be pointers to inherited + columns, owned by this record or an earlier record version. 
*/ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + /* Either this must be an update in place, + or the BLOB must be inherited, or the BLOB pointer + must be zero (will be written in this function). */ + ut_a(update_in_place + || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) + || !memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ for (i = 0; i < big_rec_vec->n_fields; i++) { - ut_ad(rec_offs_nth_extern(offsets, - big_rec_vec->fields[i].field_no)); - { - ulint local_len; - field_ref = rec_get_nth_field( - rec, offsets, big_rec_vec->fields[i].field_no, - &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - field_ref += local_len; - } + field_ref = btr_rec_get_field_ref( + rec, offsets, big_rec_vec->fields[i].field_no); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* A zero BLOB pointer should have been initially inserted. */ + ut_a(!memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ extern_len = big_rec_vec->fields[i].len; UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, extern_len); @@ -4147,6 +4201,23 @@ next_zip_page: mem_heap_free(heap); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* All pointers to externally stored columns in the record + must be valid. */ + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (!rec_offs_nth_extern(offsets, i)) { + continue; + } + + field_ref = btr_rec_get_field_ref(rec, offsets, i); + + /* The pointer must not be zero. 
*/ + ut_a(0 != memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The column must not be disowned by this record. */ + ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ return(DB_SUCCESS); } @@ -4228,23 +4299,13 @@ btr_free_externally_stored_field( ulint page_no; ulint next_page_no; mtr_t mtr; -#ifdef UNIV_DEBUG + ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains_page(local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX)); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); - - if (rec) { - ulint local_len; - const byte* f = rec_get_nth_field(rec, offsets, - i, &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - f += local_len; - ut_ad(f == field_ref); - } -#endif /* UNIV_DEBUG */ + ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i)); if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { @@ -4409,13 +4470,8 @@ btr_rec_free_externally_stored_fields( for (i = 0; i < n_fields; i++) { if (rec_offs_nth_extern(offsets, i)) { - ulint len; - byte* data - = rec_get_nth_field(rec, offsets, i, &len); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - btr_free_externally_stored_field( - index, data + len - BTR_EXTERN_FIELD_REF_SIZE, + index, btr_rec_get_field_ref(rec, offsets, i), rec, offsets, page_zip, i, rb_ctx, mtr); } } diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index cb8cb399715..ece3621fa97 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -512,8 +512,8 @@ file segment of the index tree. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint -btr_store_big_rec_extern_fields( -/*============================*/ +btr_store_big_rec_extern_fields_func( +/*=================================*/ dict_index_t* index, /*!< in: index of rec; the index tree MUST be X-latched */ buf_block_t* rec_block, /*!< in/out: block containing rec */ @@ -522,10 +522,42 @@ btr_store_big_rec_extern_fields( the "external storage" flags in offsets will not correspond to rec when this function returns */ - big_rec_t* big_rec_vec, /*!< in: vector containing fields +#ifdef UNIV_DEBUG + mtr_t* local_mtr, /*!< in: mtr containing the + latch to rec and to the tree */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ibool update_in_place,/*! in: TRUE if the record is updated + in place (not delete+insert) */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields to be stored externally */ - mtr_t* local_mtr); /*!< in: mtr containing the latch to - rec and to the tree */ + __attribute__((nonnull)); + +/** Stores the fields in big_rec_vec to the tablespace and puts pointers to +them in rec. The extern flags in rec will have to be set beforehand. +The fields are stored on pages allocated from leaf node +file segment of the index tree. 
+@param index in: clustered index; MUST be X-latched by mtr +@param b in/out: block containing rec; MUST be X-latched by mtr +@param rec in/out: clustered index record +@param offsets in: rec_get_offsets(rec, index); + the "external storage" flags in offsets will not be adjusted +@param mtr in: mini-transaction that holds x-latch on index and b +@param upd in: TRUE if the record is updated in place (not delete+insert) +@param big in: vector containing fields to be stored externally +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +#ifdef UNIV_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) +#elif defined UNIV_BLOB_LIGHT_DEBUG +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) +#else +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) +#endif + /*******************************************************************//** Frees the space in an externally stored field to the file space management if the field in data is owned the externally stored field, diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 4ba48b10896..690bfd5d6a9 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -177,6 +177,8 @@ command. Not tested on Windows. 
*/ debugging without UNIV_DEBUG */ #define UNIV_BUF_DEBUG /* Enable buffer pool debugging without UNIV_DEBUG */ +#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column + debugging without UNIV_DEBUG */ #define UNIV_DEBUG /* Enable ut_ad() assertions and disable UNIV_INLINE */ #define UNIV_DEBUG_LOCK_VALIDATE /* Enable diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index 298c601c7e3..8050c099751 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -2130,7 +2130,7 @@ function_exit: err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr); + rec, offsets, &mtr, FALSE, big_rec); if (modify) { dtuple_big_rec_free(big_rec); diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index b5d4aeb434e..9ded3d68018 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1952,7 +1952,7 @@ row_upd_clust_rec( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr); + mtr, TRUE, big_rec); mtr_commit(mtr); } -- cgit v1.2.1 From 89621ad7387fb206023e8767b164ad6750d8e43b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 8 Feb 2011 12:56:23 +0200 Subject: Implement UNIV_BLOB_DEBUG. An early version of this caught Bug #55284. This option is known to be broken when tablespaces contain off-page columns after crash recovery. It has only been tested when creating the data files from scratch. btr_blob_dbg_t: A map from page_no:heap_no:field_no to first_blob_page_no. This map is instantiated for every clustered index in index->blobs. It is protected by index->blobs_mutex. btr_blob_dbg_msg_issue(): Issue a diagnostic message. Invoked when btr_blob_dbg_msg is set. btr_blob_dbg_rbt_insert(): Insert a btr_blob_dbg_t into index->blobs. btr_blob_dbg_rbt_delete(): Remove a btr_blob_dbg_t from index->blobs.
btr_blob_dbg_cmp(): Comparator for btr_blob_dbg_t. btr_blob_dbg_add_blob(): Add a BLOB reference to the map. btr_blob_dbg_add_rec(): Add all BLOB references from a record to the map. btr_blob_dbg_print(): Display the map of BLOB references in an index. btr_blob_dbg_remove_rec(): Remove all BLOB references of a record from the map. btr_blob_dbg_is_empty(): Check that no BLOB references exist to or from a page. Disowned references from delete-marked records are tolerated. btr_blob_dbg_op(): Perform an operation on all BLOB references on a B-tree page. btr_blob_dbg_add(): Add all BLOB references from a B-tree page to the map. btr_blob_dbg_remove(): Remove all BLOB references from a B-tree page from the map. btr_blob_dbg_restore(): Restore the BLOB references after a failed page reorganize. btr_blob_dbg_set_deleted_flag(): Modify the 'deleted' flag in the BLOB references of a record. btr_blob_dbg_owner(): Own or disown a BLOB reference. btr_page_create(), btr_page_free_low(): Assert that no BLOB references exist. btr_create(): Create index->blobs for clustered indexes. btr_page_reorganize_low(): Invoke btr_blob_dbg_remove() before copying the records. Invoke btr_blob_dbg_restore() if the operation fails. btr_page_empty(), btr_lift_page_up(), btr_compress(), btr_discard_page(): Invoke btr_blob_dbg_remove(). btr_cur_del_mark_set_clust_rec(): Invoke btr_blob_dbg_set_deleted_flag(). Other cases of modifying the delete mark are either in the secondary index or during crash recovery, which we do not promise to support. btr_cur_set_ownership_of_extern_field(): Invoke btr_blob_dbg_owner(). btr_store_big_rec_extern_fields(): Invoke btr_blob_dbg_add_blob(). btr_free_externally_stored_field(): Invoke btr_blob_dbg_assert_empty() on the first BLOB page. page_cur_insert_rec_low(), page_cur_insert_rec_zip(), page_copy_rec_list_end_to_created_page(): Invoke btr_blob_dbg_add_rec(). 
page_cur_insert_rec_zip_reorg(), page_copy_rec_list_end(), page_copy_rec_list_start(): After failure, invoke btr_blob_dbg_remove() and btr_blob_dbg_add(). page_cur_delete_rec(): Invoke btr_blob_dbg_remove_rec(). page_delete_rec_list_end(): Invoke btr_blob_dbg_op(btr_blob_dbg_remove_rec). page_zip_reorganize(): Invoke btr_blob_dbg_remove() before copying the records. page_zip_copy_recs(): Invoke btr_blob_dbg_add(). row_upd_rec_in_place(): Invoke btr_blob_dbg_rbt_delete() and btr_blob_dbg_rbt_insert(). innobase_start_or_create_for_mysql(): Warn when UNIV_BLOB_DEBUG is enabled. rb://550 approved by Jimmy Yang --- storage/innodb_plugin/btr/btr0btr.c | 571 ++++++++++++++++++++++++++++++ storage/innodb_plugin/btr/btr0cur.c | 44 +++ storage/innodb_plugin/dict/dict0mem.c | 9 + storage/innodb_plugin/include/btr0btr.h | 85 +++++ storage/innodb_plugin/include/btr0types.h | 125 +++++++ storage/innodb_plugin/include/dict0mem.h | 7 + storage/innodb_plugin/include/page0zip.h | 2 +- storage/innodb_plugin/include/univ.i | 2 + storage/innodb_plugin/page/page0cur.c | 10 + storage/innodb_plugin/page/page0page.c | 11 + storage/innodb_plugin/page/page0zip.c | 5 +- storage/innodb_plugin/row/row0upd.c | 35 ++ storage/innodb_plugin/srv/srv0start.c | 6 + 13 files changed, 910 insertions(+), 2 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index 3d8d6048603..46810c011c4 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -42,6 +42,560 @@ Created 6/2/1994 Heikki Tuuri #include "ibuf0ibuf.h" #include "trx0trx.h" +#ifdef UNIV_BLOB_DEBUG +# include "srv0srv.h" +# include "ut0rbt.h" + +/** TRUE when messages about index->blobs modification are enabled. */ +static ibool btr_blob_dbg_msg; + +/** Issue a message about an operation on index->blobs. 
+@param op operation +@param b the entry being subjected to the operation +@param ctx the context of the operation */ +#define btr_blob_dbg_msg_issue(op, b, ctx) \ + fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \ + (b)->ref_page_no, (b)->ref_heap_no, \ + (b)->ref_field_no, (b)->blob_page_no, ctx, \ + (b)->owner, (b)->always_owner, (b)->del) + +/** Insert to index->blobs a reference to an off-page column. +@param index the index tree +@param b the reference +@param ctx context (for logging) */ +UNIV_INTERN +void +btr_blob_dbg_rbt_insert( +/*====================*/ + dict_index_t* index, /*!< in/out: index tree */ + const btr_blob_dbg_t* b, /*!< in: the reference */ + const char* ctx) /*!< in: context (for logging) */ +{ + if (btr_blob_dbg_msg) { + btr_blob_dbg_msg_issue("insert", b, ctx); + } + mutex_enter(&index->blobs_mutex); + rbt_insert(index->blobs, b, b); + mutex_exit(&index->blobs_mutex); +} + +/** Remove from index->blobs a reference to an off-page column. +@param index the index tree +@param b the reference +@param ctx context (for logging) */ +UNIV_INTERN +void +btr_blob_dbg_rbt_delete( +/*====================*/ + dict_index_t* index, /*!< in/out: index tree */ + const btr_blob_dbg_t* b, /*!< in: the reference */ + const char* ctx) /*!< in: context (for logging) */ +{ + if (btr_blob_dbg_msg) { + btr_blob_dbg_msg_issue("delete", b, ctx); + } + mutex_enter(&index->blobs_mutex); + ut_a(rbt_delete(index->blobs, b)); + mutex_exit(&index->blobs_mutex); +} + +/**************************************************************//** +Comparator for items (btr_blob_dbg_t) in index->blobs. +The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no). 
+@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */ +static +int +btr_blob_dbg_cmp( +/*=============*/ + const void* a, /*!< in: first btr_blob_dbg_t to compare */ + const void* b) /*!< in: second btr_blob_dbg_t to compare */ +{ + const btr_blob_dbg_t* aa = a; + const btr_blob_dbg_t* bb = b; + + ut_ad(aa != NULL); + ut_ad(bb != NULL); + + if (aa->ref_page_no != bb->ref_page_no) { + return(aa->ref_page_no < bb->ref_page_no ? -1 : 1); + } + if (aa->ref_heap_no != bb->ref_heap_no) { + return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1); + } + if (aa->ref_field_no != bb->ref_field_no) { + return(aa->ref_field_no < bb->ref_field_no ? -1 : 1); + } + return(0); +} + +/**************************************************************//** +Add a reference to an off-page column to the index->blobs map. */ +UNIV_INTERN +void +btr_blob_dbg_add_blob( +/*==================*/ + const rec_t* rec, /*!< in: clustered index record */ + ulint field_no, /*!< in: off-page column number */ + ulint page_no, /*!< in: start page of the column */ + dict_index_t* index, /*!< in/out: index tree */ + const char* ctx) /*!< in: context (for logging) */ +{ + btr_blob_dbg_t b; + const page_t* page = page_align(rec); + + ut_a(index->blobs); + + b.blob_page_no = page_no; + b.ref_page_no = page_get_page_no(page); + b.ref_heap_no = page_rec_get_heap_no(rec); + b.ref_field_no = field_no; + ut_a(b.ref_field_no >= index->n_uniq); + b.always_owner = b.owner = TRUE; + b.del = FALSE; + ut_a(!rec_get_deleted_flag(rec, page_is_comp(page))); + btr_blob_dbg_rbt_insert(index, &b, ctx); +} + +/**************************************************************//** +Add to index->blobs any references to off-page columns from a record. 
+@return number of references added */ +UNIV_INTERN +ulint +btr_blob_dbg_add_rec( +/*=================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: offsets */ + const char* ctx) /*!< in: context (for logging) */ +{ + ulint count = 0; + ulint i; + btr_blob_dbg_t b; + ibool del; + + ut_ad(rec_offs_validate(rec, index, offsets)); + + if (!rec_offs_any_extern(offsets)) { + return(0); + } + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0); + + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i)) { + ulint len; + const byte* field_ref = rec_get_nth_field( + rec, offsets, i, &len); + + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + if (!memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)) { + /* the column has not been stored yet */ + continue; + } + + b.ref_field_no = i; + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + ut_a(b.ref_field_no >= index->n_uniq); + b.always_owner = b.owner + = !(field_ref[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG); + b.del = del; + + btr_blob_dbg_rbt_insert(index, &b, ctx); + count++; + } + } + + return(count); +} + +/**************************************************************//** +Display the references to off-page columns. +This function is to be called from a debugger, +for example when a breakpoint on ut_dbg_assertion_failed is hit. */ +UNIV_INTERN +void +btr_blob_dbg_print( +/*===============*/ + const dict_index_t* index) /*!< in: index tree */ +{ + const ib_rbt_node_t* node; + + if (!index->blobs) { + return; + } + + /* We intentionally do not acquire index->blobs_mutex here. + This function is to be called from a debugger, and the caller + should make sure that the index->blobs_mutex is held. 
*/ + + for (node = rbt_first(index->blobs); + node != NULL; node = rbt_next(index->blobs, node)) { + const btr_blob_dbg_t* b + = rbt_value(btr_blob_dbg_t, node); + fprintf(stderr, "%u:%u:%u->%u%s%s%s\n", + b->ref_page_no, b->ref_heap_no, b->ref_field_no, + b->blob_page_no, + b->owner ? "" : "(disowned)", + b->always_owner ? "" : "(has disowned)", + b->del ? "(deleted)" : ""); + } +} + +/**************************************************************//** +Remove from index->blobs any references to off-page columns from a record. +@return number of references removed */ +UNIV_INTERN +ulint +btr_blob_dbg_remove_rec( +/*====================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: offsets */ + const char* ctx) /*!< in: context (for logging) */ +{ + ulint i; + ulint count = 0; + btr_blob_dbg_t b; + + ut_ad(rec_offs_validate(rec, index, offsets)); + + if (!rec_offs_any_extern(offsets)) { + return(0); + } + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i)) { + ulint len; + const byte* field_ref = rec_get_nth_field( + rec, offsets, i, &len); + + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + b.ref_field_no = i; + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + + switch (b.blob_page_no) { + case 0: + /* The column has not been stored yet. + The BLOB pointer must be all zero. + There cannot be a BLOB starting at + page 0, because page 0 is reserved for + the tablespace header. 
*/ + ut_a(!memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* fall through */ + case FIL_NULL: + /* the column has been freed already */ + continue; + } + + btr_blob_dbg_rbt_delete(index, &b, ctx); + count++; + } + } + + return(count); +} + +/**************************************************************//** +Check that there are no references to off-page columns from or to +the given page. Invoked when freeing or clearing a page. +@return TRUE when no orphan references exist */ +UNIV_INTERN +ibool +btr_blob_dbg_is_empty( +/*==================*/ + dict_index_t* index, /*!< in: index */ + ulint page_no) /*!< in: page number */ +{ + const ib_rbt_node_t* node; + ibool success = TRUE; + + if (!index->blobs) { + return(success); + } + + mutex_enter(&index->blobs_mutex); + + for (node = rbt_first(index->blobs); + node != NULL; node = rbt_next(index->blobs, node)) { + const btr_blob_dbg_t* b + = rbt_value(btr_blob_dbg_t, node); + + if (b->ref_page_no != page_no && b->blob_page_no != page_no) { + continue; + } + + fprintf(stderr, + "InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n", + b->owner ? "" : "(disowned)", + b->always_owner ? "" : "(has disowned)", + b->del ? "(deleted)" : "", + b->ref_page_no, b->ref_heap_no, b->ref_field_no, + b->blob_page_no); + + if (b->blob_page_no != page_no || b->owner || !b->del) { + success = FALSE; + } + } + + mutex_exit(&index->blobs_mutex); + return(success); +} + +/**************************************************************//** +Count and process all references to off-page columns on a page. 
+@return number of references processed */ +UNIV_INTERN +ulint +btr_blob_dbg_op( +/*============*/ + const page_t* page, /*!< in: B-tree leaf page */ + const rec_t* rec, /*!< in: record to start from + (NULL to process the whole page) */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx, /*!< in: context (for logging) */ + const btr_blob_dbg_op_f op) /*!< in: operation on records */ +{ + ulint count = 0; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_a(!rec || page_align(rec) == page); + + if (!index->blobs || !page_is_leaf(page) + || !dict_index_is_clust(index)) { + return(0); + } + + if (rec == NULL) { + rec = page_get_infimum_rec(page); + } + + do { + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, &heap); + count += op(rec, index, offsets, ctx); + rec = page_rec_get_next_const(rec); + } while (!page_rec_is_supremum(rec)); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return(count); +} + +/**************************************************************//** +Count and add to index->blobs any references to off-page columns +from records on a page. +@return number of references added */ +UNIV_INTERN +ulint +btr_blob_dbg_add( +/*=============*/ + const page_t* page, /*!< in: rewritten page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ +{ + btr_blob_dbg_assert_empty(index, page_get_page_no(page)); + + return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec)); +} + +/**************************************************************//** +Count and remove from index->blobs any references to off-page columns +from records on a page. +Used when reorganizing a page, before copying the records. 
+@return number of references removed */ +UNIV_INTERN +ulint +btr_blob_dbg_remove( +/*================*/ + const page_t* page, /*!< in: b-tree page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ +{ + ulint count; + + count = btr_blob_dbg_op(page, NULL, index, ctx, + btr_blob_dbg_remove_rec); + + /* Check that no references exist. */ + btr_blob_dbg_assert_empty(index, page_get_page_no(page)); + + return(count); +} + +/**************************************************************//** +Restore in index->blobs any references to off-page columns +Used when page reorganize fails due to compressed page overflow. */ +UNIV_INTERN +void +btr_blob_dbg_restore( +/*=================*/ + const page_t* npage, /*!< in: page that failed to compress */ + const page_t* page, /*!< in: copy of original page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ +{ + ulint removed; + ulint added; + + ut_a(page_get_page_no(npage) == page_get_page_no(page)); + ut_a(page_get_space_id(npage) == page_get_space_id(page)); + + removed = btr_blob_dbg_remove(npage, index, ctx); + added = btr_blob_dbg_add(page, index, ctx); + ut_a(added == removed); +} + +/**************************************************************//** +Modify the 'deleted' flag of a record. 
*/ +UNIV_INTERN +void +btr_blob_dbg_set_deleted_flag( +/*==========================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ + ibool del) /*!< in: TRUE=deleted, FALSE=exists */ +{ + const ib_rbt_node_t* node; + btr_blob_dbg_t b; + btr_blob_dbg_t* c; + ulint i; + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_a(dict_index_is_clust(index)); + ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */ + + if (!rec_offs_any_extern(offsets) || !index->blobs) { + + return; + } + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i)) { + ulint len; + const byte* field_ref = rec_get_nth_field( + rec, offsets, i, &len); + + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + b.ref_field_no = i; + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + + switch (b.blob_page_no) { + case 0: + ut_a(memcmp(field_ref, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* page number 0 is for the + page allocation bitmap */ + case FIL_NULL: + /* the column has been freed already */ + ut_error; + } + + mutex_enter(&index->blobs_mutex); + node = rbt_lookup(index->blobs, &b); + ut_a(node); + + c = rbt_value(btr_blob_dbg_t, node); + /* The flag should be modified. */ + c->del = del; + if (btr_blob_dbg_msg) { + b = *c; + mutex_exit(&index->blobs_mutex); + btr_blob_dbg_msg_issue("del_mk", &b, ""); + } else { + mutex_exit(&index->blobs_mutex); + } + } + } +} + +/**************************************************************//** +Change the ownership of an off-page column. 
*/ +UNIV_INTERN +void +btr_blob_dbg_owner( +/*===============*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ + ulint i, /*!< in: ith field in rec */ + ibool own) /*!< in: TRUE=owned, FALSE=disowned */ +{ + const ib_rbt_node_t* node; + btr_blob_dbg_t b; + const byte* field_ref; + ulint len; + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_a(rec_offs_nth_extern(offsets, i)); + + field_ref = rec_get_nth_field(rec, offsets, i, &len); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + b.ref_field_no = i; + b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG); + b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO); + + ut_a(b.owner == own); + + mutex_enter(&index->blobs_mutex); + node = rbt_lookup(index->blobs, &b); + /* row_ins_clust_index_entry_by_modify() invokes + btr_cur_unmark_extern_fields() also for the newly inserted + references, which are all zero bytes until the columns are stored. + The node lookup must fail if and only if that is the case. */ + ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE) + == !node); + + if (node) { + btr_blob_dbg_t* c = rbt_value(btr_blob_dbg_t, node); + /* Some code sets ownership from TRUE to TRUE. + We do not allow changing ownership from FALSE to FALSE. 
*/ + ut_a(own || c->owner); + + c->owner = own; + if (!own) { + c->always_owner = FALSE; + } + } + + mutex_exit(&index->blobs_mutex); +} +#endif /* UNIV_BLOB_DEBUG */ + /* Latching strategy of the InnoDB B-tree -------------------------------------- @@ -296,6 +850,7 @@ btr_page_create( page_t* page = buf_block_get_frame(block); ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); if (UNIV_LIKELY_NULL(page_zip)) { page_create_zip(block, index, level, mtr); @@ -489,6 +1044,7 @@ btr_page_free_low( modify clock */ buf_block_modify_clock_inc(block); + btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); if (dict_index_is_ibuf(index)) { @@ -773,6 +1329,13 @@ btr_create( block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); } else { +#ifdef UNIV_BLOB_DEBUG + if ((type & DICT_CLUSTERED) && !index->blobs) { + mutex_create(&index->blobs_mutex, SYNC_ANY_LATCH); + index->blobs = rbt_create(sizeof(btr_blob_dbg_t), + btr_blob_dbg_cmp); + } +#endif /* UNIV_BLOB_DEBUG */ block = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); } @@ -996,6 +1559,7 @@ btr_page_reorganize_low( block->check_index_page_at_flush = TRUE; #endif /* !UNIV_HOTBACKUP */ + btr_blob_dbg_remove(page, index, "btr_page_reorganize"); /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ @@ -1024,6 +1588,8 @@ btr_page_reorganize_low( (!page_zip_compress(page_zip, page, index, NULL))) { /* Restore the old page and exit. */ + btr_blob_dbg_restore(page, temp_page, index, + "btr_page_reorganize_compress_fail"); #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG /* Check that the bytes that we skip are identical. 
*/ @@ -1157,6 +1723,7 @@ btr_page_empty( #endif /* UNIV_ZIP_DEBUG */ btr_search_drop_page_hash_index(block); + btr_blob_dbg_remove(page, index, "btr_page_empty"); /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ @@ -2497,6 +3064,7 @@ btr_lift_page_up( index); } + btr_blob_dbg_remove(page, index, "btr_lift_page_up"); lock_update_copy_and_discard(father_block, block); /* Go upward to root page, decrementing levels by one. */ @@ -2758,6 +3326,7 @@ err_exit: lock_update_merge_right(merge_block, orig_succ, block); } + btr_blob_dbg_remove(page, index, "btr_compress"); mem_heap_free(heap); if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { @@ -2988,6 +3557,8 @@ btr_discard_page( block); } + btr_blob_dbg_remove(page, index, "btr_discard_page"); + /* Free the file page */ btr_page_free(index, block, mtr); diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 704cc606a5f..86d77c79e7b 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -2572,6 +2572,7 @@ btr_cur_del_mark_set_clust_rec( page_zip = buf_block_get_page_zip(block); + btr_blob_dbg_set_deleted_flag(rec, index, offsets, val); btr_rec_set_deleted_flag(rec, page_zip, val); trx = thr_get_trx(thr); @@ -3595,6 +3596,8 @@ btr_cur_set_ownership_of_extern_field( } else { mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val); } + + btr_blob_dbg_owner(rec, index, offsets, i, val); } /*******************************************************************//** @@ -4094,6 +4097,11 @@ btr_store_big_rec_extern_fields_func( } if (prev_page_no == FIL_NULL) { + btr_blob_dbg_add_blob( + rec, big_rec_vec->fields[i] + .field_no, page_no, index, + "store"); + mach_write_to_4(field_ref + BTR_EXTERN_SPACE_ID, space_id); @@ -4169,6 +4177,11 @@ next_zip_page: MLOG_4BYTES, &mtr); if (prev_page_no == FIL_NULL) { + btr_blob_dbg_add_blob( + rec, big_rec_vec->fields[i] + 
.field_no, page_no, index, + "store"); + mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, space_id, @@ -4337,6 +4350,37 @@ btr_free_externally_stored_field( rec_zip_size = 0; } +#ifdef UNIV_BLOB_DEBUG + if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) + && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) + && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) { + /* This off-page column will be freed. + Check that no references remain. */ + + btr_blob_dbg_t b; + + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + + if (rec) { + /* Remove the reference from the record to the + BLOB. If the BLOB were not freed, the + reference would be removed when the record is + removed. Freeing the BLOB will overwrite the + BTR_EXTERN_PAGE_NO in the field_ref of the + record with FIL_NULL, which would make the + btr_blob_dbg information inconsistent with the + record. */ + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + b.ref_field_no = i; + btr_blob_dbg_rbt_delete(index, &b, "free"); + } + + btr_blob_dbg_assert_empty(index, b.blob_page_no); + } +#endif /* UNIV_BLOB_DEBUG */ + for (;;) { #ifdef UNIV_SYNC_DEBUG buf_block_t* rec_block; diff --git a/storage/innodb_plugin/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c index 3287247029f..aef815dd2f6 100644 --- a/storage/innodb_plugin/dict/dict0mem.c +++ b/storage/innodb_plugin/dict/dict0mem.c @@ -36,6 +36,9 @@ Created 1/8/1996 Heikki Tuuri #ifndef UNIV_HOTBACKUP # include "lock0lock.h" #endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_BLOB_DEBUG +# include "ut0rbt.h" +#endif /* UNIV_BLOB_DEBUG */ #define DICT_HEAP_SIZE 100 /*!< initial memory heap size when creating a table or index object */ @@ -316,6 +319,12 @@ dict_mem_index_free( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); +#ifdef UNIV_BLOB_DEBUG + if (index->blobs) { + mutex_free(&index->blobs_mutex); + rbt_free(index->blobs); + } +#endif /* UNIV_BLOB_DEBUG */ 
mem_heap_free(index->heap); } diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h index dde3a0bab69..5aa02694e0e 100644 --- a/storage/innodb_plugin/include/btr0btr.h +++ b/storage/innodb_plugin/include/btr0btr.h @@ -81,6 +81,91 @@ UNIQUE definition on secondary indexes when we decide if we can use the insert buffer to speed up inserts */ #define BTR_IGNORE_SEC_UNIQUE 2048 +#ifdef UNIV_BLOB_DEBUG +# include "ut0rbt.h" +/** An index->blobs entry for keeping track of off-page column references */ +struct btr_blob_dbg_struct +{ + unsigned blob_page_no:32; /*!< first BLOB page number */ + unsigned ref_page_no:32; /*!< referring page number */ + unsigned ref_heap_no:16; /*!< referring heap number */ + unsigned ref_field_no:10; /*!< referring field number */ + unsigned owner:1; /*!< TRUE if BLOB owner */ + unsigned always_owner:1; /*!< TRUE if always + has been the BLOB owner; + reset to TRUE on B-tree + page splits and merges */ + unsigned del:1; /*!< TRUE if currently + delete-marked */ +}; + +/**************************************************************//** +Add a reference to an off-page column to the index->blobs map. */ +UNIV_INTERN +void +btr_blob_dbg_add_blob( +/*==================*/ + const rec_t* rec, /*!< in: clustered index record */ + ulint field_no, /*!< in: number of off-page column */ + ulint page_no, /*!< in: start page of the column */ + dict_index_t* index, /*!< in/out: index tree */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); +/**************************************************************//** +Display the references to off-page columns. +This function is to be called from a debugger, +for example when a breakpoint on ut_dbg_assertion_failed is hit. 
*/ +UNIV_INTERN +void +btr_blob_dbg_print( +/*===============*/ + const dict_index_t* index) /*!< in: index tree */ + __attribute__((nonnull)); +/**************************************************************//** +Check that there are no references to off-page columns from or to +the given page. Invoked when freeing or clearing a page. +@return TRUE when no orphan references exist */ +UNIV_INTERN +ibool +btr_blob_dbg_is_empty( +/*==================*/ + dict_index_t* index, /*!< in: index */ + ulint page_no) /*!< in: page number */ + __attribute__((nonnull, warn_unused_result)); + +/**************************************************************//** +Modify the 'deleted' flag of a record. */ +UNIV_INTERN +void +btr_blob_dbg_set_deleted_flag( +/*==========================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ + ibool del) /*!< in: TRUE=deleted, FALSE=exists */ + __attribute__((nonnull)); +/**************************************************************//** +Change the ownership of an off-page column. */ +UNIV_INTERN +void +btr_blob_dbg_owner( +/*===============*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ + ulint i, /*!< in: ith field in rec */ + ibool own) /*!< in: TRUE=owned, FALSE=disowned */ + __attribute__((nonnull)); +/** Assert that there are no BLOB references to or from the given page. 
*/ +# define btr_blob_dbg_assert_empty(index, page_no) \ + ut_a(btr_blob_dbg_is_empty(index, page_no)) +#else /* UNIV_BLOB_DEBUG */ +# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0) +# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0) +# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0) +# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0) +#endif /* UNIV_BLOB_DEBUG */ + /**************************************************************//** Gets the root node of a tree and x-latches it. @return root page, x-latched */ diff --git a/storage/innodb_plugin/include/btr0types.h b/storage/innodb_plugin/include/btr0types.h index ef4a6b04b34..07c06fb18d7 100644 --- a/storage/innodb_plugin/include/btr0types.h +++ b/storage/innodb_plugin/include/btr0types.h @@ -38,6 +38,131 @@ typedef struct btr_cur_struct btr_cur_t; /** B-tree search information for the adaptive hash index */ typedef struct btr_search_struct btr_search_t; +#ifdef UNIV_BLOB_DEBUG +# include "buf0types.h" +/** An index->blobs entry for keeping track of off-page column references */ +typedef struct btr_blob_dbg_struct btr_blob_dbg_t; + +/** Insert to index->blobs a reference to an off-page column. +@param index the index tree +@param b the reference +@param ctx context (for logging) */ +UNIV_INTERN +void +btr_blob_dbg_rbt_insert( +/*====================*/ + dict_index_t* index, /*!< in/out: index tree */ + const btr_blob_dbg_t* b, /*!< in: the reference */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); + +/** Remove from index->blobs a reference to an off-page column. 
+@param index the index tree +@param b the reference +@param ctx context (for logging) */ +UNIV_INTERN +void +btr_blob_dbg_rbt_delete( +/*====================*/ + dict_index_t* index, /*!< in/out: index tree */ + const btr_blob_dbg_t* b, /*!< in: the reference */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); + +/**************************************************************//** +Add to index->blobs any references to off-page columns from a record. +@return number of references added */ +UNIV_INTERN +ulint +btr_blob_dbg_add_rec( +/*=================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: offsets */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); +/**************************************************************//** +Remove from index->blobs any references to off-page columns from a record. +@return number of references removed */ +UNIV_INTERN +ulint +btr_blob_dbg_remove_rec( +/*====================*/ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in/out: index */ + const ulint* offsets,/*!< in: offsets */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); +/**************************************************************//** +Count and add to index->blobs any references to off-page columns +from records on a page. +@return number of references added */ +UNIV_INTERN +ulint +btr_blob_dbg_add( +/*=============*/ + const page_t* page, /*!< in: rewritten page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); +/**************************************************************//** +Count and remove from index->blobs any references to off-page columns +from records on a page. +Used when reorganizing a page, before copying the records. 
+@return number of references removed */ +UNIV_INTERN +ulint +btr_blob_dbg_remove( +/*================*/ + const page_t* page, /*!< in: b-tree page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); +/**************************************************************//** +Restore in index->blobs any references to off-page columns +Used when page reorganize fails due to compressed page overflow. */ +UNIV_INTERN +void +btr_blob_dbg_restore( +/*=================*/ + const page_t* npage, /*!< in: page that failed to compress */ + const page_t* page, /*!< in: copy of original page */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx) /*!< in: context (for logging) */ + __attribute__((nonnull)); + +/** Operation that processes the BLOB references of an index record +@param[in] rec record on index page +@param[in/out] index the index tree of the record +@param[in] offsets rec_get_offsets(rec,index) +@param[in] ctx context (for logging) +@return number of BLOB references processed */ +typedef ulint (*btr_blob_dbg_op_f) +(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx); + +/**************************************************************//** +Count and process all references to off-page columns on a page. 
+@return number of references processed */ +UNIV_INTERN +ulint +btr_blob_dbg_op( +/*============*/ + const page_t* page, /*!< in: B-tree leaf page */ + const rec_t* rec, /*!< in: record to start from + (NULL to process the whole page) */ + dict_index_t* index, /*!< in/out: index */ + const char* ctx, /*!< in: context (for logging) */ + const btr_blob_dbg_op_f op) /*!< in: operation on records */ + __attribute__((nonnull(1,3,4,5))); +#else /* UNIV_BLOB_DEBUG */ +# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0) +# define btr_blob_dbg_add(page, index, ctx) ((void) 0) +# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0) +# define btr_blob_dbg_remove(page, index, ctx) ((void) 0) +# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0) +# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0) +#endif /* UNIV_BLOB_DEBUG */ + /** The size of a reference to data stored on a different page. The reference is stored at the end of the prefix of the field in the index record. 
*/ diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h index 09a068ccb93..bd32a239cfd 100644 --- a/storage/innodb_plugin/include/dict0mem.h +++ b/storage/innodb_plugin/include/dict0mem.h @@ -340,6 +340,13 @@ struct dict_index_struct{ index, or 0 if the index existed when InnoDB was started up */ #endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_BLOB_DEBUG + mutex_t blobs_mutex; + /*!< mutex protecting blobs */ + void* blobs; /*!< map of (page_no,heap_no,field_no) + to first_blob_page_no; protected by + blobs_mutex; @see btr_blob_dbg_t */ +#endif /* UNIV_BLOB_DEBUG */ #ifdef UNIV_DEBUG ulint magic_n;/*!< magic number */ /** Value of dict_index_struct::magic_n */ diff --git a/storage/innodb_plugin/include/page0zip.h b/storage/innodb_plugin/include/page0zip.h index 574809e5227..00c1d0516e6 100644 --- a/storage/innodb_plugin/include/page0zip.h +++ b/storage/innodb_plugin/include/page0zip.h @@ -420,7 +420,7 @@ page_zip_copy_recs( const page_t* src, /*!< in: page */ dict_index_t* index, /*!< in: index of the B-tree */ mtr_t* mtr) /*!< in: mini-transaction */ - __attribute__((nonnull(1,2,3,4))); + __attribute__((nonnull)); #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 690bfd5d6a9..6dd38df3782 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -194,6 +194,8 @@ this will break redo log file compatibility, but it may be useful when debugging redo log application problems. 
*/ #define UNIV_MEM_DEBUG /* detect memory leaks etc */ #define UNIV_IBUF_DEBUG /* debug the insert buffer */ +#define UNIV_BLOB_DEBUG /* track BLOB ownership; +assumes that no BLOBs survive server restart */ #define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer; this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES, and the insert buffer must be empty when the database is started */ diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c index f10f16a7dd9..936762b986a 100644 --- a/storage/innodb_plugin/page/page0cur.c +++ b/storage/innodb_plugin/page/page0cur.c @@ -1149,6 +1149,8 @@ use_heap: current_rec, index, mtr); } + btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert"); + return(insert_rec); } @@ -1195,10 +1197,12 @@ page_cur_insert_rec_zip_reorg( } /* Out of space: restore the page */ + btr_blob_dbg_remove(page, index, "insert_zip_fail"); if (!page_zip_decompress(page_zip, page, FALSE)) { ut_error; /* Memory corrupted? */ } ut_ad(page_validate(page, index)); + btr_blob_dbg_add(page, index, "insert_zip_fail"); return(NULL); } @@ -1490,6 +1494,8 @@ use_heap: page_zip_write_rec(page_zip, insert_rec, index, offsets, 1); + btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok"); + /* 9. Write log record of the insert */ if (UNIV_LIKELY(mtr != NULL)) { page_cur_insert_rec_write_log(insert_rec, rec_size, @@ -1697,6 +1703,9 @@ page_copy_rec_list_end_to_created_page( heap_top += rec_size; + rec_offs_make_valid(insert_rec, index, offsets); + btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end"); + page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, index, mtr); prev_rec = insert_rec; @@ -1944,6 +1953,7 @@ page_cur_delete_rec( page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1); /* 6. Free the memory occupied by the record */ + btr_blob_dbg_remove_rec(current_rec, index, offsets, "delete"); page_mem_free(page, page_zip, current_rec, index, offsets); /* 7. 
Now we have decremented the number of owned records of the slot. diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c index 10008f9ac25..6cae03e8829 100644 --- a/storage/innodb_plugin/page/page0page.c +++ b/storage/innodb_plugin/page/page0page.c @@ -685,12 +685,16 @@ page_copy_rec_list_end( if (UNIV_UNLIKELY (!page_zip_reorganize(new_block, index, mtr))) { + btr_blob_dbg_remove(new_page, index, + "copy_end_reorg_fail"); if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); + btr_blob_dbg_add(new_page, index, + "copy_end_reorg_fail"); return(NULL); } else { /* The page was reorganized: @@ -803,12 +807,16 @@ page_copy_rec_list_start( if (UNIV_UNLIKELY (!page_zip_reorganize(new_block, index, mtr))) { + btr_blob_dbg_remove(new_page, index, + "copy_start_reorg_fail"); if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); + btr_blob_dbg_add(new_page, index, + "copy_start_reorg_fail"); return(NULL); } else { /* The page was reorganized: @@ -1080,6 +1088,9 @@ page_delete_rec_list_end( /* Remove the record chain segment from the record chain */ page_rec_set_next(prev_rec, page_get_supremum_rec(page)); + btr_blob_dbg_op(page, rec, index, "delete_end", + btr_blob_dbg_remove_rec); + /* Catenate the deleted chain segment to the page free list */ page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE)); diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index bb9b0995c72..a1dd4177ba8 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -4451,6 +4451,8 @@ page_zip_reorganize( /* Copy the old page to temporary space */ buf_frame_copy(temp_page, page); + btr_blob_dbg_remove(page, index, "zip_reorg"); + /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) 
is preserved intact */ @@ -4509,7 +4511,7 @@ page_zip_copy_recs( mtr_t* mtr) /*!< in: mini-transaction */ { ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX)); ut_ad(!dict_index_is_ibuf(index)); #ifdef UNIV_ZIP_DEBUG /* The B-tree operations that call this function may set @@ -4579,6 +4581,7 @@ page_zip_copy_recs( #ifdef UNIV_ZIP_DEBUG ut_a(page_zip_validate(page_zip, page)); #endif /* UNIV_ZIP_DEBUG */ + btr_blob_dbg_add(page, index, "page_zip_copy_recs"); page_zip_compress_write_log(page_zip, page, index, mtr); } diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 9ded3d68018..3a6de4b94a7 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -498,14 +498,49 @@ row_upd_rec_in_place( n_fields = upd_get_n_fields(update); for (i = 0; i < n_fields; i++) { +#ifdef UNIV_BLOB_DEBUG + btr_blob_dbg_t b; + const byte* field_ref = NULL; +#endif /* UNIV_BLOB_DEBUG */ + upd_field = upd_get_nth_field(update, i); new_val = &(upd_field->new_val); ut_ad(!dfield_is_ext(new_val) == !rec_offs_nth_extern(offsets, upd_field->field_no)); +#ifdef UNIV_BLOB_DEBUG + if (dfield_is_ext(new_val)) { + ulint len; + field_ref = rec_get_nth_field(rec, offsets, i, &len); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + b.ref_field_no = i; + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + ut_a(b.ref_field_no >= index->n_uniq); + btr_blob_dbg_rbt_delete(index, &b, "upd_in_place"); + } +#endif /* UNIV_BLOB_DEBUG */ rec_set_nth_field(rec, offsets, upd_field->field_no, dfield_get_data(new_val), dfield_get_len(new_val)); + +#ifdef UNIV_BLOB_DEBUG + if (dfield_is_ext(new_val)) { + 
b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG); + b.del = rec_get_deleted_flag( + rec, rec_offs_comp(offsets)); + + btr_blob_dbg_rbt_insert(index, &b, "upd_in_place"); + } +#endif /* UNIV_BLOB_DEBUG */ } if (UNIV_LIKELY_NULL(page_zip)) { diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index 73f8f319704..f8b5049ca65 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -1061,6 +1061,12 @@ innobase_start_or_create_for_mysql(void) ); #endif +#ifdef UNIV_BLOB_DEBUG + fprintf(stderr, + "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n" + "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n"); +#endif /* UNIV_BLOB_DEBUG */ + #ifdef UNIV_SYNC_DEBUG fprintf(stderr, "InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n"); -- cgit v1.2.1 From 8aa7e213dc7127bc334087eed5dbcd17e2865a47 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Thu, 10 Feb 2011 13:21:22 +0200 Subject: Increment InnoDB Plugin version from 1.0.15 to 1.0.16. InnoDB Plugin 1.0.15 has been released with MySQL 5.1.55. 
--- storage/innodb_plugin/include/univ.i | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index 6dd38df3782..22ed765e680 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 15 +#define INNODB_VERSION_BUGFIX 16 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; -- cgit v1.2.1 From b5c61ed1f25001e6a967bc2e70273d61bd22bab6 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Thu, 10 Feb 2011 17:16:32 +0200 Subject: Fix Bug#59307 Valgrind: uninitialized value in rw_lock_set_writer_id_and_recursion_flag() by silencing a bogus Valgrind warning: ==4392== Conditional jump or move depends on uninitialised value(s) ==4392== at 0x5A18416: rw_lock_set_writer_id_and_recursion_flag (sync0rw.ic:283) ==4392== by 0x5A1865C: rw_lock_x_lock_low (sync0rw.c:558) ==4392== by 0x5A18481: rw_lock_x_lock_func (sync0rw.c:617) ==4392== by 0x597EEE6: mtr_x_lock_func (mtr0mtr.ic:271) ==4392== by 0x597EBBD: fsp_header_init (fsp0fsp.c:970) ==4392== by 0x5A15E78: innobase_start_or_create_for_mysql (srv0start.c:1508) ==4392== by 0x598B789: innobase_init(void*) (ha_innodb.cc:2282) os_compare_and_swap_thread_id() is defined as __sync_bool_compare_and_swap(). From the GCC doc: `bool __sync_bool_compare_and_swap (TYPE *ptr, TYPE oldval TYPE newval, ...)' ... The "bool" version returns true if the comparison is successful and NEWVAL was written. So it is not possible that the return value is uninitialized, no matter what the arguments to os_compare_and_swap_thread_id() are. Probably Valgrind gets confused by the implementation of the GCC internal function __sync_bool_compare_and_swap(). 
--- storage/innodb_plugin/include/sync0rw.ic | 1 + 1 file changed, 1 insertion(+) (limited to 'storage') diff --git a/storage/innodb_plugin/include/sync0rw.ic b/storage/innodb_plugin/include/sync0rw.ic index 7116f1b7c9b..4110a0a7e0c 100644 --- a/storage/innodb_plugin/include/sync0rw.ic +++ b/storage/innodb_plugin/include/sync0rw.ic @@ -280,6 +280,7 @@ rw_lock_set_writer_id_and_recursion_flag( local_thread = lock->writer_thread; success = os_compare_and_swap_thread_id( &lock->writer_thread, local_thread, curr_thread); + UNIV_MEM_VALID(&success, sizeof(success)); ut_a(success); lock->recursive = recursive; -- cgit v1.2.1 From 4a8c83574626f4107ca84964f4c1faa13176eff1 Mon Sep 17 00:00:00 2001 From: Jimmy Yang Date: Mon, 14 Feb 2011 02:07:59 -0800 Subject: Fix Bug #59749 Enabling concurrent reads while creating non-primary unique index gives failures. Approved by Marko --- storage/innodb_plugin/ChangeLog | 6 ++++++ storage/innodb_plugin/handler/handler0alter.cc | 12 ++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 0cbdc8ed9d2..9fa00ac8e6f 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-02-14 The InnoDB Team + + * handler/handler0alter.cc: + Bug#59749 Enabling concurrent reads while creating non-primary + unique index gives failures + 2011-01-31 The InnoDB Team * btr/btr0cur.c, include/row0upd.h, diff --git a/storage/innodb_plugin/handler/handler0alter.cc b/storage/innodb_plugin/handler/handler0alter.cc index 517445f7e69..dc1317d5c5a 100644 --- a/storage/innodb_plugin/handler/handler0alter.cc +++ b/storage/innodb_plugin/handler/handler0alter.cc @@ -782,10 +782,6 @@ err_exit: ut_ad(error == DB_SUCCESS); - /* We will need to rebuild index translation table. 
Set - valid index entry count in the translation table to zero */ - share->idx_trans_tbl.index_count = 0; - /* Commit the data dictionary transaction in order to release the table locks on the system tables. This means that if MySQL crashes while creating a new primary key inside @@ -911,6 +907,14 @@ error: } convert_error: + if (error == DB_SUCCESS) { + /* Build index is successful. We will need to + rebuild index translation table. Reset the + index entry count in the translation table + to zero, so that translation table will be rebuilt */ + share->idx_trans_tbl.index_count = 0; + } + error = convert_error_code_to_mysql(error, innodb_table->flags, user_thd); -- cgit v1.2.1 From 0efaef7d469eb6decdd8cf17057154914a10fd41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Feb 2011 10:51:33 +0200 Subject: Bug#59307 Valgrind: uninitialized value in rw_lock_set_writer_id_and_recursion_flag() rw_lock_create_func(): Initialize lock->writer_thread, so that Valgrind will not complain even when Valgrind instrumentation is not enabled. Flag lock->writer_thread uninitialized, so that Valgrind can complain when it is used uninitialized. rw_lock_set_writer_id_and_recursion_flag(): Revert the bogus Valgrind instrumentation that was pushed in the first attempt to fix this bug. 
--- storage/innodb_plugin/ChangeLog | 6 ++++++ storage/innodb_plugin/include/sync0rw.ic | 1 - storage/innodb_plugin/sync/sync0rw.c | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 9fa00ac8e6f..1b2747ab012 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-02-15 The InnoDB Team + + * sync/sync0rw.c, innodb_bug59307.test: + Bug#59307 Valgrind: uninitialized value in + rw_lock_set_writer_id_and_recursion_flag() + 2011-02-14 The InnoDB Team * handler/handler0alter.cc: diff --git a/storage/innodb_plugin/include/sync0rw.ic b/storage/innodb_plugin/include/sync0rw.ic index 4110a0a7e0c..7116f1b7c9b 100644 --- a/storage/innodb_plugin/include/sync0rw.ic +++ b/storage/innodb_plugin/include/sync0rw.ic @@ -280,7 +280,6 @@ rw_lock_set_writer_id_and_recursion_flag( local_thread = lock->writer_thread; success = os_compare_and_swap_thread_id( &lock->writer_thread, local_thread, curr_thread); - UNIV_MEM_VALID(&success, sizeof(success)); ut_a(success); lock->recursive = recursive; diff --git a/storage/innodb_plugin/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c index 00e0324becd..a5da606ad80 100644 --- a/storage/innodb_plugin/sync/sync0rw.c +++ b/storage/innodb_plugin/sync/sync0rw.c @@ -260,6 +260,9 @@ rw_lock_create_func( contains garbage at initialization and cannot be used for recursive x-locking. */ lock->recursive = FALSE; + /* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. 
*/ + memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread); + UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread); #ifdef UNIV_SYNC_DEBUG UT_LIST_INIT(lock->debug_list); -- cgit v1.2.1 From 9c89cca5e364310928bde10287a56128378c107c Mon Sep 17 00:00:00 2001 From: Dmitry Lenev Date: Tue, 15 Feb 2011 14:03:05 +0300 Subject: Fix for bug#11766714 (former bug @59888) "debug assertion when attempt to create spatial index on char > 31 bytes". Attempt to create spatial index on char field with length greater than 31 bytes led to assertion failure on server compiled with safemutex support. The problem occurred in mi_create() function which was called to create a new version of table being altered. This function failed since it detected an attempt to create a spatial key on non-binary column and tried to return an error. On its error path it tried to unlock THR_LOCK_myisam mutex which has not been locked at this point. Indeed such an incorrect behavior was caught by safemutex wrapper and caused assertion failure. This patch fixes the problem by ensuring that mi_create() doesn't release THR_LOCK_myisam mutex on error path if it was not acquired. mysql-test/r/gis.result: Added test for bug @59888 "debug assertion when attempt to create spatial index on char > 31 bytes". mysql-test/t/gis.test: Added test for bug @59888 "debug assertion when attempt to create spatial index on char > 31 bytes". storage/myisam/mi_create.c: Changed mi_create() not to release THR_LOCK_myisam mutex on error path if it was not acquired. 
--- storage/myisam/mi_create.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'storage') diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index 42bd8e26a94..8c83996cadf 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -272,7 +272,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, keyseg->type != HA_KEYTYPE_VARBINARY2) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } } keydef->keysegs+=sp_segs; @@ -281,7 +281,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, min_key_length_skip+=SPLEN*2*SPDIMS; #else my_errno= HA_ERR_UNSUPPORTED; - goto err; + goto err_no_lock; #endif /*HAVE_SPATIAL*/ } else if (keydef->flag & HA_FULLTEXT) @@ -297,7 +297,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, keyseg->type != HA_KEYTYPE_VARTEXT2) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } if (!(keyseg->flag & HA_BLOB_PART) && (keyseg->type == HA_KEYTYPE_VARTEXT1 || @@ -422,7 +422,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, if (keydef->keysegs > MI_MAX_KEY_SEG) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } /* key_segs may be 0 in the case when we only want to be able to @@ -447,7 +447,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, length >= MI_MAX_KEY_BUFF) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } set_if_bigger(max_key_block_length,keydef->block_length); keydef->keylength= (uint16) key_length; @@ -494,7 +494,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, "indexes and/or unique constraints.", MYF(0), name + dirname_length(name)); my_errno= HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } bmove(share.state.header.file_version,(uchar*) myisam_file_magic,4); @@ -827,12 +827,14 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, errpos=0; pthread_mutex_unlock(&THR_LOCK_myisam); if 
(my_close(file,MYF(0))) - goto err; + goto err_no_lock; my_free((char*) rec_per_key_part,MYF(0)); DBUG_RETURN(0); err: pthread_mutex_unlock(&THR_LOCK_myisam); + +err_no_lock: save_errno=my_errno; switch (errpos) { case 3: -- cgit v1.2.1 From 5a805fe7c4b0ec4907376c4439c677d88b2bb0dd Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Fri, 25 Feb 2011 11:50:18 +0200 Subject: Fix BUG#11798085 - INCORRECT INTEGER TYPES USED IN CALCULATION RESULT IN OVERFLOW Do not assign the result of the difference to a signed variable and then check whether it is negative afterwards because this limits the max diff to 2G on 32 bit systems. E.g. "signed = 3.5G - 1G" would be negative and the code would assume that 3.5G < 1G. Instead compare the two variables directly and assign to unsigned only if we know that the result of the subtraction will be positive. Discussed with: Jimmy and Sunny (via IRC) --- storage/innodb_plugin/buf/buf0buf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 6bbd5565c58..51a3a393d36 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -1893,16 +1893,19 @@ buf_block_align( /* TODO: protect buf_pool->chunks with a mutex (it will currently remain constant after buf_pool_init()) */ for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) { - lint offs = ptr - chunk->blocks->frame; + ulint offs; - if (UNIV_UNLIKELY(offs < 0)) { + if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) { continue; } + /* else */ + + offs = ptr - chunk->blocks->frame; offs >>= UNIV_PAGE_SIZE_SHIFT; - if (UNIV_LIKELY((ulint) offs < chunk->size)) { + if (UNIV_LIKELY(offs < chunk->size)) { buf_block_t* block = &chunk->blocks[offs]; /* The function buf_chunk_init() invokes -- cgit v1.2.1 From 0f8ae318c7203158e1ea70cbf3a6bba41fd2dde6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Feb 
2011 13:51:18 +0200 Subject: Bug #58549 Race condition in buf_LRU_drop_page_hash_for_tablespace() and compressed tables buf_LRU_drop_page_hash_for_tablespace(): after releasing and reacquiring the buffer pool mutex, do not dereference any block descriptor pointer that is not known to be a pointer to an uncompressed page frame (type buf_block_t; state == BUF_BLOCK_FILE_PAGE). Also, defer the acquisition of the block_mutex until it is needed. buf_page_get_gen(): Add mode == BUF_PEEK_IF_IN_POOL for buffer-fixing a block without making it young in the LRU list. buf_page_get_gen(), buf_page_init(), buf_LRU_block_remove_hashed_page(): Set bpage->state = BUF_BLOCK_ZIP_FREE before buf_buddy_free(bpage), so that similar race conditions might be detected a little more easily. btr_search_drop_page_hash_when_freed(): Use BUF_PEEK_IF_IN_POOL when dropping the hash indexes. rb://528 approved by Jimmy Yang --- storage/innodb_plugin/ChangeLog | 6 +++ storage/innodb_plugin/btr/btr0sea.c | 4 +- storage/innodb_plugin/buf/buf0buf.c | 32 +++++++++--- storage/innodb_plugin/buf/buf0lru.c | 87 +++++++++++++++++---------------- storage/innodb_plugin/include/buf0buf.h | 4 +- 5 files changed, 81 insertions(+), 52 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 1b2747ab012..1ece3ad1825 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-02-28 The InnoDB Team + + * btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c: + Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace() + and compressed tables + 2011-02-15 The InnoDB Team * sync/sync0rw.c, innodb_bug59307.test: diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index 9835efcf712..cd0eadbb1b8 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -1201,8 +1201,8 @@ btr_search_drop_page_hash_when_freed( having to fear a deadlock. 
*/ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL, - BUF_GET_IF_IN_POOL, __FILE__, __LINE__, - &mtr); + BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, + &mtr); /* Because the buffer pool mutex was released by buf_page_peek_if_search_hashed(), it is possible that the block was removed from the buffer pool by another thread diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 51a3a393d36..14ec7b75911 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -2031,7 +2031,7 @@ buf_page_get_gen( ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ buf_block_t* guess, /*!< in: guessed block or NULL */ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ + BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mini-transaction */ @@ -2047,9 +2047,19 @@ buf_page_get_gen( ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) - || (mode == BUF_GET_NO_LATCH)); +#ifdef UNIV_DEBUG + switch (mode) { + case BUF_GET_NO_LATCH: + ut_ad(rw_latch == RW_NO_LATCH); + break; + case BUF_GET: + case BUF_GET_IF_IN_POOL: + case BUF_PEEK_IF_IN_POOL: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ ut_ad(zip_size == fil_space_get_zip_size(space)); ut_ad(ut_is_2pow(zip_size)); #ifndef UNIV_LOG_DEBUG @@ -2091,7 +2101,8 @@ loop2: buf_pool_mutex_exit(); - if (mode == BUF_GET_IF_IN_POOL) { + if (mode == BUF_GET_IF_IN_POOL + || mode == BUF_PEEK_IF_IN_POOL) { return(NULL); } @@ -2130,7 +2141,8 @@ loop2: must_read = buf_block_get_io_fix(block) == BUF_IO_READ; - if (must_read && mode == BUF_GET_IF_IN_POOL) { + if (must_read && (mode == BUF_GET_IF_IN_POOL + || mode == BUF_PEEK_IF_IN_POOL)) { /* The page is only being read to 
buffer */ buf_pool_mutex_exit(); @@ -2248,6 +2260,7 @@ wait_until_unfixed: mutex_exit(&buf_pool_zip_mutex); buf_pool->n_pend_unzip++; + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof *bpage); buf_pool_mutex_exit(); @@ -2324,7 +2337,9 @@ wait_until_unfixed: buf_pool_mutex_exit(); - buf_page_set_accessed_make_young(&block->page, access_time); + if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) { + buf_page_set_accessed_make_young(&block->page, access_time); + } #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!block->page.file_page_was_freed); @@ -2377,7 +2392,7 @@ wait_until_unfixed: mtr_memo_push(mtr, block, fix_type); - if (!access_time) { + if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -2926,6 +2941,7 @@ err_exit: && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { /* The block was added by some other thread. */ + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(data, zip_size); diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index 39feb06ff23..a69b2658c51 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace( page_arr = ut_malloc(sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE); buf_pool_mutex_enter(); + num_entries = 0; scan_again: - num_entries = 0; bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); buf_page_t* prev_bpage; + ibool is_fixed; - mutex_enter(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE || bpage->space != id - || bpage->buf_fix_count > 0 || bpage->io_fix != BUF_IO_NONE) { - /* We leave the fixed pages as is in this scan. - To be dealt with later in the final scan. 
*/ - mutex_exit(block_mutex); - goto next_page; + /* Compressed pages are never hashed. + Skip blocks of other tablespaces. + Skip I/O-fixed blocks (to be dealt with later). */ +next_page: + bpage = prev_bpage; + continue; } - if (((buf_block_t*) bpage)->is_hashed) { + mutex_enter(&((buf_block_t*) bpage)->mutex); + is_fixed = bpage->buf_fix_count > 0 + || !((buf_block_t*) bpage)->is_hashed; + mutex_exit(&((buf_block_t*) bpage)->mutex); - /* Store the offset(i.e.: page_no) in the array - so that we can drop hash index in a batch - later. */ - page_arr[num_entries] = bpage->offset; - mutex_exit(block_mutex); - ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); - ++num_entries; + if (is_fixed) { + goto next_page; + } - if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { - goto next_page; - } - /* Array full. We release the buf_pool_mutex to - obey the latching order. */ - buf_pool_mutex_exit(); - - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, - num_entries); - num_entries = 0; - buf_pool_mutex_enter(); - } else { - mutex_exit(block_mutex); + /* Store the page number so that we can drop the hash + index in a batch later. */ + page_arr[num_entries] = bpage->offset; + ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); + ++num_entries; + + if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { + goto next_page; } -next_page: - /* Note that we may have released the buf_pool mutex - above after reading the prev_bpage during processing - of a page_hash_batch (i.e.: when the array was full). - This means that prev_bpage can change in LRU list. - This is OK because this function is a 'best effort' - to drop as many search hash entries as possible and - it does not guarantee that ALL such entries will be - dropped. */ - bpage = prev_bpage; + /* Array full. We release the buf_pool_mutex to + obey the latching order. 
*/ + buf_pool_mutex_exit(); + buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, + num_entries); + buf_pool_mutex_enter(); + num_entries = 0; + + /* Note that we released the buf_pool mutex above + after reading the prev_bpage during processing of a + page_hash_batch (i.e.: when the array was full). + Because prev_bpage could belong to a compressed-only + block, it may have been relocated, and thus the + pointer cannot be trusted. Because bpage is of type + buf_block_t, it is safe to dereference. + + bpage can change in the LRU list. This is OK because + this function is a 'best effort' to drop as many + search hash entries as possible and it does not + guarantee that ALL such entries will be dropped. */ /* If, however, bpage has been removed from LRU list to the free list then we should restart the scan. bpage->state is protected by buf_pool mutex. */ - if (bpage && !buf_page_in_file(bpage)) { - ut_a(num_entries == 0); + if (bpage + && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { goto scan_again; } } @@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page( buf_pool_mutex_exit_forbid(); buf_buddy_free(bpage->zip.data, page_zip_get_size(&bpage->zip)); + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof(*bpage)); buf_pool_mutex_exit_allow(); UNIV_MEM_UNDESC(bpage); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index a16de67aa3a..05dead5ac9e 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri /* @{ */ #define BUF_GET 10 /*!< get always */ #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ +#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make + the block young in the LRU list */ #define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but set no latch; we have separated this case, because @@ -284,7 +286,7 @@ buf_page_get_gen( ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ 
buf_block_t* guess, /*!< in: guessed block or NULL */ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ + BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ -- cgit v1.2.1 From 4f4b404e59ec941a3128caaa753f26a0ad8bef88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Mar 2011 12:01:02 +0200 Subject: Bug#11849231 inflateInit() invoked without initializing all memory According to the zlib documentation, next_in and avail_in must be initialized before invoking inflateInit or inflateInit2. Furthermore, the zalloc function must clear the allocated memory. btr_copy_zblob_prefix(): Replace the d_stream parameter with buf,len and return the copied length. page_zip_decompress(): Invoke inflateInit2 a little later. page_zip_zalloc(): Rename from page_zip_alloc(). Invoke mem_heap_zalloc() instead of mem_heap_alloc(). rb:619 approved by Jimmy Yang --- storage/innodb_plugin/ChangeLog | 5 +++ storage/innodb_plugin/btr/btr0cur.c | 76 ++++++++++++++++++----------------- storage/innodb_plugin/page/page0zip.c | 17 ++++---- 3 files changed, 53 insertions(+), 45 deletions(-) (limited to 'storage') diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index fdd29908192..7c82cd9c27f 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,8 @@ +2011-03-15 The InnoDB Team + + * btr/btr0cur.c, page/page0zip.c: + Fix Bug#11849231 inflateInit() invoked without initializing all memory + 2011-02-28 The InnoDB Team * btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c: diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 86d77c79e7b..d7b5ed0d135 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -4627,27 +4627,45 @@ btr_copy_blob_prefix( 
/*******************************************************************//** Copies the prefix of a compressed BLOB. The clustered index record -that points to this BLOB must be protected by a lock or a page latch. */ +that points to this BLOB must be protected by a lock or a page latch. +@return number of bytes written to buf */ static -void +ulint btr_copy_zblob_prefix( /*==================*/ - z_stream* d_stream,/*!< in/out: the decompressing stream */ + byte* buf, /*!< out: the externally stored part of + the field, or a prefix of it */ + ulint len, /*!< in: length of buf, in bytes */ ulint zip_size,/*!< in: compressed BLOB page size */ ulint space_id,/*!< in: space id of the BLOB pages */ ulint page_no,/*!< in: page number of the first BLOB page */ ulint offset) /*!< in: offset on the first BLOB page */ { - ulint page_type = FIL_PAGE_TYPE_ZBLOB; + ulint page_type = FIL_PAGE_TYPE_ZBLOB; + mem_heap_t* heap; + int err; + z_stream d_stream; + + d_stream.next_out = buf; + d_stream.avail_out = len; + d_stream.next_in = Z_NULL; + d_stream.avail_in = 0; + + /* Zlib inflate needs 32 kilobytes for the default + window size, plus a few kilobytes for small objects. */ + heap = mem_heap_create(40000); + page_zip_set_alloc(&d_stream, heap); ut_ad(ut_is_2pow(zip_size)); ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE); ut_ad(zip_size <= UNIV_PAGE_SIZE); ut_ad(space_id); + err = inflateInit(&d_stream); + ut_a(err == Z_OK); + for (;;) { buf_page_t* bpage; - int err; ulint next_page_no; /* There is no latch on bpage directly. 
Instead, @@ -4663,7 +4681,7 @@ btr_copy_zblob_prefix( " compressed BLOB" " page %lu space %lu\n", (ulong) page_no, (ulong) space_id); - return; + goto func_exit; } if (UNIV_UNLIKELY @@ -4689,13 +4707,13 @@ btr_copy_zblob_prefix( offset += 4; } - d_stream->next_in = bpage->zip.data + offset; - d_stream->avail_in = zip_size - offset; + d_stream.next_in = bpage->zip.data + offset; + d_stream.avail_in = zip_size - offset; - err = inflate(d_stream, Z_NO_FLUSH); + err = inflate(&d_stream, Z_NO_FLUSH); switch (err) { case Z_OK: - if (!d_stream->avail_out) { + if (!d_stream.avail_out) { goto end_of_blob; } break; @@ -4712,13 +4730,13 @@ inflate_error: " compressed BLOB" " page %lu space %lu returned %d (%s)\n", (ulong) page_no, (ulong) space_id, - err, d_stream->msg); + err, d_stream.msg); case Z_BUF_ERROR: goto end_of_blob; } if (next_page_no == FIL_NULL) { - if (!d_stream->avail_in) { + if (!d_stream.avail_in) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: unexpected end of" @@ -4727,7 +4745,7 @@ inflate_error: (ulong) page_no, (ulong) space_id); } else { - err = inflate(d_stream, Z_FINISH); + err = inflate(&d_stream, Z_FINISH); switch (err) { case Z_STREAM_END: case Z_BUF_ERROR: @@ -4739,7 +4757,7 @@ inflate_error: end_of_blob: buf_page_release_zip(bpage); - return; + goto func_exit; } buf_page_release_zip(bpage); @@ -4751,6 +4769,12 @@ end_of_blob: offset = FIL_PAGE_NEXT; page_type = FIL_PAGE_TYPE_ZBLOB2; } + +func_exit: + inflateEnd(&d_stream); + mem_heap_free(heap); + UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); + return(d_stream.total_out); } /*******************************************************************//** @@ -4776,28 +4800,8 @@ btr_copy_externally_stored_field_prefix_low( } if (UNIV_UNLIKELY(zip_size)) { - int err; - z_stream d_stream; - mem_heap_t* heap; - - /* Zlib inflate needs 32 kilobytes for the default - window size, plus a few kilobytes for small objects. 
*/ - heap = mem_heap_create(40000); - page_zip_set_alloc(&d_stream, heap); - - err = inflateInit(&d_stream); - ut_a(err == Z_OK); - - d_stream.next_out = buf; - d_stream.avail_out = len; - d_stream.avail_in = 0; - - btr_copy_zblob_prefix(&d_stream, zip_size, - space_id, page_no, offset); - inflateEnd(&d_stream); - mem_heap_free(heap); - UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); - return(d_stream.total_out); + return(btr_copy_zblob_prefix(buf, len, zip_size, + space_id, page_no, offset)); } else { return(btr_copy_blob_prefix(buf, len, space_id, page_no, offset)); diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c index a1dd4177ba8..6e866b3f016 100644 --- a/storage/innodb_plugin/page/page0zip.c +++ b/storage/innodb_plugin/page/page0zip.c @@ -653,13 +653,13 @@ page_zip_dir_encode( Allocate memory for zlib. */ static void* -page_zip_malloc( +page_zip_zalloc( /*============*/ void* opaque, /*!< in/out: memory heap */ uInt items, /*!< in: number of items to allocate */ uInt size) /*!< in: size of an item in bytes */ { - return(mem_heap_alloc(opaque, items * size)); + return(mem_heap_zalloc(opaque, items * size)); } /**********************************************************************//** @@ -684,7 +684,7 @@ page_zip_set_alloc( { z_stream* strm = stream; - strm->zalloc = page_zip_malloc; + strm->zalloc = page_zip_zalloc; strm->zfree = page_zip_free; strm->opaque = heap; } @@ -2912,19 +2912,18 @@ zlib_error: page_zip_set_alloc(&d_stream, heap); - if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT) - != Z_OK)) { - ut_error; - } - d_stream.next_in = page_zip->data + PAGE_DATA; /* Subtract the space reserved for the page header and the end marker of the modification log. 
*/ d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1); - d_stream.next_out = page + PAGE_ZIP_START; d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START; + if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT) + != Z_OK)) { + ut_error; + } + /* Decode the zlib header and the index information. */ if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { -- cgit v1.2.1 From e0887df8e1127c0f1410b9d4ad61647cb5f93be2 Mon Sep 17 00:00:00 2001 From: Mattias Jonsson Date: Fri, 25 Mar 2011 12:36:02 +0100 Subject: Bug#11766249 bug#59316: PARTITIONING AND INDEX_MERGE MEMORY LEAK When executing row-ordered-retrieval index merge, the handler was cloned, but it used the wrong memory root, so instead of allocating memory on the thread/query's mem_root, it used the table's mem_root, resulting in non released memory in the table object, and was not freed until the table was closed. Solution was to ensure that memory used during cloning of a handler was allocated from the correct memory root. This was implemented by fixing handler::clone() to also take a name argument, so it can be used with partitioning. And in ha_partition only allocate the ha_partition's ref, and call the original ha_partition partitions clone() and set at cloned partitions. Fix of .bzrignore on Windows with VS 2010 --- storage/heap/ha_heap.cc | 4 ++-- storage/heap/ha_heap.h | 2 +- storage/myisam/ha_myisam.cc | 5 +++-- storage/myisam/ha_myisam.h | 2 +- storage/myisammrg/ha_myisammrg.cc | 9 ++++----- storage/myisammrg/ha_myisammrg.h | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'storage') diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index fb7c13e4e41..9f29dee2030 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -142,11 +142,11 @@ int ha_heap::close(void) DESCRIPTION Do same as default implementation but use file->s->name instead of table->s->path. 
This is needed by Windows where the clone() call sees - '/'-delimited path in table->s->path, while ha_peap::open() was called + '/'-delimited path in table->s->path, while ha_heap::open() was called with '\'-delimited path. */ -handler *ha_heap::clone(MEM_ROOT *mem_root) +handler *ha_heap::clone(const char *name, MEM_ROOT *mem_root) { handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type()); if (new_handler && !new_handler->ha_open(table, file->s->name, table->db_stat, diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index 22722129f4c..69751101645 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -34,7 +34,7 @@ class ha_heap: public handler public: ha_heap(handlerton *hton, TABLE_SHARE *table); ~ha_heap() {} - handler *clone(MEM_ROOT *mem_root); + handler *clone(const char *name, MEM_ROOT *mem_root); const char *table_type() const { return (table->in_use->variables.sql_mode & MODE_MYSQL323) ? diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 2650cc850a8..e5b657a4630 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -552,9 +552,10 @@ ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg) can_enable_indexes(1) {} -handler *ha_myisam::clone(MEM_ROOT *mem_root) +handler *ha_myisam::clone(const char *name, MEM_ROOT *mem_root) { - ha_myisam *new_handler= static_cast (handler::clone(mem_root)); + ha_myisam *new_handler= static_cast (handler::clone(name, + mem_root)); if (new_handler) new_handler->file->state= file->state; return new_handler; diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 55a5eac92de..54801bfd0b8 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -44,7 +44,7 @@ class ha_myisam: public handler public: ha_myisam(handlerton *hton, TABLE_SHARE *table_arg); ~ha_myisam() {} - handler *clone(MEM_ROOT *mem_root); + handler *clone(const char *name, MEM_ROOT *mem_root); const char *table_type() const 
{ return "MyISAM"; } const char *index_type(uint key_number); const char **bas_ext() const; diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index 4c8d45d1fe1..3beabd83512 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -459,8 +459,7 @@ int ha_myisammrg::open(const char *name, int mode __attribute__((unused)), problem because all locking is handled by the original MERGE table from which this is cloned of. */ - if (!(file= myrg_open(table->s->normalized_path.str, table->db_stat, - HA_OPEN_IGNORE_IF_LOCKED))) + if (!(file= myrg_open(name, table->db_stat, HA_OPEN_IGNORE_IF_LOCKED))) { DBUG_PRINT("error", ("my_errno %d", my_errno)); DBUG_RETURN(my_errno ? my_errno : -1); @@ -484,7 +483,7 @@ int ha_myisammrg::open(const char *name, int mode __attribute__((unused)), @return A cloned handler instance. */ -handler *ha_myisammrg::clone(MEM_ROOT *mem_root) +handler *ha_myisammrg::clone(const char *name, MEM_ROOT *mem_root) { MYRG_TABLE *u_table,*newu_table; ha_myisammrg *new_handler= @@ -505,8 +504,8 @@ handler *ha_myisammrg::clone(MEM_ROOT *mem_root) return NULL; } - if (new_handler->ha_open(table, table->s->normalized_path.str, table->db_stat, - HA_OPEN_IGNORE_IF_LOCKED)) + if (new_handler->ha_open(table, name, table->db_stat, + HA_OPEN_IGNORE_IF_LOCKED)) { delete new_handler; return NULL; diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h index 790aa15e90a..a1272c633a1 100644 --- a/storage/myisammrg/ha_myisammrg.h +++ b/storage/myisammrg/ha_myisammrg.h @@ -62,7 +62,7 @@ class ha_myisammrg: public handler int open(const char *name, int mode, uint test_if_locked); int attach_children(void); int detach_children(void); - virtual handler *clone(MEM_ROOT *mem_root); + virtual handler *clone(const char *name, MEM_ROOT *mem_root); int close(void); int write_row(uchar * buf); int update_row(const uchar * old_data, uchar * new_data); -- cgit v1.2.1 From 
08d598fb98e0f7e5c34f47c6510577a375d0fab2 Mon Sep 17 00:00:00 2001 From: Vasil Dimov Date: Mon, 28 Mar 2011 11:34:12 +0300 Subject: Store the '\0'-terminated query in row->trx_query This problem was introduced in marko.makela@oracle.com-20100514130815-ym7j7cfu88ro6km4 and is probably the reason for the following valgrind warning: from http://bugs.mysql.com/52691 , http://bugs.mysql.com/file.php?id=16880 : Version: '5.6.3-m5-valgrind-max-debug' socket: '/tmp/mysql.sock' port: 3306 Source distribution ==14947== Thread 18: ==14947== Conditional jump or move depends on uninitialised value(s) ==14947== at 0x4A06318: __GI_strlen (mc_replace_strmem.c:284) ==14947== by 0x9F3D7A: fill_innodb_trx_from_cache(trx_i_s_cache_struct*, THD*, TABLE*) (i_s.cc:591) ==14947== by 0x9F4D7D: trx_i_s_common_fill_table(THD*, TABLE_LIST*, Item*) (i_s.cc:1238) ==14947== by 0x7689F3: get_schema_tables_result(JOIN*, enum_schema_table_state) (sql_show.cc:6745) ==14947== by 0x715A75: JOIN::exec() (sql_select.cc:2861) ==14947== by 0x7185BD: mysql_select(THD*, Item***, TABLE_LIST*, unsigned int, List&, Item*, unsigned int, st_order*, st_order*, Item*, st_order*, unsigned long long, select_result*, st_select_lex_unit*, st_select_lex*) (sql_select.cc:3609) ==14947== by 0x70E823: handle_select(THD*, LEX*, select_result*, unsigned long) (sql_select.cc:319) ==14947== by 0x6F2305: execute_sqlcom_select(THD*, TABLE_LIST*) (sql_parse.cc:4557) ==14947== by 0x6EAED4: mysql_execute_command(THD*) (sql_parse.cc:2135) ==14947== by 0x6F44C9: mysql_parse(THD*, char*, unsigned int, Parser_state*) (sql_parse.cc:5597) ==14947== by 0x6E864B: dispatch_command(enum_server_command, THD*, char*, unsigned int) (sql_parse.cc:1093) ==14947== by 0x6E785E: do_command(THD*) (sql_parse.cc:815) ==14947== by 0x6C18DD: do_handle_one_connection(THD*) (sql_connect.cc:771) ==14947== by 0x6C146E: handle_one_connection (sql_connect.cc:707) ==14947== by 0x30E1807760: start_thread (pthread_create.c:301) ==14947== by 0x35EA670F: ??? 
==14947== Uninitialised value was created by a heap allocation ==14947== at 0x4A0515D: malloc (vg_replace_malloc.c:195) ==14947== by 0xB4B948: mem_area_alloc (mem0pool.c:385) ==14947== by 0xB4A27C: mem_heap_create_block (mem0mem.c:333) ==14947== by 0xB4A530: mem_heap_add_block (mem0mem.c:446) ==14947== by 0xB0D2A4: mem_heap_alloc (mem0mem.ic:186) ==14947== by 0xB0D9C2: ha_storage_put_memlim (ha0storage.c:118) ==14947== by 0xA479D8: fill_trx_row (trx0i_s.c:521) ==14947== by 0xA490E9: fetch_data_into_cache (trx0i_s.c:1319) ==14947== by 0xA491BA: trx_i_s_possibly_fetch_data_into_cache (trx0i_s.c:1352) ==14947== by 0x9F4CE7: trx_i_s_common_fill_table(THD*, TABLE_LIST*, Item*) (i_s.cc:1221) ==14947== by 0x7689F3: get_schema_tables_result(JOIN*, enum_schema_table_state) (sql_show.cc:6745) ==14947== by 0x715A75: JOIN::exec() (sql_select.cc:2861) ==14947== by 0x7185BD: mysql_select(THD*, Item***, TABLE_LIST*, unsigned int, List&, Item*, unsigned int, st_order*, st_order*, Item*, st_order*, unsigned long long, select_result*, st_select_lex_unit*, st_select_lex*) (sql_select.cc:3609) ==14947== by 0x70E823: handle_select(THD*, LEX*, select_result*, unsigned long) (sql_select.cc:319) ==14947== by 0x6F2305: execute_sqlcom_select(THD*, TABLE_LIST*) (sql_parse.cc:4557) ==14947== by 0x6EAED4: mysql_execute_command(THD*) (sql_parse.cc:2135) ==14947== by 0x6F44C9: mysql_parse(THD*, char*, unsigned int, Parser_state*) (sql_parse.cc:5597) ==14947== by 0x6E864B: dispatch_command(enum_server_command, THD*, char*, unsigned int) (sql_parse.cc:1093) ==14947== by 0x6E785E: do_command(THD*) (sql_parse.cc:815) ==14947== by 0x6C18DD: do_handle_one_connection(THD*) (sql_connect.cc:771) ==14947== by 0x6C146E: handle_one_connection (sql_connect.cc:707) ==14947== by 0x30E1807760: start_thread (pthread_create.c:301) ==14947== by 0x35EA670F: ??? 
(gdb) bt #0 0x0000000004a06318 in _vgrZU_libcZdsoZa___GI_strlen (str=0x3026bfa0 "insert into `blobtest` set `data`='pkefxxpkalpabzgrczlxefkreqljeqbvzrcnhvhsjsfnvxzjsltfuincffigdkmhvvcmnseluzgbtedrfmxvnrdmzesbinjgwvharkpgjplrlnqudfidbqwgbykupycxzyikzqincnsjrxgncqzlgyqwjdbjulztgsffxpjgymsnntdibvklwqylmwhsmdskmllxuwafabdjnwlyofknwuixiyrgnplmerfdewgizkdhznitesfqepsqbbwkdepkmjoseyxjofmmjaqdipwopfrwidmhqbtovdslvayxcnpewzhppeetblccppniamezibuoinvlxkafpcmozawtplfpepxwlwhymsuraezcwvjqzwogsozodlsfzjiyrcaljjhqwdrcjawvelhefzzaexvcbyorlcyupqwgjuamiqpiputtndjwcsuyzdfhuxswuowhrzdvriwrxqmcqthvzzzvivbabbnhdbtcfdtgssvmirrcddnytnctcvqplwytxxzxelldhwahalzxvgynaiwjyezhxqhlsqudngekocfvlbqprxqhyhwbaomgqiwkpfguohuvlnhtrsszgacxhhzeppyqwfwabiqzgyzkperiidyunrykopysvlcxwhrcboetjltawdjergalsfvaxncmzoznryumrjmncvhvxqvqhhbznnifkguuiffmlrbmgwtzvnuwlaguixqadkupfhasbbxnwkrvsfhrqanfmvjtzfqodtutkjlxfcogtsjywrdgmzgszjtsmimaelsveayqrwviqwwefeziuaqsqpauxpnzhaxjtkdfvvodniwezskbxfxszyniyzkzxngcfwgjlyrlskmrzxqnptwlilsxybuguafxxkvryyjrnkhhcmxuusitaflaiuxjhyfnzkahlgmaszujqmfdhyppdnpweqanmvzgjfyzjolbmprhnuuxextcaxzicfvsuochprmlf"...) 
at mc_replace_strmem.c:284 #1 0x00000000009f3d7b in fill_innodb_trx_from_cache (cache=0x1462440, thd=0x2a495000, table=0x2a422500) at /home/sbester/build/bzr/mysql-trunk/storage/innobase/handler/i_s.cc:591 #2 0x00000000009f4d7e in trx_i_s_common_fill_table (thd=0x2a495000, tables=0x2a4c3ec0) at /home/sbester/build/bzr/mysql-trunk/storage/innobase/handler/i_s.cc:1238 #3 0x00000000007689f4 in get_schema_tables_result (join=0x30f90c40, executed_place=PROCESSED_BY_JOIN_EXEC) at /home/sbester/build/bzr/mysql-trunk/sql/sql_show.cc:6745 #4 0x0000000000715a76 in JOIN::exec (this=0x30f90c40) at /home/sbester/build/bzr/mysql-trunk/sql/sql_select.cc:2861 #5 0x00000000007185be in mysql_select (thd=0x2a495000, rref_pointer_array=0x2a497590, tables=0x2a4c3ec0, wild_num=1, fields=..., conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2684619520, result=0x30319720, unit=0x2a496d28, select_lex=0x2a497378) at /home/sbester/build/bzr/mysql-trunk/sql/sql_select.cc:3609 #6 0x000000000070e824 in handle_select (thd=0x2a495000, lex=0x2a496c78, result=0x30319720, setup_tables_done_option=0) at /home/sbester/build/bzr/mysql-trunk/sql/sql_select.cc:319 #7 0x00000000006f2306 in execute_sqlcom_select (thd=0x2a495000, all_tables=0x2a4c3ec0) at /home/sbester/build/bzr/mysql-trunk/sql/sql_parse.cc:4557 #8 0x00000000006eaed5 in mysql_execute_command (thd=0x2a495000) at /home/sbester/build/bzr/mysql-trunk/sql/sql_parse.cc:2135 #9 0x00000000006f44ca in mysql_parse (thd=0x2a495000, rawbuf=0x30d80060 "select * from innodb_trx", length=24, parser_state=0x35ea5540) at /home/sbester/build/bzr/mysql-trunk/sql/sql_parse.cc:5597 #10 0x00000000006e864c in dispatch_command (command=COM_QUERY, thd=0x2a495000, packet=0x30bb4e31 "select * from innodb_trx", packet_length=24) at /home/sbester/build/bzr/mysql-trunk/sql/sql_parse.cc:1093 #11 0x00000000006e785f in do_command (thd=0x2a495000) at /home/sbester/build/bzr/mysql-trunk/sql/sql_parse.cc:815 #12 0x00000000006c18de in 
do_handle_one_connection (thd_arg=0x2a495000) at /home/sbester/build/bzr/mysql-trunk/sql/sql_connect.cc:771 #13 0x00000000006c146f in handle_one_connection (arg=0x2a495000) at /home/sbester/build/bzr/mysql-trunk/sql/sql_connect.cc:707 #14 0x00000030e1807761 in start_thread (arg=0x35ea6710) at pthread_create.c:301 #15 0x00000030e14e14ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 (gdb) frame 1 #1 0x00000000009f3d7b in fill_innodb_trx_from_cache (cache=0x1462440, thd=0x2a495000, table=0x2a422500) at /home/sbester/build/bzr/mysql-trunk/storage/innobase/handler/i_s.cc:591 591 row->trx_query_cs); (gdb) list 586 if (row->trx_query) { 587 /* store will do appropriate character set 588 conversion check */ 589 fields[IDX_TRX_QUERY]->store( 590 row->trx_query, strlen(row->trx_query), 591 row->trx_query_cs); 592 fields[IDX_TRX_QUERY]->set_notnull(); 593 } else { 594 fields[IDX_TRX_QUERY]->set_null(); 595 } --- storage/innodb_plugin/trx/trx0i_s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innodb_plugin/trx/trx0i_s.c b/storage/innodb_plugin/trx/trx0i_s.c index 267e91db22e..53f4dcb0bef 100644 --- a/storage/innodb_plugin/trx/trx0i_s.c +++ b/storage/innodb_plugin/trx/trx0i_s.c @@ -508,7 +508,7 @@ fill_trx_row( query[stmt_len] = '\0'; row->trx_query = ha_storage_put_memlim( - cache->storage, stmt, stmt_len + 1, + cache->storage, query, stmt_len + 1, MAX_ALLOWED_FOR_STORAGE(cache)); row->trx_query_cs = innobase_get_charset(trx->mysql_thd); -- cgit v1.2.1 From 9ff72a1acfffe95cd5e6d9e06c61c5ee9b0000e0 Mon Sep 17 00:00:00 2001 From: Magne Mahre Date: Mon, 28 Mar 2011 10:47:30 +0200 Subject: Bug#11900714 REMOVE LGPL LICENSED FILES IN MYSQL 5.1 The LGPL license is used in some legacy code, and to adhere to current licensing policy, we remove those files that are no longer used, and reorganize the remaining LGPL code so it will be GPL licensed from now on.
Note: This patch only removed LGPL licensed files in MySQL 5.1, and is the second of a set of patches to remove LGPL from all trees. (See Bug# 11840513 for details) --- storage/myisam/ft_stopwords.c | 2 +- storage/myisam/mi_check.c | 87 +++++++++++++++++++++++++++++++++++++++++++ storage/myisam/mi_test1.c | 1 + storage/myisam/mi_write.c | 1 + storage/myisam/myisamdef.h | 2 + storage/myisam/sp_test.c | 1 + 6 files changed, 93 insertions(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index 9838b15af34..dbab71f4381 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -16,7 +16,7 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ #include "ftdefs.h" -#include "my_handler.h" +#include "my_compare.h" typedef struct st_ft_stopwords { diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 935465e7edf..7bc26729e03 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -85,6 +85,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, uint buffer_length); static ha_checksum mi_byte_checksum(const uchar *buf, uint length); static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share); +static HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); void myisamchk_init(MI_CHECK *param) { @@ -4739,3 +4740,89 @@ set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share) share->delete_record=tmp.delete_record; } } + +/* + Find the first NULL value in index-suffix values tuple + + SYNOPSIS + ha_find_null() + keyseg Array of keyparts for key suffix + a Key suffix value tuple + + DESCRIPTION + Find the first NULL value in index-suffix values tuple. + TODO Consider optimizing this fuction or its use so we don't search for + NULL values in completely NOT NULL index suffixes. 
+ + RETURN + First key part that has NULL as value in values tuple, or the last key part + (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. +*/ + +static HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) +{ + for (; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++) + { + uchar *end; + if (keyseg->null_bit) + { + if (!*a++) + return keyseg; + } + end= a+ keyseg->length; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_BIT: + if (keyseg->flag & HA_SPACE_PACK) + { + int a_length; + get_key_length(a_length, a); + a += a_length; + break; + } + else + a= end; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + { + int a_length; + get_key_length(a_length, a); + a+= a_length; + break; + } + case HA_KEYTYPE_NUM: + if (keyseg->flag & HA_SPACE_PACK) + { + int alength= *a++; + end= a+alength; + } + a= end; + break; + case HA_KEYTYPE_INT8: + case HA_KEYTYPE_SHORT_INT: + case HA_KEYTYPE_USHORT_INT: + case HA_KEYTYPE_LONG_INT: + case HA_KEYTYPE_ULONG_INT: + case HA_KEYTYPE_INT24: + case HA_KEYTYPE_UINT24: +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + case HA_KEYTYPE_ULONGLONG: +#endif + case HA_KEYTYPE_FLOAT: + case HA_KEYTYPE_DOUBLE: + a= end; + break; + case HA_KEYTYPE_END: /* purecov: inspected */ + /* keep compiler happy */ + DBUG_ASSERT(0); + break; + } + } + return keyseg; +} diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c index 363b024737a..142ee9b4909 100644 --- a/storage/myisam/mi_test1.c +++ b/storage/myisam/mi_test1.c @@ -16,6 +16,7 @@ /* Testing of the basic functions of a MyISAM table */ #include "myisam.h" +#include "myisamdef.h" #include #include diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c index 72a4e006cc6..3c8ebe5dbd8 100644 --- a/storage/myisam/mi_write.c +++ b/storage/myisam/mi_write.c @@ -17,6 +17,7 @@ #include "fulltext.h" 
#include "rt_index.h" +#include "my_compare.h" #define MAX_POINTER_LENGTH 8 diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index 962155e884c..c91601f6503 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -424,6 +424,8 @@ typedef struct st_mi_sort_param #define get_pack_length(length) ((length) >= 255 ? 3 : 1) +#define portable_sizeof_char_ptr 8 + #define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ #define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ #define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) diff --git a/storage/myisam/sp_test.c b/storage/myisam/sp_test.c index f572c7ab19b..7a30a742fd6 100644 --- a/storage/myisam/sp_test.c +++ b/storage/myisam/sp_test.c @@ -17,6 +17,7 @@ /* Written by Alex Barkov, who has a shared copyright to this code */ #include "myisam.h" +#include "myisamdef.h" #ifdef HAVE_SPATIAL #include "sp_defs.h" -- cgit v1.2.1 From ddec6ecdd8521d6fd6e4c26498e7bd752fd3eddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 30 Mar 2011 14:25:58 +0300 Subject: Bug#11877216 InnoDB too eager to commit suicide on a busy server sync_array_print_long_waits(): Return the longest waiting thread ID and the longest waited-for lock. Only if those remain unchanged between calls in srv_error_monitor_thread(), increment fatal_cnt. Otherwise, reset fatal_cnt. Background: There is a built-in watchdog in InnoDB whose purpose is to kill the server when some thread is stuck waiting for a mutex or rw-lock. Before this fix, the logic was flawed. The function sync_array_print_long_waits() returns TRUE if it finds a lock wait that exceeds 10 minutes (srv_fatal_semaphore_wait_threshold). The function srv_error_monitor_thread() will kill the server if this happens 10 times in a row (fatal_cnt reaches 10), checked every 30 seconds. This is wrong, because this situation does not mean that the server is hung. 
If the server is very busy for a little over 15 minutes, it will be killed. Consider this example. Thread T1 is waiting for mutex M. Some time later, threads T2..Tn start waiting for the same mutex M. If T1 keeps waiting for 600 seconds, fatal_cnt will be incremented to 1. So far, so good. Now, if M is granted to T1, the server was obviously not stuck. But, T2..Tn keeps waiting, and their wait time will be longer than 600 seconds. If 5 minutes later, some Tn has still been waiting for more than 10 minutes for the mutex M, the server can be killed, even though it is not stuck. rb:622 approved by Jimmy Yang --- storage/innobase/include/sync0arr.h | 11 ++++++---- storage/innobase/srv/srv0srv.c | 19 ++++++++++++----- storage/innobase/sync/sync0arr.c | 36 ++++++++++++++++++++++++-------- storage/innodb_plugin/ChangeLog | 5 +++++ storage/innodb_plugin/include/sync0arr.h | 7 +++++-- storage/innodb_plugin/srv/srv0srv.c | 11 +++++++++- storage/innodb_plugin/sync/sync0arr.c | 32 +++++++++++++++++++++------- 7 files changed, 93 insertions(+), 28 deletions(-) (limited to 'storage') diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h index fae26b7a63e..ec48059dbcb 100644 --- a/storage/innobase/include/sync0arr.h +++ b/storage/innobase/include/sync0arr.h @@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void); Prints warnings of long semaphore waits to stderr. */ ibool -sync_array_print_long_waits(void); -/*=============================*/ - /* out: TRUE if fatal semaphore wait threshold - was exceeded */ +sync_array_print_long_waits( +/*========================*/ + /* out: TRUE if fatal semaphore wait threshold + was exceeded */ + os_thread_id_t* waiter, /* out: longest waiting thread */ + const void** sema) /* out: longest-waited-for semaphore */ + __attribute__((nonnull)); /************************************************************************ Validates the integrity of the wait array. 
Checks that the number of reserved cells equals the count variable. */ diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 9c34e73109c..3f6f1982992 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -2180,9 +2180,15 @@ srv_error_monitor_thread( os_thread_create */ { /* number of successive fatal timeouts observed */ - ulint fatal_cnt = 0; - dulint old_lsn; - dulint new_lsn; + ulint fatal_cnt = 0; + dulint old_lsn; + dulint new_lsn; + /* longest waiting thread for a semaphore */ + os_thread_id_t waiter = os_thread_get_curr_id(); + os_thread_id_t old_waiter = waiter; + /* the semaphore that is being waited for */ + const void* sema = NULL; + const void* old_sema = NULL; old_lsn = srv_start_lsn; @@ -2224,10 +2230,11 @@ loop: /* In case mutex_exit is not a memory barrier, it is theoretically possible some threads are left waiting though the semaphore is already released. Wake up those threads: */ - + sync_arr_wake_threads_if_sema_free(); - if (sync_array_print_long_waits()) { + if (sync_array_print_long_waits(&waiter, &sema) + && sema == old_sema && os_thread_eq(waiter, old_waiter)) { fatal_cnt++; if (fatal_cnt > 10) { @@ -2242,6 +2249,8 @@ loop: } } else { fatal_cnt = 0; + old_waiter = waiter; + old_sema = sema; } /* Flush stderr so that a database user gets the output diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index 41d3492c8c9..93a7398f252 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void) Prints warnings of long semaphore waits to stderr. 
*/ ibool -sync_array_print_long_waits(void) -/*=============================*/ - /* out: TRUE if fatal semaphore wait threshold - was exceeded */ +sync_array_print_long_waits( +/*========================*/ + /* out: TRUE if fatal semaphore wait threshold + was exceeded */ + os_thread_id_t* waiter, /* out: longest waiting thread */ + const void** sema) /* out: longest-waited-for semaphore */ { sync_cell_t* cell; ibool old_val; @@ -927,24 +929,40 @@ sync_array_print_long_waits(void) ulint i; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ibool fatal = FALSE; + double longest_diff = 0; for (i = 0; i < sync_primary_wait_array->n_cells; i++) { + double diff; + void* wait_object; + cell = sync_array_get_nth_cell(sync_primary_wait_array, i); - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) > 240) { + wait_object = cell->wait_object; + + if (wait_object == NULL || !cell->waiting) { + + continue; + } + + diff = difftime(time(NULL), cell->reservation_time); + + if (diff > 240) { fputs("InnoDB: Warning: a long semaphore wait:\n", stderr); sync_array_cell_print(stderr, cell); noticed = TRUE; } - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) - > fatal_timeout) { + if (diff > fatal_timeout) { fatal = TRUE; } + + if (diff > longest_diff) { + longest_diff = diff; + *sema = wait_object; + *waiter = cell->thread; + } } if (noticed) { diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 7c82cd9c27f..100cf3690ce 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,8 @@ +2011-03-30 The InnoDB Team + + * srv/srv0srv.c, sync/sync0arr.h, sync/sync0arr.c: + Fix Bug#11877216 InnoDB too eager to commit suicide on a busy server + 2011-03-15 The InnoDB Team * btr/btr0cur.c, page/page0zip.c: diff --git a/storage/innodb_plugin/include/sync0arr.h b/storage/innodb_plugin/include/sync0arr.h index 
5f1280f5e28..6e931346238 100644 --- a/storage/innodb_plugin/include/sync0arr.h +++ b/storage/innodb_plugin/include/sync0arr.h @@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr. @return TRUE if fatal semaphore wait threshold was exceeded */ UNIV_INTERN ibool -sync_array_print_long_waits(void); -/*=============================*/ +sync_array_print_long_waits( +/*========================*/ + os_thread_id_t* waiter, /*!< out: longest waiting thread */ + const void** sema) /*!< out: longest-waited-for semaphore */ + __attribute__((nonnull)); /********************************************************************//** Validates the integrity of the wait array. Checks that the number of reserved cells equals the count variable. */ diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index 3cf17f33c40..b1fc1ac67fd 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -2236,6 +2236,12 @@ srv_error_monitor_thread( ulint fatal_cnt = 0; ib_uint64_t old_lsn; ib_uint64_t new_lsn; + /* longest waiting thread for a semaphore */ + os_thread_id_t waiter = os_thread_get_curr_id(); + os_thread_id_t old_waiter = waiter; + /* the semaphore that is being waited for */ + const void* sema = NULL; + const void* old_sema = NULL; old_lsn = srv_start_lsn; @@ -2284,7 +2290,8 @@ loop: sync_arr_wake_threads_if_sema_free(); - if (sync_array_print_long_waits()) { + if (sync_array_print_long_waits(&waiter, &sema) + && sema == old_sema && os_thread_eq(waiter, old_waiter)) { fatal_cnt++; if (fatal_cnt > 10) { @@ -2299,6 +2306,8 @@ loop: } } else { fatal_cnt = 0; + old_waiter = waiter; + old_sema = sema; } /* Flush stderr so that a database user gets the output diff --git a/storage/innodb_plugin/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c index ad29b90d344..13970023573 100644 --- a/storage/innodb_plugin/sync/sync0arr.c +++ b/storage/innodb_plugin/sync/sync0arr.c @@ -914,8 +914,10 @@ Prints warnings of 
long semaphore waits to stderr. @return TRUE if fatal semaphore wait threshold was exceeded */ UNIV_INTERN ibool -sync_array_print_long_waits(void) -/*=============================*/ +sync_array_print_long_waits( +/*========================*/ + os_thread_id_t* waiter, /*!< out: longest waiting thread */ + const void** sema) /*!< out: longest-waited-for semaphore */ { sync_cell_t* cell; ibool old_val; @@ -923,24 +925,40 @@ sync_array_print_long_waits(void) ulint i; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ibool fatal = FALSE; + double longest_diff = 0; for (i = 0; i < sync_primary_wait_array->n_cells; i++) { + double diff; + void* wait_object; + cell = sync_array_get_nth_cell(sync_primary_wait_array, i); - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) > 240) { + wait_object = cell->wait_object; + + if (wait_object == NULL || !cell->waiting) { + + continue; + } + + diff = difftime(time(NULL), cell->reservation_time); + + if (diff > 240) { fputs("InnoDB: Warning: a long semaphore wait:\n", stderr); sync_array_cell_print(stderr, cell); noticed = TRUE; } - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) - > fatal_timeout) { + if (diff > fatal_timeout) { fatal = TRUE; } + + if (diff > longest_diff) { + longest_diff = diff; + *sema = wait_object; + *waiter = cell->thread; + } } if (noticed) { -- cgit v1.2.1