diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-01-17 13:13:03 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-01-17 14:27:29 +0200 |
commit | 457ce97ef2868c19fc8c9b7d6cd3d12bb78becf1 (patch) | |
tree | 5478b4035404f6165ca036e1723c5918343c4247 | |
parent | c3695b4058ea9a8849c22aabeabc76448fe548f8 (diff) | |
download | mariadb-git-457ce97ef2868c19fc8c9b7d6cd3d12bb78becf1.tar.gz |
MDEV-21512 InnoDB may hang due to SPATIAL INDEX
MySQL 5.7.29 includes the following fix:
Bug #30287668 INNODB: A LONG SEMAPHORE WAIT
mysql/mysql-server@5cdbb22b51cf2b35dbdf5666a251ffbec2f84dec
There is no test case. It seems that the problem could occur when
a spatial index is large and peculiar enough so that multiple R-tree
leaf pages will have the exactly same maximum bounding rectangle (MBR).
The commit message suggests that the hang can occur when R-tree
non-leaf pages are being merged, which should only be possible
during transaction rollback or the purge of transaction history,
when the R-tree index is at least 2 levels high and very many records
are being deleted. The message says that a comparison result that two
spatial index node pointer records are equal will cause an infinite loop
in rtr_page_copy_rec_list_end_no_locks(). Hence, we must include the
child page number in the comparison to be consistent with
mysql/mysql-server@2e11fe0e152e34d73579e1a9ec19aedc3f6010f6.
We fix this bug in a simpler way, involving fewer code changes.
cmp_rec_rec(): Renamed from cmp_rec_rec_with_match().
Assert that rec2 always resides in an index page.
Treat non-leaf spatial index pages specially.
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 18 | ||||
-rw-r--r-- | storage/innobase/dict/dict0stats.cc | 17 | ||||
-rw-r--r-- | storage/innobase/gis/gis0rtree.cc | 17 | ||||
-rw-r--r-- | storage/innobase/include/rem0cmp.h | 48 | ||||
-rw-r--r-- | storage/innobase/include/rem0cmp.ic | 35 | ||||
-rw-r--r-- | storage/innobase/rem/rem0cmp.cc | 90 |
6 files changed, 87 insertions, 138 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index b764f103a46..a47121399d2 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1826,9 +1826,9 @@ need_opposite_intention: offsets2 = rec_get_offsets( first_rec, index, offsets2, false, ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match(node_ptr, first_rec, - offsets, offsets2, index, FALSE, - &matched_fields); + cmp_rec_rec(node_ptr, first_rec, + offsets, offsets2, index, false, + &matched_fields); if (matched_fields >= rec_offs_n_fields(offsets) - 1) { @@ -1844,10 +1844,10 @@ need_opposite_intention: offsets2 = rec_get_offsets( last_rec, index, offsets2, false, ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match( + cmp_rec_rec( node_ptr, last_rec, offsets, offsets2, index, - FALSE, &matched_fields); + false, &matched_fields); if (matched_fields >= rec_offs_n_fields(offsets) - 1) { detected_same_key_root = true; @@ -6307,10 +6307,10 @@ btr_estimate_number_of_different_key_vals( ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match(rec, next_rec, - offsets_rec, offsets_next_rec, - index, stats_null_not_equal, - &matched_fields); + cmp_rec_rec(rec, next_rec, + offsets_rec, offsets_next_rec, + index, stats_null_not_equal, + &matched_fields); for (j = matched_fields; j < n_cols; j++) { /* We add one if this index record has diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index f3cd26f5527..2e2156865ef 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2019, MariaDB Corporation. +Copyright (c) 2015, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1166,13 +1166,9 @@ dict_stats_analyze_index_level( prev_rec, index, prev_rec_offsets, !level, n_uniq, &heap); - cmp_rec_rec_with_match(rec, - prev_rec, - rec_offsets, - prev_rec_offsets, - index, - FALSE, - &matched_fields); + cmp_rec_rec(prev_rec, rec, + prev_rec_offsets, rec_offsets, index, + false, &matched_fields); for (i = matched_fields; i < n_uniq; i++) { @@ -1392,9 +1388,8 @@ dict_stats_scan_page( /* check whether rec != next_rec when looking at the first n_prefix fields */ - cmp_rec_rec_with_match(rec, next_rec, - offsets_rec, offsets_next_rec, - index, FALSE, &matched_fields); + cmp_rec_rec(rec, next_rec, offsets_rec, offsets_next_rec, + index, false, &matched_fields); if (matched_fields < n_prefix) { /* rec != next_rec, => rec is non-boring */ diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index 8e602d127cf..1808fe851b8 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, MariaDB Corporation. +Copyright (c) 2019, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1449,10 +1449,9 @@ rtr_page_copy_rec_list_end_no_locks( offsets2 = rec_get_offsets(cur_rec, index, offsets2, is_leaf, ULINT_UNDEFINED, &heap); - cmp = cmp_rec_rec_with_match(cur1_rec, cur_rec, - offsets1, offsets2, - index, FALSE, - &cur_matched_fields); + cmp = cmp_rec_rec(cur1_rec, cur_rec, + offsets1, offsets2, index, false, + &cur_matched_fields); if (cmp < 0) { page_cur_move_to_prev(&page_cur); break; @@ -1564,15 +1563,13 @@ rtr_page_copy_rec_list_start_no_locks( while (!page_rec_is_supremum(cur_rec)) { ulint cur_matched_fields = 0; - int cmp; offsets2 = rec_get_offsets(cur_rec, index, offsets2, is_leaf, ULINT_UNDEFINED, &heap); - cmp = cmp_rec_rec_with_match(cur1_rec, cur_rec, - offsets1, offsets2, - index, FALSE, - &cur_matched_fields); + int cmp = cmp_rec_rec(cur1_rec, cur_rec, + offsets1, offsets2, index, false, + &cur_matched_fields); if (cmp < 0) { page_cur_move_to_prev(&page_cur); cur_rec = page_cur_get_rec(&page_cur); diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h index 0877c7b5b6a..af1b145b0d9 100644 --- a/storage/innobase/include/rem0cmp.h +++ b/storage/innobase/include/rem0cmp.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -80,7 +80,7 @@ cmp_dfield_dfield( /** Compare a GIS data tuple to a physical record. @param[in] dtuple data tuple -@param[in] rec B-tree record +@param[in] rec R-tree record @param[in] offsets rec_get_offsets(rec) @param[in] mode compare mode @retval negative if dtuple is less than rec */ @@ -190,43 +190,23 @@ cmp_rec_rec_simple( duplicate key value if applicable, or NULL */ MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result)); -/** Compare two B-tree records. -@param[in] rec1 B-tree record -@param[in] rec2 B-tree record -@param[in] offsets1 rec_get_offsets(rec1, index) -@param[in] offsets2 rec_get_offsets(rec2, index) -@param[in] index B-tree index -@param[in] nulls_unequal true if this is for index cardinality -statistics estimation, and innodb_stats_method=nulls_unequal -or innodb_stats_method=nulls_ignored -@param[out] matched_fields number of completely matched fields -within the first field not completely matched -@return the comparison result -@retval 0 if rec1 is equal to rec2 -@retval negative if rec1 is less than rec2 -@retval positive if rec2 is greater than rec2 */ -int -cmp_rec_rec_with_match( - const rec_t* rec1, - const rec_t* rec2, - const offset_t* offsets1, - const offset_t* offsets2, - const dict_index_t* index, - bool nulls_unequal, - ulint* matched_fields); -/** Compare two B-tree records. +/** Compare two B-tree or R-tree records. Only the common first fields are compared, and externally stored field are treated as equal. -@param[in] rec1 B-tree record -@param[in] rec2 B-tree record +@param[in] rec1 record (possibly not on an index page) +@param[in] rec2 B-tree or R-tree record in an index page @param[in] offsets1 rec_get_offsets(rec1, index) @param[in] offsets2 rec_get_offsets(rec2, index) +@param[in] nulls_unequal true if this is for index cardinality + statistics estimation with + innodb_stats_method=nulls_unequal + or innodb_stats_method=nulls_ignored @param[out] matched_fields number of completely matched fields within the first field not completely matched -@return positive, 0, negative if rec1 is greater, equal, less, than rec2, -respectively */ -UNIV_INLINE +@retval 0 if rec1 is equal to rec2 +@retval negative if rec1 is less than rec2 +@retval positive if rec1 is greater than rec2 */ int cmp_rec_rec( const rec_t* rec1, @@ -234,7 +214,9 @@ cmp_rec_rec( const offset_t* offsets1, const offset_t* offsets2, const dict_index_t* index, - ulint* matched_fields = NULL); + bool nulls_unequal = false, + ulint* matched_fields = NULL) + MY_ATTRIBUTE((nonnull(1,2,3,4,5))); /** Compare two data fields. @param[in] dfield1 data field diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic index 5ac3838f244..4230543615a 100644 --- a/storage/innobase/include/rem0cmp.ic +++ b/storage/innobase/include/rem0cmp.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -52,40 +53,6 @@ cmp_dfield_dfield( dfield_get_len(dfield2))); } -/** Compare two B-tree records. -Only the common first fields are compared, and externally stored field -are treated as equal. -@param[in] rec1 B-tree record -@param[in] rec2 B-tree record -@param[in] offsets1 rec_get_offsets(rec1, index) -@param[in] offsets2 rec_get_offsets(rec2, index) -@param[out] matched_fields number of completely matched fields - within the first field not completely matched -@return positive, 0, negative if rec1 is greater, equal, less, than rec2, -respectively */ -UNIV_INLINE -int -cmp_rec_rec( - const rec_t* rec1, - const rec_t* rec2, - const offset_t* offsets1, - const offset_t* offsets2, - const dict_index_t* index, - ulint* matched_fields) -{ - ulint match_f; - int ret; - - ret = cmp_rec_rec_with_match( - rec1, rec2, offsets1, offsets2, index, false, &match_f); - - if (matched_fields != NULL) { - *matched_fields = match_f; - } - - return(ret); -} - /** Compare two data fields. @param[in] dfield1 data field @param[in] dfield2 data field diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc index 48927b02963..cda286ef503 100644 --- a/storage/innobase/rem/rem0cmp.cc +++ b/storage/innobase/rem/rem0cmp.cc @@ -1,6 +1,7 @@ /***************************************************************************** -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,6 +26,7 @@ Created 7/1/1994 Heikki Tuuri #include "rem0cmp.h" #include "rem0rec.h" +#include "page0page.h" #include "dict0mem.h" #include "handler0alter.h" @@ -534,7 +536,7 @@ cmp_data( /** Compare a GIS data tuple to a physical record. @param[in] dtuple data tuple -@param[in] rec B-tree record +@param[in] rec R-tree record @param[in] offsets rec_get_offsets(rec) @param[in] mode compare mode @retval negative if dtuple is less than rec */ @@ -1086,23 +1088,24 @@ cmp_rec_rec_simple( return(0); } -/** Compare two B-tree records. -@param[in] rec1 B-tree record -@param[in] rec2 B-tree record -@param[in] offsets1 rec_get_offsets(rec1, index) -@param[in] offsets2 rec_get_offsets(rec2, index) -@param[in] index B-tree index -@param[in] nulls_unequal true if this is for index cardinality -statistics estimation, and innodb_stats_method=nulls_unequal -or innodb_stats_method=nulls_ignored -@param[out] matched_fields number of completely matched fields -within the first field not completely matched -@return the comparison result +/** Compare two B-tree or R-tree records. +Only the common first fields are compared, and externally stored field +are treated as equal. +@param[in] rec1 record (possibly not on an index page) +@param[in] rec2 B-tree or R-tree record in an index page +@param[in] offsets1 rec_get_offsets(rec1, index) +@param[in] offsets2 rec_get_offsets(rec2, index) +@param[in] nulls_unequal true if this is for index cardinality + statistics estimation with + innodb_stats_method=nulls_unequal + or innodb_stats_method=nulls_ignored +@param[out] matched_fields number of completely matched fields + within the first field not completely matched @retval 0 if rec1 is equal to rec2 @retval negative if rec1 is less than rec2 -@retval positive if rec2 is greater than rec2 */ +@retval positive if rec1 is greater than rec2 */ int -cmp_rec_rec_with_match( +cmp_rec_rec( const rec_t* rec1, const rec_t* rec2, const offset_t* offsets1, @@ -1111,17 +1114,14 @@ cmp_rec_rec_with_match( bool nulls_unequal, ulint* matched_fields) { - ulint rec1_n_fields; /* the number of fields in rec */ ulint rec1_f_len; /* length of current field in rec */ const byte* rec1_b_ptr; /* pointer to the current byte in rec field */ - ulint rec2_n_fields; /* the number of fields in rec */ ulint rec2_f_len; /* length of current field in rec */ const byte* rec2_b_ptr; /* pointer to the current byte in rec field */ ulint cur_field = 0; /* current field number */ int ret = 0; /* return value */ - ulint comp; ut_ad(rec1 != NULL); ut_ad(rec2 != NULL); @@ -1129,10 +1129,12 @@ cmp_rec_rec_with_match( ut_ad(rec_offs_validate(rec1, index, offsets1)); ut_ad(rec_offs_validate(rec2, index, offsets2)); ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2)); + ut_ad(fil_page_index_page_check(page_align(rec2))); + ut_ad(!!dict_index_is_spatial(index) + == (fil_page_get_type(page_align(rec2)) == FIL_PAGE_RTREE)); - comp = rec_offs_comp(offsets1); - rec1_n_fields = rec_offs_n_fields(offsets1); - rec2_n_fields = rec_offs_n_fields(offsets2); + ulint comp = rec_offs_comp(offsets1); + ulint n_fields; /* Test if rec is the predefined minimum record */ if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp) @@ -1149,37 +1151,41 @@ cmp_rec_rec_with_match( goto order_resolved; } - /* Match fields in a loop */ + /* For non-leaf spatial index records, the + dict_index_get_n_unique_in_tree() does include the child page + number, because spatial index node pointers only contain + the MBR (minimum bounding rectangle) and the child page number. - for (; cur_field < rec1_n_fields && cur_field < rec2_n_fields; - cur_field++) { + For B-tree node pointers, the key alone (secondary index + columns and PRIMARY KEY columns) must be unique, and there is + no need to compare the child page number. */ + n_fields = std::min(rec_offs_n_fields(offsets1), + rec_offs_n_fields(offsets2)); + n_fields = std::min(n_fields, dict_index_get_n_unique_in_tree(index)); + for (; cur_field < n_fields; cur_field++) { ulint mtype; ulint prtype; - /* If this is node-ptr records then avoid comparing node-ptr - field. Only key field needs to be compared. */ - if (cur_field == dict_index_get_n_unique_in_tree(index)) { - break; - } - - if (dict_index_is_ibuf(index)) { + if (UNIV_UNLIKELY(dict_index_is_ibuf(index))) { /* This is for the insert buffer B-tree. */ mtype = DATA_BINARY; prtype = 0; } else { - const dict_col_t* col; - - col = dict_index_get_nth_col(index, cur_field); - + const dict_col_t* col = dict_index_get_nth_col( + index, cur_field); mtype = col->mtype; prtype = col->prtype; - /* If the index is spatial index, we mark the - prtype of the first field as MBR field. */ - if (cur_field == 0 && dict_index_is_spatial(index)) { + if (UNIV_LIKELY(!dict_index_is_spatial(index))) { + } else if (cur_field == 0) { ut_ad(DATA_GEOMETRY_MTYPE(mtype)); prtype |= DATA_GIS_MBR; + } else if (!page_rec_is_leaf(rec2)) { + /* Compare the child page number. */ + ut_ad(cur_field == 1); + mtype = DATA_SYS_CHILD; + prtype = 0; } } @@ -1215,8 +1221,10 @@ cmp_rec_rec_with_match( to the common fields */ ut_ad(ret == 0); order_resolved: - *matched_fields = cur_field; - return(ret); + if (matched_fields) { + *matched_fields = cur_field; + } + return ret; } #ifdef UNIV_COMPILE_TEST_FUNCS |