summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2020-11-27 22:06:54 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2020-11-30 17:16:43 +0530
commitb4379df5b485143209c35b9f6f07b00049c8d455 (patch)
tree8f34d436dfaaa584cc416cf703c6bbacd50af385
parentf3b10354a97a0815d36abbfd25a5f10489a3eaab (diff)
downloadmariadb-git-b4379df5b485143209c35b9f6f07b00049c8d455.tar.gz
MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
Allow materialization strategy when collations on the inner and outer sides of an IN subquery are the same and the character set of the inner side is a proper subset of the character set on the outer side. This allows conversion from utf8mb3 to utf8mb4 as the former is a subset of the later. This is only allowed when IN predicate is converted to an IN subquery Backported part of the patch (d6a00d9b18f) of MDEV-17905.
-rw-r--r--mysql-test/main/subselect4.result34
-rw-r--r--mysql-test/main/subselect4.test37
-rw-r--r--sql/item_subselect.cc3
-rw-r--r--sql/item_subselect.h8
-rw-r--r--sql/opt_subselect.cc9
-rw-r--r--sql/sql_string.h18
-rw-r--r--sql/sql_tvc.cc6
-rw-r--r--sql/sql_type.cc90
-rw-r--r--sql/sql_type.h38
9 files changed, 215 insertions, 28 deletions
diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result
index d069f71601f..23b6e268d91 100644
--- a/mysql-test/main/subselect4.result
+++ b/mysql-test/main/subselect4.result
@@ -2718,3 +2718,37 @@ Warning 1931 Query execution was interrupted. The query examined at least 3020 r
SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
# End of 10.2 tests
+#
+# MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
+#
+CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
+INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
+INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
+set in_predicate_conversion_threshold=2;
+set names 'utf8mb4';
+#
+# IN predicate to IN subquery is not allowed as materialization is not allowed
+# The character set on the inner side is not equal to or a proper subset of the outer side
+#
+EXPLAIN
+SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where
+set names 'utf8';
+#
+# IN predicate to IN subquery is performed as materialization is llowed
+# The character set on the inner side is a proper subset of the outer side
+#
+EXPLAIN
+SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 5
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func,func 1 Using where
+2 MATERIALIZED <derived3> ALL NULL NULL NULL NULL 3
+3 DERIVED NULL NULL NULL NULL NULL NULL NULL No tables used
+set names default;
+set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
+DROP TABLE t1,t2;
+# End of 10.3 tests
diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test
index 6f5eb1f2985..f264b3857ff 100644
--- a/mysql-test/main/subselect4.test
+++ b/mysql-test/main/subselect4.test
@@ -2238,3 +2238,40 @@ SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
--echo # End of 10.2 tests
+
+--echo #
+--echo # MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
+--echo #
+
+CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
+INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+
+CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
+INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+
+set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
+set in_predicate_conversion_threshold=2;
+
+set names 'utf8mb4';
+--echo #
+--echo # IN predicate to IN subquery is not allowed as materialization is not allowed
+--echo # The character set on the inner side is not equal to or a proper subset of the outer side
+--echo #
+
+EXPLAIN
+SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
+
+set names 'utf8';
+--echo #
+--echo # IN predicate to IN subquery is performed as materialization is llowed
+--echo # The character set on the inner side is a proper subset of the outer side
+--echo #
+
+EXPLAIN
+SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
+
+set names default;
+set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
+DROP TABLE t1,t2;
+
+--echo # End of 10.3 tests
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 3e49f893ee3..d882918de5c 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1454,7 +1454,8 @@ Item_in_subselect::Item_in_subselect(THD *thd, Item * left_exp,
pushed_cond_guards(NULL), do_not_convert_to_sj(FALSE), is_jtbm_merged(FALSE),
is_jtbm_const_tab(FALSE), is_flattenable_semijoin(FALSE),
is_registered_semijoin(FALSE),
- upper_item(0)
+ upper_item(0),
+ converted_from_in_predicate(FALSE)
{
DBUG_ENTER("Item_in_subselect::Item_in_subselect");
DBUG_PRINT("info", ("in_strategy: %u", (uint)in_strategy));
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 1cea7291c9e..fdc39f1f05e 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -601,12 +601,18 @@ public:
Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery
+ /*
+ SET to TRUE if IN subquery is converted from an IN predicate
+ */
+ bool converted_from_in_predicate;
+
Item_in_subselect(THD *thd_arg, Item * left_expr, st_select_lex *select_lex);
Item_in_subselect(THD *thd_arg):
Item_exists_subselect(thd_arg), left_expr_cache(0), first_execution(TRUE),
in_strategy(SUBS_NOT_TRANSFORMED),
pushed_cond_guards(NULL), func(NULL), do_not_convert_to_sj(FALSE),
- is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0) {}
+ is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0),
+ converted_from_in_predicate(FALSE) {}
void cleanup();
subs_type substype() { return IN_SUBS; }
void reset()
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 9ee51074854..d65e00f4b97 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -836,6 +836,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
bool all_are_fields= TRUE;
uint32 total_key_length = 0;
+ bool converted_from_in_predicate= in_subs->converted_from_in_predicate;
for (uint i= 0; i < elements; i++)
{
Item *outer= in_subs->left_expr->element_index(i);
@@ -843,8 +844,12 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM &&
inner->real_item()->type() == Item::FIELD_ITEM);
total_key_length += inner->max_length;
- if (!inner->type_handler()->subquery_type_allows_materialization(inner,
- outer))
+
+ if (!inner->
+ type_handler()->
+ subquery_type_allows_materialization(inner,
+ outer,
+ converted_from_in_predicate))
DBUG_RETURN(FALSE);
}
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 11c3f0cd573..4f9a68acbb4 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -131,6 +131,24 @@ uint convert_to_printable(char *to, size_t to_len,
const char *from, size_t from_len,
CHARSET_INFO *from_cs, size_t nbytes= 0);
+class Charset
+{
+ CHARSET_INFO *m_charset;
+public:
+ Charset() :m_charset(&my_charset_bin) { }
+ Charset(CHARSET_INFO *cs) :m_charset(cs) { }
+
+ CHARSET_INFO *charset() const { return m_charset; }
+ /*
+ Collation name without the character set name.
+ For example, in case of "latin1_swedish_ci",
+ this method returns "_swedish_ci".
+ */
+ LEX_CSTRING collation_specific_name() const;
+ bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const;
+ bool eq_collation_specific_names(CHARSET_INFO *cs) const;
+};
+
class String : public Sql_alloc
{
char *Ptr;
diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc
index def519035d9..10a279b92ed 100644
--- a/sql/sql_tvc.cc
+++ b/sql/sql_tvc.cc
@@ -829,7 +829,8 @@ static bool cmp_row_types(Item* item1, Item* item2)
Item *inner= item1->element_index(i);
Item *outer= item2->element_index(i);
if (!inner->type_handler()->subquery_type_allows_materialization(inner,
- outer))
+ outer,
+ true))
return true;
}
return false;
@@ -895,7 +896,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
for (uint i=1; i < arg_count; i++)
{
- if (!args[i]->const_item() || cmp_row_types(args[0], args[i]))
+ if (!args[i]->const_item() || cmp_row_types(args[i], args[0]))
return this;
}
@@ -975,6 +976,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
if (!(in_subs=
new (thd->mem_root) Item_in_subselect(thd, args[0], sq_select)))
goto err;
+ in_subs->converted_from_in_predicate= TRUE;
sq= in_subs;
if (negated)
sq= negate_expression(thd, in_subs);
diff --git a/sql/sql_type.cc b/sql/sql_type.cc
index a25aa236b16..0bae1e55145 100644
--- a/sql/sql_type.cc
+++ b/sql/sql_type.cc
@@ -19,6 +19,7 @@
#include "sql_const.h"
#include "sql_class.h"
#include "sql_time.h"
+#include "sql_string.h"
#include "item.h"
#include "log.h"
@@ -5112,7 +5113,8 @@ uint Type_handler_timestamp_common::Item_decimal_precision(const Item *item) con
bool Type_handler_real_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == REAL_RESULT);
return outer->cmp_type() == REAL_RESULT;
@@ -5121,7 +5123,8 @@ bool Type_handler_real_result::
bool Type_handler_int_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == INT_RESULT);
return outer->cmp_type() == INT_RESULT;
@@ -5130,7 +5133,8 @@ bool Type_handler_int_result::
bool Type_handler_decimal_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == DECIMAL_RESULT);
return outer->cmp_type() == DECIMAL_RESULT;
@@ -5139,23 +5143,37 @@ bool Type_handler_decimal_result::
bool Type_handler_string_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == STRING_RESULT);
- return outer->cmp_type() == STRING_RESULT &&
- outer->collation.collation == inner->collation.collation &&
- /*
- Materialization also is unable to work when create_tmp_table() will
- create a blob column because item->max_length is too big.
- The following test is copied from varstring_type_handler().
- */
- !inner->too_big_for_varchar();
+ if (outer->cmp_type() == STRING_RESULT &&
+ /*
+ Materialization also is unable to work when create_tmp_table() will
+ create a blob column because item->max_length is too big.
+ The following test is copied from varstring_type_handler().
+ */
+ !inner->too_big_for_varchar())
+ {
+ if (outer->collation.collation == inner->collation.collation)
+ return true;
+ if (is_in_predicate)
+ {
+ Charset inner_col(inner->collation.collation);
+ if (inner_col.encoding_allows_reinterpret_as(outer->
+ collation.collation) &&
+ inner_col.eq_collation_specific_names(outer->collation.collation))
+ return true;
+ }
+ }
+ return false;
}
bool Type_handler_temporal_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == TIME_RESULT);
return mysql_timestamp_type() ==
@@ -5973,3 +5991,49 @@ bool Type_handler_general_purpose_string::
}
/***************************************************************************/
+
+LEX_CSTRING Charset::collation_specific_name() const
+{
+ /*
+ User defined collations can provide arbitrary names
+ for character sets and collations, so a collation
+ name not necessarily starts with the character set name.
+ */
+ size_t csname_length= strlen(m_charset->csname);
+ if (strncmp(m_charset->name, m_charset->csname, csname_length))
+ return {NULL, 0};
+ const char *ptr= m_charset->name + csname_length;
+ return {ptr, strlen(ptr) };
+}
+
+
+bool
+Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
+{
+ if (!strcmp(m_charset->csname, cs->csname))
+ return true;
+
+ if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
+ !strcmp(cs->csname, MY_UTF8MB4))
+ return true;
+
+ /*
+ Originally we allowed here instat ALTER for ASCII-to-LATIN1
+ and UCS2-to-UTF16, but this was wrong:
+ - MariaDB's ascii is not a subset for 8-bit character sets
+ like latin1, because it allows storing bytes 0x80..0xFF as
+ "unassigned" characters (see MDEV-19285).
+ - MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16,
+ because they treat surrogate codes differently (MDEV-19284).
+ */
+ return false;
+}
+
+
+bool
+Charset::eq_collation_specific_names(CHARSET_INFO *cs) const
+{
+ LEX_CSTRING name0= collation_specific_name();
+ LEX_CSTRING name1= Charset(cs).collation_specific_name();
+ return name0.length && !cmp(&name0, &name1);
+}
diff --git a/sql/sql_type.h b/sql/sql_type.h
index 907225b7c50..4a7a7b5a9b8 100644
--- a/sql/sql_type.h
+++ b/sql/sql_type.h
@@ -1291,9 +1291,21 @@ public:
Item *target_expr, Item *target_value,
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const= 0;
+
+ /*
+ @brief
+ Check if an IN subquery allows materialization or not
+ @param
+ inner expression on the inner side of the IN subquery
+ outer expression on the outer side of the IN subquery
+ is_in_predicate SET to true if IN subquery was converted from an
+ IN predicate or we are checking if materialization
+ strategy can be used for an IN predicate
+ */
virtual bool
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const= 0;
+ const Item *outer,
+ bool is_in_predicate) const= 0;
/**
Make a simple constant replacement item for a constant "src",
so the new item can futher be used for comparison with "cmp", e.g.:
@@ -1470,7 +1482,8 @@ public:
}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(0);
return false;
@@ -1788,7 +1801,8 @@ public:
virtual ~Type_handler_real_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
void sortlength(THD *thd,
@@ -1857,7 +1871,8 @@ public:
virtual ~Type_handler_decimal_result() {};
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
@@ -2060,7 +2075,8 @@ public:
virtual ~Type_handler_int_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
@@ -2156,7 +2172,8 @@ public:
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
Item **items, uint nitems) const;
bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const;
@@ -2266,7 +2283,8 @@ public:
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const;
Item_cache *Item_get_cache(THD *thd, const Item *item) const;
bool set_comparator_func(Arg_comparator *cmp) const;
@@ -3236,7 +3254,8 @@ public:
return blob_type_handler(item);
}
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
return false; // Materialization does not work with BLOB columns
}
@@ -3341,7 +3360,8 @@ public:
return true;
}
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
return false; // Materialization does not work with GEOMETRY columns
}