MDEV-25128 Wrong result from join with materialized semi-join and

splittable derived If one of joined tables of the processed query is a materialized derived table (or view or CTE) with GROUP BY clause then under some conditions it can be subject to split optimization. With this optimization new equalities are injected into the WHERE condition of the SELECT that specifies this derived table. The injected equalities are generated for all join orders with which the split optimization can employed. After the best join order has been chosen only certain of this equalities are really needed. The others can be safely removed. If it's not done and some of injected equalities involve expressions over semi-joins with look-up access then the query may return a wrong result set. This patch effectively removes equalities injected for split optimization that are needed only at the optimization stage and not needed for execution. Approved by serg@mariadb.com
author: Igor Babaev <igor@askmonty.org> 2021-03-23 20:54:54 -0700
committer: Igor Babaev <igor@askmonty.org> 2021-03-23 20:54:54 -0700
commit: 480a06718d137c9ee7784012ccb609b9e79ff08c (patch)
tree: 18180eab82208cf959f41839be2c9207d5644af1
parent: 7d5ec9f1aeec1ea8d00a0aa5bfec521948cfb8be (diff)
download: mariadb-git-480a06718d137c9ee7784012ccb609b9e79ff08c.tar.gz
4 files changed, 307 insertions, 6 deletions
diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result
index 27ffffd7581..f3d63b5887f 100644
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -17134,4 +17134,215 @@ a
 3
 DROP VIEW v1;
 DROP TABLE t1;
+#
+# MDEV-25128: Split optimization for join with materialized semi-join
+#
+create table t1 (id int, a int, index (a), index (id, a)) engine=myisam;
+insert into t1 values
+(17,1),(17,3010),(17,3013),(17,3053),(21,2446),(21,2467),(21,2);
+create table t2 (a int) engine=myisam;
+insert into t2 values (1),(2),(3);
+create  table t3 (id int) engine=myisam;
+insert into t3 values (1),(2);
+analyze table t1,t2,t3;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+test.t2	analyze	status	OK
+test.t3	analyze	status	OK
+set optimizer_switch="split_materialized=off";
+select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+id	a	a	id
+17	1	1	1
+21	2	2	2
+explain select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	2	Using where
+1	PRIMARY	t1	ref	a	a	5	test.t3.id	1	
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t3.id	2	
+3	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	3	
+2	DERIVED	cp2	index	NULL	a	5	NULL	7	Using index
+explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "table": {
+      "table_name": "t3",
+      "access_type": "ALL",
+      "rows": 2,
+      "filtered": 100,
+      "attached_condition": "t3.`id` is not null and t3.`id` is not null"
+    },
+    "table": {
+      "table_name": "t1",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t3.id"],
+      "rows": 1,
+      "filtered": 100
+    },
+    "table": {
+      "table_name": "<subquery3>",
+      "access_type": "eq_ref",
+      "possible_keys": ["distinct_key"],
+      "key": "distinct_key",
+      "key_length": "4",
+      "used_key_parts": ["a"],
+      "ref": ["func"],
+      "rows": 1,
+      "filtered": 100,
+      "materialized": {
+        "unique": 1,
+        "query_block": {
+          "select_id": 3,
+          "table": {
+            "table_name": "t2",
+            "access_type": "ALL",
+            "rows": 3,
+            "filtered": 100
+          }
+        }
+      }
+    },
+    "table": {
+      "table_name": "<derived2>",
+      "access_type": "ref",
+      "possible_keys": ["key0"],
+      "key": "key0",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t3.id"],
+      "rows": 2,
+      "filtered": 100,
+      "materialized": {
+        "query_block": {
+          "select_id": 2,
+          "table": {
+            "table_name": "cp2",
+            "access_type": "index",
+            "key": "a",
+            "key_length": "5",
+            "used_key_parts": ["a"],
+            "rows": 7,
+            "filtered": 100,
+            "using_index": true
+          }
+        }
+      }
+    }
+  }
+}
+set optimizer_switch="split_materialized=default";
+select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+id	a	a	id
+17	1	1	1
+21	2	2	2
+explain select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	2	Using where
+1	PRIMARY	t1	ref	a	a	5	test.t3.id	1	
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t3.id	2	
+3	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	3	
+2	LATERAL DERIVED	cp2	ref	a	a	5	test.t1.a	1	Using index
+explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "table": {
+      "table_name": "t3",
+      "access_type": "ALL",
+      "rows": 2,
+      "filtered": 100,
+      "attached_condition": "t3.`id` is not null and t3.`id` is not null"
+    },
+    "table": {
+      "table_name": "t1",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t3.id"],
+      "rows": 1,
+      "filtered": 100
+    },
+    "table": {
+      "table_name": "<subquery3>",
+      "access_type": "eq_ref",
+      "possible_keys": ["distinct_key"],
+      "key": "distinct_key",
+      "key_length": "4",
+      "used_key_parts": ["a"],
+      "ref": ["func"],
+      "rows": 1,
+      "filtered": 100,
+      "materialized": {
+        "unique": 1,
+        "query_block": {
+          "select_id": 3,
+          "table": {
+            "table_name": "t2",
+            "access_type": "ALL",
+            "rows": 3,
+            "filtered": 100
+          }
+        }
+      }
+    },
+    "table": {
+      "table_name": "<derived2>",
+      "access_type": "ref",
+      "possible_keys": ["key0"],
+      "key": "key0",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t3.id"],
+      "rows": 2,
+      "filtered": 100,
+      "materialized": {
+        "lateral": 1,
+        "query_block": {
+          "select_id": 2,
+          "outer_ref_condition": "t1.a is not null",
+          "table": {
+            "table_name": "cp2",
+            "access_type": "ref",
+            "possible_keys": ["a"],
+            "key": "a",
+            "key_length": "5",
+            "used_key_parts": ["a"],
+            "ref": ["test.t1.a"],
+            "rows": 1,
+            "filtered": 100,
+            "using_index": true
+          }
+        }
+      }
+    }
+  }
+}
+prepare stmt from "select * from t1, (select a from t1 cp2 group by a) dt, t3
+where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2)";
+execute stmt;
+id	a	a	id
+17	1	1	1
+21	2	2	2
+execute stmt;
+id	a	a	id
+17	1	1	1
+21	2	2	2
+deallocate prepare stmt;
+drop table t1,t2,t3;
 # End of 10.3 tests
diff --git a/mysql-test/main/derived_cond_pushdown.test b/mysql-test/main/derived_cond_pushdown.test
index 12d34a8326a..5936447fc88 100644
--- a/mysql-test/main/derived_cond_pushdown.test
+++ b/mysql-test/main/derived_cond_pushdown.test
@@ -3479,4 +3479,41 @@ SELECT * from v1 WHERE a=3;
 DROP VIEW v1;
 DROP TABLE t1;
 
+--echo #
+--echo # MDEV-25128: Split optimization for join with materialized semi-join
+--echo #
+
+create table t1 (id int, a int, index (a), index (id, a)) engine=myisam;
+insert into t1 values
+(17,1),(17,3010),(17,3013),(17,3053),(21,2446),(21,2467),(21,2);
+
+create table t2 (a int) engine=myisam;
+insert into t2 values (1),(2),(3);
+
+create  table t3 (id int) engine=myisam;
+insert into t3 values (1),(2);
+
+analyze table t1,t2,t3;
+
+let $q=
+select * from t1, (select a from t1 cp2 group by a) dt, t3
+  where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
+
+set optimizer_switch="split_materialized=off";
+eval $q;
+eval explain $q;
+eval explain format=json $q;
+
+set optimizer_switch="split_materialized=default";
+eval $q;
+eval explain $q;
+eval explain format=json $q;
+
+eval prepare stmt from "$q";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+drop table t1,t2,t3;
+
 --echo # End of 10.3 tests
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index fd7836f55cc..c3a2d03a93b 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -236,6 +236,8 @@ public:
   SplM_field_info *spl_fields;
   /* The number of elements in the above list */
   uint spl_field_cnt;
+  /* The list of equalities injected into WHERE for split optimization */
+  List<Item> inj_cond_list;
   /* Contains the structures to generate all KEYUSEs for pushable equalities */
   List<KEY_FIELD> added_key_fields;
   /* The cache of evaluated execution plans for 'join' with pushed equalities */
@@ -1047,22 +1049,22 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
 bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
 {
   Item *inj_cond= 0;
-  List<Item> inj_cond_list;
+  List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
   List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
   KEY_FIELD *added_key_field;
   while ((added_key_field= li++))
   {
     if (remaining_tables & added_key_field->val->used_tables())
       continue;
-    if (inj_cond_list.push_back(added_key_field->cond, thd->mem_root))
+    if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
       return true;
   }
-  DBUG_ASSERT(inj_cond_list.elements);
-  switch (inj_cond_list.elements) {
+  DBUG_ASSERT(inj_cond_list->elements);
+  switch (inj_cond_list->elements) {
   case 1:
-    inj_cond= inj_cond_list.head(); break;
+    inj_cond= inj_cond_list->head(); break;
   default:
-    inj_cond= new (thd->mem_root) Item_cond_and(thd, inj_cond_list);
+    inj_cond= new (thd->mem_root) Item_cond_and(thd, *inj_cond_list);
     if (!inj_cond)
       return true;
   }
@@ -1082,6 +1084,40 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
 
 /**
   @brief
+    Test if equality is injected for split optimization
+
+  @param
+    eq_item   equality to to test
+
+  @retval
+    true    eq_item is equality injected for split optimization
+    false   otherwise
+*/
+
+bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
+{
+  Item *left_item= eq_item->arguments()[0]->real_item();
+  if (left_item->type() != Item::FIELD_ITEM)
+    return false;
+  Field *field= ((Item_field *) left_item)->field;
+  if (!field->table->reginfo.join_tab)
+    return false;
+  JOIN *join= field->table->reginfo.join_tab->join;
+  if (!join->spl_opt_info)
+    return false;
+  List_iterator_fast<Item> li(join->spl_opt_info->inj_cond_list);
+  Item *item;
+  while ((item= li++))
+  {
+    if (item == eq_item)
+        return true;
+  }
+  return false;
+}
+
+
+/**
+  @brief
     Fix the splitting chosen for a splittable table in the final query plan
 
   @param
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 5f8bd24032f..1eb23781da2 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -292,6 +292,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab);
 
 static Item **get_sargable_cond(JOIN *join, TABLE *table);
 
+bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item);
+
 #ifndef DBUG_OFF
 
 /*
@@ -21787,6 +21789,21 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
       cond->marker=3;			// Checked when read
       return (COND*) 0;
     }
+    /*
+      If cond is an equality injected for split optimization then
+      a. when retain_ref_cond == false : cond is removed unconditionally
+         (cond that supports ref access is removed by the preceding code)
+      b. when retain_ref_cond == true : cond is removed if it does not
+         support ref access
+    */
+    if (left_item->type() == Item::FIELD_ITEM &&
+        is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
+        (!retain_ref_cond ||
+         !test_if_ref(root_cond, (Item_field*) left_item,right_item)))
+    {
+      cond->marker=3;
+      return (COND*) 0;
+    }
   }
   cond->marker=2;
   cond->set_join_tab_idx(join_tab_idx_arg);
author	Igor Babaev <igor@askmonty.org>	2021-03-23 20:54:54 -0700
committer	Igor Babaev <igor@askmonty.org>	2021-03-23 20:54:54 -0700
commit	480a06718d137c9ee7784012ccb609b9e79ff08c (patch)
tree	18180eab82208cf959f41839be2c9207d5644af1
parent	7d5ec9f1aeec1ea8d00a0aa5bfec521948cfb8be (diff)
download	mariadb-git-480a06718d137c9ee7784012ccb609b9e79ff08c.tar.gz