MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ...

(Variant #5, full patch, for 10.5) Do not produce SEL_ARG graphs that would yield huge numbers of ranges. Introduce a concept of SEL_ARG graph's "weight". If we are about to produce a graph whose "weight" exceeds the limit, remove the parts of SEL_ARG graph that represent the biggest key parts. Do so until the graph's is within the limit. Includes - debug code to verify SEL_ARG graph weight - A user-visible @@optimizer_max_sel_arg_weight to control the optimization - Logging the optimization into the optimizer trace.
author: Sergei Petrunia <psergey@askmonty.org> 2021-01-28 21:43:55 +0300
committer: Sergei Petrunia <psergey@askmonty.org> 2021-01-29 16:20:57 +0300
commit: c36720388d598ca3aa1c4d2dab2266656c528b50 (patch)
tree: 66698613fde2bca7ed26d6af4368579700ebfccc /sql/opt_range.cc
parent: a2eb974b50f1a2912717d765bbd48aa69cfcc8f1 (diff)
download: mariadb-git-c36720388d598ca3aa1c4d2dab2266656c528b50.tar.gz
1 files changed, 296 insertions, 6 deletions
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index f146fc25126..e04a1e2753f 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -398,6 +398,11 @@ static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
 static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
                         SEL_ARG *key1, SEL_ARG *key2,
                         uint clone_flag);
+static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                  SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                   SEL_ARG *key1, SEL_ARG *key2,
+                                   uint clone_flag);
 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
                     SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
@@ -409,6 +414,13 @@ static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                              uint length);
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
 
+static
+SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                      SEL_ARG *sel_arg);
+static
+bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
+                                  SEL_ARG *key2);
+
 #include "opt_range_mrr.cc"
 
 static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, 
@@ -706,7 +718,8 @@ int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
           SEL_ARG *key1= (*or_tree)->keys[key_no];
           SEL_ARG *key2= tree->keys[key_no];
           key2->incr_refs();
-          if ((result->keys[key_no]= key_or(param, key1, key2)))
+          if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1,
+                                                       key2)))
           {
             
             result_keys.set_bit(key_no);
@@ -1872,9 +1885,13 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
   next_key_part=arg.next_key_part;
   max_part_no= arg.max_part_no;
   use_count=1; elements=1;
+  weight=1;
   next= 0;
   if (next_key_part)
+  {
     ++next_key_part->use_count;
+    weight += next_key_part->weight;
+  }
 }
 
 
@@ -1891,7 +1908,7 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
   :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
    elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
    max_value((uchar*) max_value_arg), next(0),prev(0),
-   next_key_part(0), color(BLACK), type(KEY_RANGE)
+   next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1)
 {
   left=right= &null_element;
   max_part_no= 1;
@@ -1903,7 +1920,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
   :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
    part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
    field(field_), min_value(min_value_), max_value(max_value_),
-   next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
+   next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1)
 {
   max_part_no= part+1;
   left=right= &null_element;
@@ -5447,7 +5464,7 @@ TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
       if ((*tree)->keys[key_idx]) 
         (*tree)->keys[key_idx]->incr_refs(); 
       if (((*changed_tree)->keys[key_idx]=
-             key_or(param, key, (*tree)->keys[key_idx])))
+             key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx])))
         (*changed_tree)->keys_map.set_bit(key_idx);
       *tree= NULL;
       removed_cnt++;
@@ -9116,7 +9133,8 @@ int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
         key2->incr_refs();
     }
     SEL_ARG *key;
-    if ((result->keys[key_no]= key =key_and(param, key1, key2, flag)))
+    if ((result->keys[key_no]= key= key_and_with_limit(param, key_no,
+                                                       key1, key2, flag)))
     {
       if (key && key->type == SEL_ARG::IMPOSSIBLE)
       {
@@ -9678,7 +9696,7 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
         key1->incr_refs();
         key2->incr_refs();
       }
-      if ((result->keys[key_no]= key_or(param, key1, key2)))
+      if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, key2)))
         result->keys_map.set_bit(key_no);
     }
     result->type= tree1->type;
@@ -9752,6 +9770,9 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
   SEL_ARG *next;
   ulong use_count=key1->use_count;
 
+  if (sel_arg_and_weight_heuristic(param, key1, key2))
+    return key1;
+
   if (key1->elements != 1)
   {
     key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
@@ -9764,6 +9785,8 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
     key1->right= key1->left= &null_element;
     key1->next= key1->prev= 0;
   }
+  uint new_weight= 0;
+
   for (next=key1->first(); next ; next=next->next)
   {
     if (next->next_key_part)
@@ -9775,17 +9798,22 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
 	continue;
       }
       next->next_key_part=tmp;
+      new_weight += 1 + tmp->weight;
       if (use_count)
 	next->increment_use_count(use_count);
       if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
         break;
     }
     else
+    {
+      new_weight += 1 + key2->weight;
       next->next_key_part=key2;
+    }
   }
   if (!key1)
     return &null_element;			// Impossible ranges
   key1->use_count++;
+  key1->weight= new_weight;
   key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
   return key1;
 }
@@ -9821,6 +9849,10 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
       clone_flag=swap_clone_flag(clone_flag);
     }
     // key1->part < key2->part
+
+    if (sel_arg_and_weight_heuristic(param, key1, key2))
+      return key1;
+
     key1->use_count--;
     if (key1->use_count > 0)
       if (!(key1= key1->clone_tree(param)))
@@ -9851,6 +9883,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
     {						// Both are maybe key
       key1->next_key_part=key_and(param, key1->next_key_part, 
                                   key2->next_key_part, clone_flag);
+
+      key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight : 0);
+
       if (key1->next_key_part &&
 	  key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
 	return key1;
@@ -9901,6 +9936,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
       if (!new_arg)
 	return &null_element;			// End of memory
       new_arg->next_key_part=next;
+      if (new_arg->next_key_part)
+        new_arg->weight += new_arg->next_key_part->weight;
+
       if (!new_tree)
       {
 	new_tree=new_arg;
@@ -9939,6 +9977,72 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
   return 0;
 }
 
+#ifndef DBUG_OFF
+/*
+  Verify SEL_TREE's weight.
+
+  Recompute the weight and compare
+*/
+uint SEL_ARG::verify_weight()
+{
+  uint computed_weight= 0;
+  SEL_ARG *first_arg= first();
+
+  if (first_arg)
+  {
+    for (SEL_ARG *arg= first_arg; arg; arg= arg->next)
+    {
+      computed_weight++;
+      if (arg->next_key_part)
+        computed_weight+= arg->next_key_part->verify_weight();
+    }
+  }
+  else
+  {
+    // first()=NULL means this is a special kind of SEL_ARG, e.g.
+    // SEL_ARG with type=MAYBE_KEY
+    computed_weight= 1;
+    if (next_key_part)
+      computed_weight += next_key_part->verify_weight();
+  }
+
+  if (computed_weight != weight)
+  {
+    sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n",
+                    computed_weight, weight);
+    DBUG_ASSERT(computed_weight == weight);  // Fail an assertion
+  }
+  return computed_weight;
+}
+#endif
+
+static
+SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                           SEL_ARG *key1, SEL_ARG *key2)
+{
+  SEL_ARG *res= key_or(param, key1, key2);
+  res= enforce_sel_arg_weight_limit(param, keyno, res);
+#ifndef DBUG_OFF
+  if (res)
+    res->verify_weight();
+#endif
+  return res;
+}
+
+
+static
+SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno,
+                            SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
+{
+  SEL_ARG *res= key_and(param, key1, key2, clone_flag);
+  res= enforce_sel_arg_weight_limit(param, keyno, res);
+#ifndef DBUG_OFF
+  if (res)
+    res->verify_weight();
+#endif
+  return res;
+}
+
 
 /**
    Combine two range expression under a common OR. On a logical level, the
@@ -10595,6 +10699,19 @@ end:
   }
   key1->use_count++;
 
+  /* Re-compute the result tree's weight. */
+  {
+    uint new_weight= 0;
+    const SEL_ARG *sl;
+    for (sl= key1->first(); sl ; sl= sl->next)
+    {
+      new_weight++;
+      if (sl->next_key_part)
+        new_weight += sl->next_key_part->weight;
+    }
+    key1->weight= new_weight;
+  }
+
   key1->max_part_no= max_part_no;
   return key1;
 }
@@ -10632,6 +10749,160 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
 }
 
 
+/*
+  Compute the MAX(key part) in this SEL_ARG graph.
+*/
+uint SEL_ARG::get_max_key_part() const
+{
+  const SEL_ARG *cur;
+  uint max_part= part;
+  for (cur= first(); cur ; cur=cur->next)
+  {
+    if (cur->next_key_part)
+    {
+      uint mp= cur->next_key_part->get_max_key_part();
+      max_part= MY_MAX(part, mp);
+    }
+  }
+  return max_part;
+}
+
+
+/*
+  Remove the SEL_ARG graph elements which have part > max_part.
+
+  @detail
+    Also update weight for the graph and any modified subgraphs.
+*/
+
+void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
+{
+  SEL_ARG *cur;
+  DBUG_ASSERT(max_part >= sel_arg->part);
+
+  for (cur= sel_arg->first(); cur ; cur=cur->next)
+  {
+    if (cur->next_key_part)
+    {
+      if (cur->next_key_part->part > max_part)
+      {
+        // Remove cur->next_key_part.
+        sel_arg->weight -= cur->next_key_part->weight;
+        cur->next_key_part= NULL;
+      }
+      else
+      {
+        uint old_weight= cur->next_key_part->weight;
+        prune_sel_arg_graph(cur->next_key_part, max_part);
+        sel_arg->weight -= (old_weight - cur->next_key_part->weight);
+      }
+    }
+  }
+}
+
+
+/*
+  @brief
+    Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT,
+    by cutting off branches if necessary.
+
+  @detail
+    @see declaration of SEL_ARG::weight for definition of weight.
+
+    This function attempts to reduce the graph's weight by cutting off
+    SEL_ARG::next_key_part connections if necessary.
+
+    We start with maximum used keypart and then remove one keypart after
+    another until the graph's weight is within the limit.
+
+  @seealso
+     sel_arg_and_weight_heuristic();
+
+  @return
+    tree pointer  The tree after processing,
+    NULL          If it was not possible to reduce the weight of the tree below the
+                  limit.
+*/
+
+SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
+                                      SEL_ARG *sel_arg)
+{
+  if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE ||
+      !param->thd->variables.optimizer_max_sel_arg_weight)
+    return sel_arg;
+
+  Field *field= sel_arg->field;
+  uint weight1= sel_arg->weight;
+
+  while (1)
+  {
+    if (likely(sel_arg->weight <= param->thd->variables.
+                                  optimizer_max_sel_arg_weight))
+      break;
+
+    uint max_part= sel_arg->get_max_key_part();
+    if (max_part == sel_arg->part)
+    {
+      /*
+        We don't return NULL right away as we want to have the information
+        about the changed tree in the optimizer trace.
+      */
+      sel_arg= NULL;
+      break;
+    }
+
+    max_part--;
+    prune_sel_arg_graph(sel_arg, max_part);
+  }
+
+  uint weight2= sel_arg? sel_arg->weight : 0;
+
+  if (weight2 != weight1)
+  {
+    Json_writer_object wrapper(param->thd);
+    Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit");
+    if (param->using_real_indexes)
+      obj.add("index", param->table->key_info[param->real_keynr[keyno]].name);
+    else
+      obj.add("pseudo_index", field->field_name);
+
+    obj.add("old_weight", (longlong)weight1);
+    obj.add("new_weight", (longlong)weight2);
+  }
+  return sel_arg;
+}
+
+
+/*
+  @detail
+    Do not combine the trees if their total weight is likely to exceed the
+    MAX_WEIGHT.
+    (It is possible that key1 has next_key_part that has empty overlap with
+    key2. In this case, the combined tree will have a smaller weight than we
+    predict. We assume this is rare.)
+*/
+
+static
+bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1,
+                                  SEL_ARG *key2)
+{
+  DBUG_ASSERT(key1->part < key2->part);
+
+  ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight;
+  if (max_weight && key1->weight + key1->elements*key2->weight > max_weight)
+  {
+    Json_writer_object wrapper(param->thd);
+    Json_writer_object obj(param->thd, "sel_arg_weight_heuristic");
+    obj.add("key1_field", key1->field->field_name);
+    obj.add("key2_field", key2->field->field_name);
+    obj.add("key1_weight", (longlong)key1->weight);
+    obj.add("key2_weight", (longlong)key2->weight);
+    return true; // Discard key2
+  }
+  return false;
+}
+
+
 SEL_ARG *
 SEL_ARG::insert(SEL_ARG *key)
 {
@@ -10670,6 +10941,13 @@ SEL_ARG::insert(SEL_ARG *key)
   SEL_ARG *root=rb_insert(key);			// rebalance tree
   root->use_count=this->use_count;		// copy root info
   root->elements= this->elements+1;
+  /*
+    The new weight is:
+     old root's weight
+     +1 for the weight of the added element
+     + next_key_part's weight of the added element
+  */
+  root->weight = weight + 1 + (key->next_key_part? key->next_key_part->weight: 0);
   root->maybe_flag=this->maybe_flag;
   return root;
 }
@@ -10727,6 +11005,17 @@ SEL_ARG::tree_delete(SEL_ARG *key)
   root=this;
   this->parent= 0;
 
+  /*
+    Compute the weight the tree will have after the element is removed.
+    We remove the element itself (weight=1)
+    and the sub-graph connected to its next_key_part.
+  */
+  uint new_weight= root->weight - (1 + (key->next_key_part?
+                                        key->next_key_part->weight : 0));
+
+  DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ?
+                                    key->next_key_part->weight : 0)));
+
   /* Unlink from list */
   if (key->prev)
     key->prev->next=key->next;
@@ -10778,6 +11067,7 @@ SEL_ARG::tree_delete(SEL_ARG *key)
   test_rb_tree(root,root->parent);
 
   root->use_count=this->use_count;		// Fix root counters
+  root->weight= new_weight;
   root->elements=this->elements-1;
   root->maybe_flag=this->maybe_flag;
   DBUG_RETURN(root);
author	Sergei Petrunia <psergey@askmonty.org>	2021-01-28 21:43:55 +0300
committer	Sergei Petrunia <psergey@askmonty.org>	2021-01-29 16:20:57 +0300
commit	c36720388d598ca3aa1c4d2dab2266656c528b50 (patch)
tree	66698613fde2bca7ed26d6af4368579700ebfccc /sql/opt_range.cc
parent	a2eb974b50f1a2912717d765bbd48aa69cfcc8f1 (diff)
download	mariadb-git-c36720388d598ca3aa1c4d2dab2266656c528b50.tar.gz