diff options
author | Monty <monty@mariadb.org> | 2022-08-11 13:05:23 +0300 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2022-08-12 00:47:48 +0300 |
commit | 07ffb3abc1004a102c2c605c7c280913741d5d87 (patch) | |
tree | 17dc111061e19f94c8378962f3e2952197366f03 /storage | |
parent | 0352e855f1be0e440eb9b7dbd8c6b42151252b88 (diff) | |
download | mariadb-git-07ffb3abc1004a102c2c605c7c280913741d5d87.tar.gz |
TEMPORARY PUSH: Changing all cost calculation to be given in ms10.7-selectivity-old
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most costs have been found with this program. All steps to calculate
the new costs are documented in Docs/optimizer.costs
- User optimizer_cost variables are given in usec (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rndpos_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and an optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rndpos_time() is the same thing.
- Added the following new optimizer_variables:
- optimizer_scan_lookup_cost
- optimizer_row_lookup_cost
- optimizer_index_lookup_cost
- optimizer_disk_read_cost
- Added include/my_tracker.h ; Useful include file to quickly test costs
of a function.
- Tuned sequence and heap engine costs (rest will be done in an updated
commit)
Diffstat (limited to 'storage')
25 files changed, 347 insertions, 132 deletions
diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h index d1aca22b01f..d5650163064 100644 --- a/storage/connect/ha_connect.h +++ b/storage/connect/ha_connect.h @@ -308,13 +308,18 @@ public: /** @brief Called in test_quick_select to determine if indexes should be used. */ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; }; /** @brief This method will never be called if you do not implement indexes. */ - virtual double read_time(uint, uint, ha_rows rows) - { return (double) rows / 20.0+1; } + virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) + { + return { 0, (double) rows * 0.001 }; + } + /* Everything below are methods that we implement in ha_connect.cc. diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h index 043183444da..5a56dc6c4dd 100644 --- a/storage/csv/ha_tina.h +++ b/storage/csv/ha_tina.h @@ -124,7 +124,12 @@ public: /* Called in test_quick_select to determine if indexes should be used. */ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { + return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE * + avg_io_cost(), + (stats.records+stats.deleted) * ROW_NEXT_FIND_COST }; + } /* The next method will never be called */ virtual bool fast_key_read() { return 1;} /* diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h index 2d3fa6d4216..ccd8bbd5446 100644 --- a/storage/example/ha_example.h +++ b/storage/example/ha_example.h @@ -150,15 +150,40 @@ public: uint max_supported_key_length() const { return 0; } /** @brief - Called in test_quick_select to determine if indexes should be used. 
+ Called in test_quick_select to determine cost of table scan */ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { + IO_AND_CPU_COST cost; + /* 0 blocks, 0.001 ms / row */ + cost.io= (double) (stats.records+stats.deleted) * avg_io_cost(); + cost.cpu= 0; + return cost; + } /** @brief This method will never be called if you do not implement indexes. */ - virtual double read_time(uint, uint, ha_rows rows) - { return (double) rows / 20.0+1; } + virtual IO_AND_CPU_COST keyread_time(uint, ulong, ha_rows rows, + ulonglong blocks) + { + IO_AND_CPU_COST cost; + cost.io= blocks * avg_io_cost(); + cost.cpu= (double) rows * 0.001; + return cost; + } + + /** @brief + Cost of fetching 'rows' records through rnd_pos() + */ + virtual IO_AND_CPU_COST rndpos_time(ha_rows rows) + { + IO_AND_CPU_COST cost; + /* 0 blocks, 0.001 ms / row */ + cost.io= 0; + cost.cpu= (double) rows * avg_io_cost(); + return cost; + } /* Everything below are methods that we implement in ha_example.cc. 
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h index a8d5439bdae..94a30962663 100644 --- a/storage/federated/ha_federated.h +++ b/storage/federated/ha_federated.h @@ -180,19 +180,26 @@ public: The reason for "records * 1000" is that such a large number forces this to use indexes " */ - virtual double scan_time() + + IO_AND_CPU_COST scan_time() { DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); - return (double)(stats.records*1000); + return + { + (double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(), + 0 + }; } - virtual double read_time(uint index, uint ranges, ha_rows rows) + IO_AND_CPU_COST rndpos_time(ha_rows rows) { - return rows2double(rows) + rows2double(ranges); + return { (double) stats.records * avg_io_cost(), 0 }; } - virtual double rndpos_time(ha_rows rows) + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) { - return rows2double(rows); + return { (double) (ranges + rows) * avg_io_cost(), 0 }; } + virtual void set_optimizer_cache_cost(double cost); const key_map *keys_to_use_for_scanning() { return &key_map_full; } diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index afa7e26d85d..4521d33bcbc 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -846,12 +846,14 @@ ha_federatedx::ha_federatedx(handlerton *hton, } /* - Federated doesn't need optimizer_cache_cost as everything is one a remote server and - nothing is cached locally + Federated doesn't need optimizer_cache_cost as everything is one a remote + server and nothing is cached locally */ void ha_federatedx::set_optimizer_cache_cost(double cost) -{} +{ + optimizer_cache_cost= 1.0; +} /* Convert MySQL result set row to handler internal format diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h index 28318842ac2..240b83554e8 100644 --- a/storage/federatedx/ha_federatedx.h +++ 
b/storage/federatedx/ha_federatedx.h @@ -368,19 +368,25 @@ public: The reason for "records * 1000" is that such a large number forces this to use indexes " */ - double scan_time() + IO_AND_CPU_COST scan_time() { DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); - return (double)(stats.records*1000); + return + { + (double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(), + 0 + }; } - double read_time(uint index, uint ranges, ha_rows rows) + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) { - return rows2double(rows) + rows2double(ranges); + return { (double) (ranges + rows) * avg_io_cost(), 0 }; } - virtual double rndpos_time(ha_rows rows) + IO_AND_CPU_COST rndpos_time(ha_rows rows) { - return rows2double(rows); + return { (double) rows * avg_io_cost(), 0 }; } + virtual void set_optimizer_cache_cost(double cost); const key_map *keys_to_use_for_scanning() { return &key_map_full; } diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index f9b365cf91e..756f4d07573 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -230,6 +230,49 @@ void ha_heap::update_key_stats() } +void ha_heap::optimizer_costs_updated() +{ + /* + Heap doesn't need optimizer_cache_cost as everything is in memory and + it supports all needed _time() functions + */ + optimizer_cache_cost= 1.0; + optimizer_scan_lookup_cost= 1; + optimizer_index_next_find_cost= 0; +} + +#define HEAP_SCAN_TIME 9.376e-06 // See optimizer_costs.txt +#define BTREE_SCAN_TIME 5.171e-05 // See optimizer_costs.txt +#define HEAP_LOOKUP_TIME 1.91e-4 // See optimizer_costs.txt + + +IO_AND_CPU_COST ha_heap::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) +{ + KEY *key=table->key_info+index; + if (key->algorithm == HA_KEY_ALG_BTREE) + return {0, (double) (rows + ranges + 1) * BTREE_SCAN_TIME }; + else + { + return {0, (double) ranges * HEAP_LOOKUP_TIME + (rows-1) * BTREE_SCAN_TIME }; + } +} + +IO_AND_CPU_COST 
ha_heap::scan_time() +{ + return {0, (double) (stats.records+stats.deleted) * HEAP_SCAN_TIME}; +} + +IO_AND_CPU_COST ha_heap::rndpos_time(ha_rows rows) +{ + /* + The row pointer is a direct pointer to the block. Thus almost instant + in practice. + Note that ha_rndpos_time() will add ROW_COPY_COST to this result + */ + return { 0, 0 }; +} + int ha_heap::write_row(const uchar * buf) { int res; diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index 45495daf54c..61e20779e44 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -62,27 +62,13 @@ public: const key_map *keys_to_use_for_scanning() { return &btree_keys; } uint max_supported_keys() const { return MAX_KEY; } uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; } - double scan_time() - { return (double) (stats.records+stats.deleted) / 20.0+10; } - double read_time(uint index, uint ranges, ha_rows rows) - { return (double) (rows +1)/ 20.0; } - double keyread_time(uint index, uint ranges, ha_rows rows) - { return (double) (rows + ranges) / 20.0 ; } - double rndpos_time(ha_rows rows) - { - return (double) rows/ 20.0; - } - double avg_io_cost() - { return 0.05; } /* 1/20 */ - - /* - Heap doesn't need optimizer_cache_cost as everything is in memory and - it supports all needed _time() functions - */ - void set_optimizer_cache_cost(double cost) - { - optimizer_cache_cost= 1.0; - } + IO_AND_CPU_COST scan_time(); + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); + IO_AND_CPU_COST rndpos_time(ha_rows rows); + /* 0 for avg_io_cost ensures that there are no read-block calculations */ + double avg_io_cost() { return 0.0; } + void optimizer_costs_updated(); int open(const char *name, int mode, uint test_if_locked); int close(void); void set_keys_for_scanning(void); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ffd3f0bc0e3..4ab2ac8618c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ 
b/storage/innobase/handler/ha_innodb.cc @@ -14416,7 +14416,7 @@ comparable to the number returned by records_in_range so that we can decide if we should scan the table or use keys. @return estimated time measured in disk seeks */ -double +IO_AND_CPU_COST ha_innobase::scan_time() /*====================*/ { @@ -14436,17 +14436,19 @@ ha_innobase::scan_time() TODO: This will be further improved to return some approximate estimate but that would also needs pre-population of stats structure. As of now approach is in sync with MyISAM. */ - return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2); + return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 }; } ulint stat_clustered_index_size; - + IO_AND_CPU_COST cost; ut_a(m_prebuilt->table->stat_initialized); stat_clustered_index_size = m_prebuilt->table->stat_clustered_index_size; - return((double) stat_clustered_index_size); + cost.io= (double) stat_clustered_index_size * avg_io_cost(); + cost.cpu= 0; + return(cost); } /******************************************************************//** @@ -14454,6 +14456,7 @@ Calculate the time it takes to read a set of ranges through an index This enables us to optimise reads for clustered indexes. @return estimated time measured in disk seeks */ +#ifdef NOT_USED double ha_innobase::read_time( /*===================*/ @@ -14478,14 +14481,13 @@ ha_innobase::read_time( return(time_for_scan); } - return(ranges + (double) rows / (double) total_rows * time_for_scan); + return(ranges * INDEX_LOOKUP_COST + (double) rows / (double) total_rows * time_for_scan); } /******************************************************************//** Calculate the time it takes to read a set of rows with primary key. 
*/ -double ha_innobase::rndpos_time(ha_rows rows) { ha_rows total_rows; @@ -14502,6 +14504,7 @@ ha_innobase::rndpos_time(ha_rows rows) return((double) rows + (double) rows / (double) total_rows * time_for_scan); } +#endif /*********************************************************************//** Calculates the key number used inside MySQL for an Innobase index. diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 664fa10d4da..4bb5179d832 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -105,11 +105,25 @@ public: int close(void) override; - double scan_time() override; + IO_AND_CPU_COST scan_time() override; +#ifdef NOT_USED double read_time(uint index, uint ranges, ha_rows rows) override; - double rndpos_time(ha_rows rows) override; +#endif + + void optimizer_costs_updated() + { +#ifdef QQQ + /* + The following number was found by check_costs.pl when using 1M rows + and all rows are cached + */ + optimizer_row_lookup_cost*= 2.2; +#endif + /* Accessing a row has the same cost as doing an index read */ + optimizer_row_lookup_cost= optimizer_index_lookup_cost; + } int write_row(const uchar * buf) override; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 1e6680d30de..f48373ea818 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -1100,14 +1100,37 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const } -double ha_maria::scan_time() +IO_AND_CPU_COST ha_maria::scan_time() { - if (file->s->data_file_type == BLOCK_RECORD) - return (ulonglong2double(stats.data_file_length - file->s->block_size) / - file->s->block_size) + 2; return handler::scan_time(); } +void ha_maria::optimizer_costs_updated() +{ + /* + The following numbers where found by check_costs.pl when using 1M rows + and all rows are cached. 
See optimzier_costs.txt + */ + if (file->s->data_file_type == BLOCK_RECORD) + { + /* + Aria row lookup is fast for BLOCK_RECORDS as the row data is cached + and we know exactly on which block a row is. + */ + optimizer_row_copy_cost= 0.000118; + optimizer_row_lookup_cost= 0.52; + } + else + { + /* + MyISAM format row lookup costs are slow as the row data is on a not cached + file. + */ + optimizer_row_lookup_cost*= 3; + } +} + + /* We need to be able to store at least 2 keys on an index page as the splitting algorithms depends on this. (With only one key on a page diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 6b4302145dd..46dce02239e 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -77,7 +77,7 @@ public: { return max_supported_key_length(); } enum row_type get_row_type() const override final; void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override final; - virtual double scan_time() override final; + virtual IO_AND_CPU_COST scan_time() override final; int open(const char *name, int mode, uint test_if_locked) override; int close(void) override final; @@ -114,6 +114,7 @@ public: int remember_rnd_pos() override final; int restart_rnd_next(uchar * buf) override final; void position(const uchar * record) override final; + void optimizer_costs_updated(); int info(uint) override final; int info(uint, my_bool); int extra(enum ha_extra_function operation) override final; diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index de85ec51deb..58f8637abe7 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -3875,7 +3875,7 @@ restart: { pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_ASSERT(0); - return (uchar*) 0; + DBUG_RETURN((uchar*) 0); } } /* diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index 83c2d15a36c..5dd74ea6240 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -13000,9 +13000,9 @@ 
int ha_mroonga::truncate() DBUG_RETURN(error); } -double ha_mroonga::wrapper_scan_time() +IO_AND_CPU_COST ha_mroonga::wrapper_scan_time() { - double res; + IO_AND_CPU_COST res; MRN_DBUG_ENTER_METHOD(); MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); @@ -13012,17 +13012,16 @@ double ha_mroonga::wrapper_scan_time() DBUG_RETURN(res); } -double ha_mroonga::storage_scan_time() +IO_AND_CPU_COST ha_mroonga::storage_scan_time() { MRN_DBUG_ENTER_METHOD(); - double time = handler::scan_time(); - DBUG_RETURN(time); + DBUG_RETURN(handler::scan_time()); } -double ha_mroonga::scan_time() +IO_AND_CPU_COST ha_mroonga::scan_time() { MRN_DBUG_ENTER_METHOD(); - double time; + IO_AND_CPU_COST time; if (share->wrapper_mode) { time = wrapper_scan_time(); @@ -13032,51 +13031,87 @@ double ha_mroonga::scan_time() DBUG_RETURN(time); } -double ha_mroonga::wrapper_read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::wrapper_rndpos_time(ha_rows rows) +{ + IO_AND_CPU_COST res; + MRN_DBUG_ENTER_METHOD(); + MRN_SET_WRAP_SHARE_KEY(share, table->s); + MRN_SET_WRAP_TABLE_KEY(this, table); + res = wrap_handler->rndpos_time(rows); + MRN_SET_BASE_SHARE_KEY(share, table->s); + MRN_SET_BASE_TABLE_KEY(this, table); + DBUG_RETURN(res); +} + +IO_AND_CPU_COST ha_mroonga::storage_rndpos_time(ha_rows rows) { - double res; + MRN_DBUG_ENTER_METHOD(); + IO_AND_CPU_COST time = handler::rndpos_time(rows); + DBUG_RETURN(time); +} + + +IO_AND_CPU_COST ha_mroonga::rndpos_time(ha_rows rows) +{ + MRN_DBUG_ENTER_METHOD(); + IO_AND_CPU_COST time; + if (share->wrapper_mode) + { + time = wrapper_rndpos_time(rows); + } else { + time = storage_rndpos_time(rows); + } + DBUG_RETURN(time); +} + + +IO_AND_CPU_COST ha_mroonga::wrapper_keyread_time(uint index, ulong ranges, + ha_rows rows, ulonglong blocks) +{ + IO_AND_CPU_COST res; MRN_DBUG_ENTER_METHOD(); if (index < MAX_KEY) { KEY *key_info = &(table->key_info[index]); if (mrn_is_geo_key(key_info)) { - res = 
handler::read_time(index, ranges, rows); + res = handler::keyread_time(index, ranges, rows, blocks); DBUG_RETURN(res); } MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - res = wrap_handler->read_time(share->wrap_key_nr[index], ranges, rows); + res = wrap_handler->keyread_time(share->wrap_key_nr[index], ranges, rows, blocks); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); } else { MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - res = wrap_handler->read_time(index, ranges, rows); + res = wrap_handler->keyread_time(index, ranges, rows, blocks); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); } DBUG_RETURN(res); } -double ha_mroonga::storage_read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) { MRN_DBUG_ENTER_METHOD(); - double time = handler::read_time(index, ranges, rows); + IO_AND_CPU_COST time = handler::keyread_time(index, ranges, rows, blocks); DBUG_RETURN(time); } -double ha_mroonga::read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) { MRN_DBUG_ENTER_METHOD(); - double time; + IO_AND_CPU_COST time; if (share->wrapper_mode) { - time = wrapper_read_time(index, ranges, rows); + time = wrapper_keyread_time(index, ranges, rows, blocks); } else { - time = storage_read_time(index, ranges, rows); + time = storage_keyread_time(index, ranges, rows, blocks); } DBUG_RETURN(time); } + #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *ha_mroonga::wrapper_keys_to_use_for_scanning() { diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp index 66767899e21..38ed146676e 100644 --- a/storage/mroonga/ha_mroonga.hpp +++ b/storage/mroonga/ha_mroonga.hpp @@ -531,8 +531,9 @@ public: int end_bulk_insert() mrn_override; int 
delete_all_rows() mrn_override; int truncate() mrn_override; - double scan_time() mrn_override; - double read_time(uint index, uint ranges, ha_rows rows) mrn_override; + IO_AND_CPU_COST scan_time() mrn_override; + IO_AND_CPU_COST rndpos_time(ha_rows rows) mrn_override; + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) mrn_override; #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *keys_to_use_for_scanning() mrn_override; #endif @@ -1106,10 +1107,12 @@ private: int wrapper_truncate_index(); int storage_truncate(); int storage_truncate_index(); - double wrapper_scan_time(); - double storage_scan_time(); - double wrapper_read_time(uint index, uint ranges, ha_rows rows); - double storage_read_time(uint index, uint ranges, ha_rows rows); + IO_AND_CPU_COST wrapper_scan_time(); + IO_AND_CPU_COST storage_scan_time(); + IO_AND_CPU_COST wrapper_rndpos_time(ha_rows rows); + IO_AND_CPU_COST storage_rndpos_time(ha_rows rows); + IO_AND_CPU_COST wrapper_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks); + IO_AND_CPU_COST storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks); #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *wrapper_keys_to_use_for_scanning(); const key_map *storage_keys_to_use_for_scanning(); diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 3843004cc6e..6ceabd8d9c5 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -102,6 +102,17 @@ class ha_myisam final : public handler int remember_rnd_pos(); int restart_rnd_next(uchar *buf); void position(const uchar *record); + void optimizer_costs_updated() + { +#ifdef QQQQ + /* + MyISAM row lookup costs are slow as the row data is on a not cached file. 
+ The following number was found by check_costs.pl when using 1M rows + and all rows are cached + */ + optimizer_row_lookup_cost*= 3.0; +#endif + } int info(uint); int extra(enum ha_extra_function operation); int extra_opt(enum ha_extra_function operation, ulong cache_size); diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h index 6da327ec84b..7921b725bb2 100644 --- a/storage/myisammrg/ha_myisammrg.h +++ b/storage/myisammrg/ha_myisammrg.h @@ -102,8 +102,14 @@ public: uint max_supported_keys() const { return MI_MAX_KEY; } uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } - double scan_time() - { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; } + IO_AND_CPU_COST scan_time() + { + IO_AND_CPU_COST cost; + cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()+ + file->tables); + cost.cpu= records() * ROW_NEXT_FIND_COST; + return cost; + } int open(const char *name, int mode, uint test_if_locked); int add_children_list(void); diff --git a/storage/oqgraph/ha_oqgraph.h b/storage/oqgraph/ha_oqgraph.h index c8e175df616..dae81fd0c0c 100644 --- a/storage/oqgraph/ha_oqgraph.h +++ b/storage/oqgraph/ha_oqgraph.h @@ -74,9 +74,10 @@ public: const char **bas_ext() const; uint max_supported_keys() const { return MAX_KEY; } uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; } - double scan_time() { return (double) 1000000000; } - double read_time(uint index, uint ranges, ha_rows rows) - { return 1; } + IO_AND_CPU_COST scan_time() + { return { (double) 1000000000, (double) 1000000000 }; } + IO_AND_CPU_COST rndpos_time(ha_rows rows) + { return { (double) rows, (double) rows }; } // Doesn't make sense to change the engine on a virtual table. 
virtual bool can_switch_engines() { return false; } diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h index f3d84a3e264..dcad584224b 100644 --- a/storage/perfschema/ha_perfschema.h +++ b/storage/perfschema/ha_perfschema.h @@ -104,8 +104,10 @@ public: ha_rows estimate_rows_upper_bound(void) { return HA_POS_ERROR; } - double scan_time(void) - { return 1.0; } + IO_AND_CPU_COST scan_time(void) + { + return {0.0, 1.0}; + } /** Open a performance schema table. diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 72f88172a2a..b41851bd66a 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -14602,15 +14602,18 @@ bool ha_rocksdb::use_read_free_rpl() const { } #endif // MARIAROCKS_NOT_YET -double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { +IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { DBUG_ENTER_FUNC(); if (index != table->s->primary_key) { /* Non covering index range scan */ - DBUG_RETURN(handler::read_time(index, ranges, rows)); + DBUG_RETURN(handler::keyread_time(index, ranges, rows, blocks)); } - DBUG_RETURN((rows / 20.0) + 1); + IO_AND_CPU_COST cost= {0, (rows / 20.0) + ranges }; + DBUG_RETURN(cost); } void ha_rocksdb::print_error(int error, myf errflag) { diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 63bf7ffd602..d40fc539b0c 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -623,14 +623,17 @@ public: bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); - virtual double scan_time() override { + virtual IO_AND_CPU_COST scan_time() override + { + IO_AND_CPU_COST cost; DBUG_ENTER_FUNC(); - - DBUG_RETURN( - static_cast<double>((stats.records + stats.deleted) / 20.0 + 10)); + cost.io= 0; + cost.cpu= (stats.records + stats.deleted) * 0.001 + 1; + DBUG_RETURN(cost); } + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, + ha_rows 
rows, ulonglong blocks) override; - virtual double read_time(uint, uint, ha_rows rows) override; virtual void print_error(int error, myf errflag) override; int open(const char *const name, int mode, uint test_if_locked) override diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc index f5a18094521..1e558e6d3e4 100644 --- a/storage/sequence/sequence.cc +++ b/storage/sequence/sequence.cc @@ -32,6 +32,8 @@ static handlerton *sequence_hton; +#define SEQUENCE_SCAN_COST 1.53e-05 // See optimizer_costs.txt + class Sequence_share : public Handler_share { public: const char *name; @@ -100,9 +102,17 @@ public: int index_last(uchar *buf); ha_rows records_in_range(uint inx, const key_range *start_key, const key_range *end_key, page_range *pages); - double scan_time() { return (double)nvalues(); } - double read_time(uint index, uint ranges, ha_rows rows) { return (double)rows; } - double keyread_time(uint index, uint ranges, ha_rows rows) { return (double)rows; } + IO_AND_CPU_COST scan_time() + { + return {0, (double) nvalues() * SEQUENCE_SCAN_COST }; + } + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) + { + return {0, rows * SEQUENCE_SCAN_COST }; // Very low value/row + } + /* 0 for avg_io_cost ensures that there are no read-block calculations */ + double avg_io_cost() { return 0.0; } private: void set(uchar *buf); diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h index f03e9d8c797..c7c61a4738a 100644 --- a/storage/sphinx/ha_sphinx.h +++ b/storage/sphinx/ha_sphinx.h @@ -72,14 +72,30 @@ public: uint max_supported_key_length () const { return MAX_KEY_LENGTH; } uint max_supported_key_part_length () const { return MAX_KEY_LENGTH; } - #if MYSQL_VERSION_ID>50100 - virtual double scan_time () { return (double)( stats.records+stats.deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used - #else - virtual double scan_time () { return (double)( records+deleted )/20.0 
+ 10; } ///< called in test_quick_select to determine if indexes should be used - #endif + IO_AND_CPU_COST scan_time () + { + IO_AND_CPU_COST cost; + cost.io= 0; + cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost(); + return cost; + } + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) + { + IO_AND_CPU_COST cost; + cost.io= ranges; + cost.cpu= (double) rows * DEFAULT_ROW_LOOKUP_COST; + return cost; + } + IO_AND_CPU_COST rndpos_time(ha_rows rows) + { + IO_AND_CPU_COST cost; + cost.io= 0; + cost.cpu= (double) rows * DEFAULT_ROW_LOOKUP_COST; + return cost; + } + - virtual double read_time(uint index, uint ranges, ha_rows rows) - { return ranges + (double)rows/20.0 + 1; } ///< index read time estimate public: int open ( const char * name, int mode, uint test_if_locked ); diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index 712397ffb92..e0c3e64ecb6 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -11350,37 +11350,44 @@ void ha_spider::bulk_req_exec() } #endif -double ha_spider::scan_time() +IO_AND_CPU_COST ha_spider::scan_time() { + IO_AND_CPU_COST cost; DBUG_ENTER("ha_spider::scan_time"); DBUG_PRINT("info",("spider this=%p", this)); - DBUG_PRINT("info",("spider scan_time = %.6f", - share->scan_rate * share->stat.records * share->stat.mean_rec_length + 2)); - DBUG_RETURN(share->scan_rate * share->stat.records * - share->stat.mean_rec_length + 2); + cost.io=0; + cost.cpu= (share->scan_rate * share->stat.records * + share->stat.mean_rec_length); + DBUG_PRINT("info",("spider scan_time = %.6f", cost.cpu)); + return cost; } -double ha_spider::read_time( - uint index, - uint ranges, - ha_rows rows -) { - DBUG_ENTER("ha_spider::read_time"); +IO_AND_CPU_COST ha_spider::rndpos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost= { 0.0, 0.0}; // Row is in memory + return cost; +} + +IO_AND_CPU_COST ha_spider::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) 
+{ + IO_AND_CPU_COST cost; + DBUG_ENTER("ha_spider::keyread_time"); DBUG_PRINT("info",("spider this=%p", this)); if (wide_handler->keyread) { - DBUG_PRINT("info",("spider read_time(keyread) = %.6f", - share->read_rate * table->key_info[index].key_length * - rows / 2 + 2)); - DBUG_RETURN(share->read_rate * table->key_info[index].key_length * - rows / 2 + 2); + cost.io= ranges; + cost.cpu= (share->read_rate * table->key_info[index].key_length * rows / 2 + + 2); } else { - DBUG_PRINT("info",("spider read_time = %.6f", - share->read_rate * share->stat.mean_rec_length * rows + 2)); - DBUG_RETURN(share->read_rate * share->stat.mean_rec_length * rows + 2); + cost.io= ranges; + cost.cpu= share->read_rate * share->stat.mean_rec_length * rows + 2; } + DBUG_PRINT("info",("spider scan_time(keyread) = %.6f", cost.cpu)); + DBUG_RETURN(cost); } + const key_map *ha_spider::keys_to_use_for_scanning() { DBUG_ENTER("ha_spider::keys_to_use_for_scanning"); diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h index 3036f8d522a..8e8308c760a 100644 --- a/storage/spider/ha_spider.h +++ b/storage/spider/ha_spider.h @@ -787,12 +787,10 @@ public: #endif int delete_all_rows(); int truncate(); - double scan_time(); - double read_time( - uint index, - uint ranges, - ha_rows rows - ); + IO_AND_CPU_COST scan_time(); + IO_AND_CPU_COST rndpos_time(ha_rows rows); + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); #ifdef HA_CAN_BULK_ACCESS void bulk_req_exec(); #endif |