summaryrefslogtreecommitdiff
path: root/sql/optimizer_costs.h
blob: 8634285319ad361b6bfac903623db14e1b31afa4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#ifndef OPTIMIZER_COSTS_INCLUDED
#define OPTIMIZER_COSTS_INCLUDED
/*
   Copyright (c) 2022, MariaDB AB

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA
*/

/* This file includes costs variables used by the optimizer */

/**
  The following is used to decide if MariaDB should use table scanning
  instead of reading with keys.  The number says how many evaluations of the
  WHERE clause are comparable to reading one extra row from a table.
*/
#define TIME_FOR_COMPARE         5.0	//  5 WHERE compares == one read
#define TIME_FOR_COMPARE_IDX    20.0

/*
  The table/index cache hit ratio in %. 0 means that a searched for key or row
  will never be in the cache while 100 means it is always in the cache.

  According to folklore, one needs at least an 80% hit rate in the cache for
  MariaDB to run very well. We set CACHE_HIT_RATIO to a bit smaller
  as there is still a cost involved in finding the row in the B tree, hash
  or other seek structure.

  Increasing CACHE_HIT_RATIO will make MariaDB prefer key lookups over
  table scans as the impact of ROW_COPY_COST and INDEX_COPY cost will
  have a larger impact when more rows are examined.

  Note that avg_io_cost() is multiplied with this constant!
*/
#define CACHE_HIT_RATIO 50

/* Convert ratio to cost */

static inline double cache_hit_cost(uint ratio)
{
  /* Turn a cache hit ratio (in %) into a cost factor: the fraction of
     accesses expected to miss the cache, as a value in [0.0, 1.0]. */
  double miss_percent= (double) (100 - ratio);
  return miss_percent / 100.0;
}

/*
  Cost of finding and copying keys of a total length of 'blocksize'
  used in handler::keyread_time()
 */
#define INDEX_BLOCK_COPY_COST  ((double) 1 / 5.0)
/* Cost for finding the first key in a key scan */
#define KEY_LOOKUP_COST      ((double) 1.0)
/* Cost of finding a key from a row_ID (not used for clustered keys) */
#define ROW_LOOKUP_COST      ((double) 1.0)
/*
  Cost of finding the next row during table scan and copying it to
  'table->record'.
  If this is too small, then table scans will be preferred over 'ref'
  as with table scans there are no key read (KEY_LOOKUP_COST), fewer
  disk reads but more record copying and row comparisons.  If it's
  too big then MariaDB will use key lookup even when table scan is
  better.
*/
#define ROW_COPY_COST     ((double) 1.0 / 20.0)
/*
  Cost of finding the next key during index scan and copying it to
  'table->record'

  If this is too small, then index scans will be preferred over 'ref'
  as with table scans there are no key read (KEY_LOOKUP_COST) and
  fewer disk reads.
*/
#define KEY_COPY_COST     ((double) 1.0 / 40.0)
/*
  Cost of finding the next index entry and checking it against filter
  This cost is very low as it's done inside the storage engine.
  Should be smaller than KEY_COPY_COST.
 */
#define KEY_NEXT_FIND_COST ((double) 1.0 / 80.0)

/* Extra cost for doing a range scan. Used to prefer 'ref' over range */
#define MULTI_RANGE_READ_SETUP_COST (double) (1.0 / 50.0)

/*
  These costs are mainly to handle small tables, like the one we have in the
  mtr test suite
*/
/* Extra cost for full table scan. Used to prefer range over table scans */
#define TABLE_SCAN_SETUP_COST 1.0
/* Extra cost for full index scan. Used to prefer range over index scans */
#define INDEX_SCAN_SETUP_COST 1.0

/*
  The lower bound of accepted rows when using filter.
  This is used to ensure that filters are not too aggressive.
*/
#define MIN_ROWS_AFTER_FILTERING 1.0

/**
  Number of comparisons of table rowids equivalent to reading one row from a
  table.
*/
#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*100)

/*
  cost1 is better than cost2 only if cost1 + COST_EPS < cost2
  The main purpose of this is to ensure we use the first index or plan
  when there are identical plans. Without COST_EPS some plans in the
  test suite would vary depending on floating point calculations done
  in different paths.
 */
#define COST_EPS  0.0001

/*
  For sequential disk seeks the cost formula is:
    DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip

  The cost of average seek
    DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
*/
#define DISK_SEEK_BASE_COST ((double)0.9)

#define BLOCKS_IN_AVG_SEEK  128

#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)

/*
  Subquery materialization-related constants
*/
/* This should match ha_heap::read_time() */
#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
#define HEAP_TEMPTABLE_CREATE_COST 1.0
#define DISK_TEMPTABLE_LOOKUP_COST 1.0
#define DISK_TEMPTABLE_CREATE_COST 4.0 /* Creating and deleting 2 temp tables */
#define SORT_INDEX_CMP_COST 0.02

#define COST_MAX (DBL_MAX * (1.0 - DBL_EPSILON))

#define COST_ADD(c,d) (COST_MAX - (d) > (c) ? (c) + (d) : COST_MAX)

#define COST_MULT(c,f) (COST_MAX / (f) > (c) ? (c) * (f) : COST_MAX)

#endif /* OPTIMIZER_COSTS_INCLUDED */