summaryrefslogtreecommitdiff
path: root/sql/sql_statistics.h
diff options
context:
space:
mode:
authorIgor Babaev <igor@askmonty.org>2013-03-25 23:48:29 -0700
committerIgor Babaev <igor@askmonty.org>2013-03-25 23:48:29 -0700
commit1009832c13380365c03f77fcabd0fda470b73390 (patch)
tree73e123df951d60220a4cb0cac2ca19b2ebff7056 /sql/sql_statistics.h
parentfc1c8ffdadfd14eb51969ecfde43e3204f10f6f8 (diff)
downloadmariadb-git-1009832c13380365c03f77fcabd0fda470b73390.tar.gz
Added histograms for table columns.
Diffstat (limited to 'sql/sql_statistics.h')
-rw-r--r--sql/sql_statistics.h124
1 files changed, 111 insertions, 13 deletions
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 6bf552b92a0..9a2b5c2433b 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -16,15 +16,6 @@
#ifndef SQL_STATISTICS_H
#define SQL_STATISTICS_H
-/*
- These enumeration types comprise the dictionary of three
- statistical tables table_stat, column_stat and index_stat
- as they defined in ../scripts/mysql_system_tables.sql.
-
- It would be nice if the declarations of these types were
- generated automatically by the table definitions.
-*/
-
typedef
enum enum_use_stat_tables_mode
{
@@ -40,6 +31,16 @@ enum enum_stat_tables
INDEX_STAT,
};
+
+/*
+ These enumeration types comprise the dictionary of three
+ statistical tables table_stat, column_stat and index_stat
+ as they are defined in ../scripts/mysql_system_tables.sql.
+
+ It would be nice if the declarations of these types were
+ generated automatically by the table definitions.
+*/
+
enum enum_table_stat_col
{
TABLE_STAT_DB_NAME,
@@ -56,7 +57,9 @@ enum enum_column_stat_col
COLUMN_STAT_MAX_VALUE,
COLUMN_STAT_NULLS_RATIO,
COLUMN_STAT_AVG_LENGTH,
- COLUMN_STAT_AVG_FREQUENCY
+ COLUMN_STAT_AVG_FREQUENCY,
+ COLUMN_STAT_HIST_SIZE,
+ COLUMN_STAT_HISTOGRAM
};
enum enum_index_stat_col
@@ -96,6 +99,98 @@ double get_column_range_cardinality(Field *field,
key_range *min_endp,
key_range *max_endp);
+/*
+  Scale used to store a histogram position in one byte: Histogram::set_value()
+  multiplies a double by HIST_FACTOR before truncating it to uint8.
+*/
+#define HIST_FACTOR 255
+/*
+  Reciprocal of HIST_FACTOR; Histogram::point_selectivity() uses it to turn
+  stored byte values back into a fraction.
+*/
+#define INV_HIST_FACTOR ((double) 1.0 / HIST_FACTOR)
+
+/*
+  Histogram over the value positions of a column.
+
+  The histogram is an array of 'size' one-byte boundaries. Each boundary is a
+  position scaled by HIST_FACTOR (see set_value()), and values[i] is treated
+  as the upper bound of bucket i, whose lower bound is values[i-1] (or 0 for
+  the first bucket). Every bucket is assumed to hold the same fraction
+  1/(size + 1) of the rows. find_bucket() binary-searches the array, so the
+  boundaries are expected to be stored in non-decreasing order.
+
+  The class does not allocate the boundary array: set_values() only stores
+  the pointer it is given.
+*/
+class Histogram
+{
+private:
+  uint8 size;     /* Number of boundaries stored in 'values' */
+  uint8 *values;  /* Boundaries, each scaled by HIST_FACTOR */
+
+  /*
+    Find the bucket that contains position 'pos'.
+
+    @param pos    Position to look up; clamped to [0, 1] before scaling
+                  (this also keeps the conversion to uint8 well defined).
+    @param first  When the scaled position equals several consecutive
+                  boundaries: TRUE picks the leftmost of them, FALSE the
+                  rightmost.
+
+    @return Smallest index i with values[i] >= scaled pos, adjusted as
+            described for 'first'; size - 1 if the scaled position is above
+            every boundary; 0 for an empty histogram (nothing is read).
+  */
+  uint find_bucket(double pos, bool first)
+  {
+    uint n= size;
+    if (n == 0)
+      return 0;
+
+    /* Written so that NaN also ends up as 0.0. */
+    if (!(pos > 0.0))
+      pos= 0.0;
+    else if (pos > 1.0)
+      pos= 1.0;
+    uint8 val= (uint8) (pos * HIST_FACTOR);
+
+    /* Lower-bound search; never touches an index outside [0, n - 1]. */
+    uint lo= 0;
+    uint hi= n - 1;
+    while (lo < hi)
+    {
+      uint mid= lo + (hi - lo) / 2;
+      if (values[mid] < val)
+        lo= mid + 1;
+      else
+        hi= mid;
+    }
+
+    /* 'lo' is already the leftmost match; walk right for the last one. */
+    if (!first && values[lo] == val)
+    {
+      while (lo + 1 < n && values[lo + 1] == val)
+        lo++;
+    }
+    return lo;
+  }
+
+public:
+
+  /* Number of boundaries in the histogram. */
+  uint get_size() { return (uint) size; }
+
+  /* Pointer to the boundary array as raw bytes. */
+  uchar *get_values() { return (uchar *) values; }
+
+  /* Set the number of boundaries; 'sz' is truncated to 8 bits. */
+  void set_size (ulonglong sz) { size= (uint8) sz; }
+
+  /* Attach the boundary array; the pointer is stored, not copied. */
+  void set_values (uchar *vals) { values= (uint8 *) vals; }
+
+  /* Store position 'val' as boundary i, scaled by HIST_FACTOR. */
+  void set_value(uint i, double val)
+  {
+    values[i]= (uint8) (val * HIST_FACTOR);
+  }
+
+  /* Copy boundary i-1 into boundary i; reads values[i-1], so i must be >= 1. */
+  void set_prev_value(uint i) { values[i]= values[i-1]; }
+
+  /*
+    Estimate the fraction of rows whose position lies in [min_pos, max_pos].
+
+    @return Number of buckets spanned by the range times 1/(size + 1);
+            0.0 if the range is inverted (previously the unsigned bucket
+            difference wrapped around).
+  */
+  double range_selectivity(double min_pos, double max_pos)
+  {
+    double bucket_sel= 1.0 / (size + 1);
+    uint first_bucket= find_bucket(min_pos, TRUE);
+    uint last_bucket= find_bucket(max_pos, FALSE);
+    if (last_bucket < first_bucket)
+      return 0.0;
+    return bucket_sel * (last_bucket - first_bucket + 1);
+  }
+
+  /*
+    Estimate the selectivity of an equality with the value at position 'pos'.
+
+    @param pos      Position of the value.
+    @param avg_sel  Average selectivity of a single value; it is scaled by the
+                    row density (rows fraction / position width) of the run
+                    of buckets containing 'pos'.
+
+    @return avg_sel * (fraction of buckets in the run) / (width of the run).
+            If the run has no positive width the bucket fraction itself is
+            returned instead of dividing by zero.
+  */
+  double point_selectivity(double pos, double avg_sel)
+  {
+    double bucket_sel= 1.0 / (size + 1);
+    uint n= size;
+    uint lo= find_bucket(pos, TRUE);
+    uint hi= lo;
+    /* Extend over following buckets that share the same upper boundary. */
+    while (hi + 1 < n && values[hi + 1] == values[hi])
+      hi++;
+
+    /*
+      Convert each boundary to a fraction before subtracting: the run that
+      reaches the last boundary is taken to extend to 1.0, which must not be
+      mixed with raw 0..255 byte values.
+    */
+    double upper= (hi + 1 >= n) ? 1.0 : values[hi] * INV_HIST_FACTOR;
+    double lower= (lo == 0) ? 0.0 : values[lo - 1] * INV_HIST_FACTOR;
+    double width= upper - lower;
+    double occupied= bucket_sel * (hi + 1 - lo);
+
+    if (width <= 0.0)
+      return occupied;
+    return avg_sel * occupied / width;
+  }
+
+};
+
+
class Columns_statistics;
class Index_statistics;
@@ -111,8 +206,9 @@ public:
uchar *min_max_record_buffers; /* Record buffers for min/max values */
Column_statistics *column_stats; /* Array of statistical data for columns */
Index_statistics *index_stats; /* Array of statistical data for indexes */
- ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
-
+ ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
+ ulong total_hist_size;
+ uchar *histograms; /* Sequence of histograms */
};
@@ -167,10 +263,12 @@ private:
public:
+ Histogram histogram;
+
void set_all_nulls()
{
column_stat_nulls=
- ((1 << (COLUMN_STAT_AVG_FREQUENCY-COLUMN_STAT_COLUMN_NAME))-1) <<
+ ((1 << (COLUMN_STAT_HISTOGRAM-COLUMN_STAT_COLUMN_NAME))-1) <<
(COLUMN_STAT_COLUMN_NAME+1);
}