summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <sanja@askmonty.org>2013-04-18 22:22:04 +0300
committerunknown <sanja@askmonty.org>2013-04-18 22:22:04 +0300
commit9441e536535af3e71aaa81801645ab42bd07e89f (patch)
tree4479a5d77e53380b7bd2d9b3d4db3c271ea6b40a /sql
parent8c71e7a38358c79e1b5b9072fa7407cec3cfd273 (diff)
downloadmariadb-git-9441e536535af3e71aaa81801645ab42bd07e89f.tar.gz
MDEV-4345
Sampling of selectivity of LIKE predicate.
Diffstat (limited to 'sql')
-rw-r--r--sql/item.h15
-rw-r--r--sql/item_cmpfunc.cc33
-rw-r--r--sql/item_cmpfunc.h8
-rw-r--r--sql/opt_range.cc69
-rw-r--r--sql/opt_range.h7
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_select.cc72
-rw-r--r--sql/sql_select.h13
-rw-r--r--sql/sys_vars.cc15
-rw-r--r--sql/table.cc1
-rw-r--r--sql/table.h3
11 files changed, 232 insertions, 5 deletions
diff --git a/sql/item.h b/sql/item.h
index 50d125f76de..d8ae051b4b5 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -548,6 +548,14 @@ typedef bool (Item::*Item_analyzer) (uchar **argp);
typedef Item* (Item::*Item_transformer) (uchar *arg);
typedef void (*Cond_traverser) (const Item *item, void *arg);
+struct st_cond_statistic;
+
+struct find_selective_predicates_list_processor_data
+{
+ TABLE *table;
+ List<st_cond_statistic> list;
+};
+
class Item_equal;
class COND_EQUAL;
@@ -1108,6 +1116,11 @@ public:
return (this->*processor)(arg);
}
+ virtual bool walk_top_and(Item_processor processor, uchar *arg)
+ {
+ return (this->*processor)(arg);
+ }
+
virtual Item* transform(Item_transformer transformer, uchar *arg);
/*
@@ -1174,6 +1187,8 @@ public:
return FALSE;
}
virtual bool exists2in_processor(uchar *opt_arg) { return 0; }
+ virtual bool find_selective_predicates_list_processor(uchar *opt_arg)
+ { return 0; }
/* To call bool function for all arguments */
struct bool_func_call_args
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index b5b143a2448..7ca85a72cfa 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -4428,6 +4428,16 @@ bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg)
return Item_func::walk(processor, walk_subquery, arg);
}
+bool Item_cond_and::walk_top_and(Item_processor processor, uchar *arg)
+{
+ List_iterator_fast<Item> li(list);
+ Item *item;
+ while ((item= li++))
+ if (item->walk_top_and(processor, arg))
+ return 1;
+ return Item_cond::walk_top_and(processor, arg);
+}
+
/**
Transform an Item_cond object with a transformer callback function.
@@ -4940,6 +4950,7 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref)
turboBM_compute_bad_character_shifts();
DBUG_PRINT("info",("done"));
}
+ use_sampling= ((*first == wild_many || *first == wild_one) && len > 2);
}
}
return FALSE;
@@ -4951,6 +4962,28 @@ void Item_func_like::cleanup()
Item_bool_func2::cleanup();
}
+
+bool Item_func_like::find_selective_predicates_list_processor(uchar *arg)
+{
+ find_selective_predicates_list_processor_data *data=
+ (find_selective_predicates_list_processor_data *) arg;
+ if (use_sampling && used_tables() == data->table->map)
+ {
+ COND_STATISTIC *stat= (COND_STATISTIC *)sql_alloc(sizeof(COND_STATISTIC));
+ if (!stat)
+ return TRUE;
+ stat->cond= this;
+ Item *arg0= args[0]->real_item();
+ if (args[1]->const_item() && arg0->type() == FIELD_ITEM)
+ stat->field_arg= ((Item_field *)arg0)->field;
+ else
+ stat->field_arg= NULL;
+ data->list.push_back(stat);
+ }
+ return FALSE;
+}
+
+
/**
@brief Compile regular expression.
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 81598ba96c8..ab4219564fe 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -1487,8 +1487,9 @@ class Item_func_like :public Item_bool_func2
enum { alphabet_size = 256 };
Item *escape_item;
-
+
bool escape_used_in_parsing;
+ bool use_sampling;
public:
int escape;
@@ -1496,7 +1497,7 @@ public:
Item_func_like(Item *a,Item *b, Item *escape_arg, bool escape_used)
:Item_bool_func2(a,b), canDoTurboBM(FALSE), pattern(0), pattern_len(0),
bmGs(0), bmBc(0), escape_item(escape_arg),
- escape_used_in_parsing(escape_used) {}
+ escape_used_in_parsing(escape_used), use_sampling(0) {}
longlong val_int();
enum Functype functype() const { return LIKE_FUNC; }
optimize_type select_optimize() const;
@@ -1504,6 +1505,8 @@ public:
const char *func_name() const { return "like"; }
bool fix_fields(THD *thd, Item **ref);
void cleanup();
+
+ bool find_selective_predicates_list_processor(uchar *arg);
};
@@ -1914,6 +1917,7 @@ public:
Item *neg_transformer(THD *thd);
void mark_as_condition_AND_part(TABLE_LIST *embedding);
virtual uint exists2in_reserved_items() { return list.elements; };
+ bool walk_top_and(Item_processor processor, uchar *arg);
};
inline bool is_cond_and(Item *item)
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index c29a888ea6f..b736f898768 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3530,7 +3530,74 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
}
}
}
- }
+ }
+
+ /* Calculate selectivity of probably highly selective predicates */
+ ulong check_rows=
+ min(thd->variables.optimizer_selectivity_sampling_limit,
+ (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
+ if (cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD &&
+ thd->variables.optimizer_use_condition_selectivity > 4)
+ {
+ find_selective_predicates_list_processor_data *dt=
+ (find_selective_predicates_list_processor_data *)
+ alloc_root(thd->mem_root,
+ sizeof(find_selective_predicates_list_processor_data));
+ if (!dt)
+ DBUG_RETURN(TRUE);
+ dt->list.empty();
+ dt->table= table;
+ if (cond->walk(&Item::find_selective_predicates_list_processor, 0,
+ (uchar*) dt))
+ DBUG_RETURN(TRUE);
+ if (dt->list.elements > 0)
+ {
+ check_rows= check_selectivity(thd, check_rows, table, &dt->list);
+ if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD)
+ {
+ COND_STATISTIC *stat;
+ List_iterator_fast<COND_STATISTIC> it(dt->list);
+ double examined_rows= check_rows;
+ while ((stat= it++))
+ {
+ if (!stat->positive)
+ {
+ DBUG_PRINT("info", ("To avoid 0 assigned 1 to the counter"));
+ stat->positive= 1; // avoid 0
+ }
+ DBUG_PRINT("info", ("The predicate selectivity : %g",
+ (double)stat->positive / examined_rows));
+ double selectivity= ((double)stat->positive) / examined_rows;
+ table->cond_selectivity*= selectivity;
+ /*
+ If a field is involved then we register its selectivity in case
+ there in an equality with the field.
+ For example in case
+ t1.a LIKE "%bla%" and t1.a = t2.b
+ the selectivity we have found could be used also for t2.
+ */
+ if (stat->field_arg)
+ {
+ stat->field_arg->cond_selectivity*= selectivity;
+
+ if (stat->field_arg->next_equal_field)
+ {
+ for (Field *next_field= stat->field_arg->next_equal_field;
+ next_field != stat->field_arg;
+ next_field= next_field->next_equal_field)
+ {
+ next_field->cond_selectivity*= selectivity;
+ next_field->table->cond_selectivity*= selectivity;
+ }
+ }
+ }
+ }
+
+ }
+ /* This list and its elements put to mem_root so should not be freed */
+ table->cond_selectivity_sampling_explain= &dt->list;
+ }
+ }
DBUG_RETURN(FALSE);
}
diff --git a/sql/opt_range.h b/sql/opt_range.h
index d98bf1186e8..963551cabdb 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -1051,4 +1051,11 @@ void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
extern String null_string;
+/* check this number of rows (default value) */
+#define SELECTIVITY_SAMPLING_LIMIT 5000
+/* but no more then this part of table (10%) */
+#define SELECTIVITY_SAMPLING_SHARE 0.10
+/* do not check if we are going check less then this number of records */
+#define SELECTIVITY_SAMPLING_THRESHOLD 10
+
#endif
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 8e2bd59da57..bb5b2c4e775 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -505,6 +505,7 @@ typedef struct system_variables
ulong net_write_timeout;
ulong optimizer_prune_level;
ulong optimizer_search_depth;
+ ulong optimizer_selectivity_sampling_limit;
ulong optimizer_use_condition_selectivity;
ulong use_stat_tables;
ulong histogram_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index cc4b139fb2f..a48eff66386 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -23972,6 +23972,78 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
return MAX_KEY;
}
+/*
+ Count how much times conditions are true for several first rows of the table
+
+ @param thd thread handle
+ @param rows_to_read how much rows to check
+ @param table table which should be checked
+ @conds conds list of conditions and countars for them
+
+ @return number of really checked rows or 0 in case of error or empty table
+*/
+
+ulong check_selectivity(THD *thd,
+ ulong rows_to_read,
+ TABLE *table,
+ List<COND_STATISTIC> *conds)
+{
+ ulong count= 0;
+ COND_STATISTIC *cond;
+ List_iterator_fast<COND_STATISTIC> it(*conds);
+ handler *file= table->file;
+ uchar *record= table->record[0];
+ int error= 0;
+ DBUG_ENTER("check_selectivity");
+
+ DBUG_ASSERT(rows_to_read > 0);
+ while ((cond= it++))
+ {
+ DBUG_ASSERT(cond->cond);
+ DBUG_ASSERT(cond->cond->used_tables() == table->map);
+ cond->positive= 0;
+ }
+ it.rewind();
+
+ if (file->ha_rnd_init_with_error(1))
+ DBUG_RETURN(0);
+ do
+ {
+ error= file->ha_rnd_next(record);
+
+ if (thd->killed)
+ {
+ thd->send_kill_message();
+ count= 0;
+ goto err;
+ }
+ if (error)
+ {
+ if (error == HA_ERR_RECORD_DELETED)
+ continue;
+ if (error == HA_ERR_END_OF_FILE)
+ break;
+ goto err;
+ }
+
+ count++;
+ while ((cond= it++))
+ {
+ if (cond->cond->val_bool())
+ cond->positive++;
+ }
+ it.rewind();
+
+ } while (count < rows_to_read);
+
+ file->ha_rnd_end();
+ DBUG_RETURN(count);
+
+err:
+ DBUG_PRINT("error", ("error %d", error));
+ file->ha_rnd_end();
+ DBUG_RETURN(0);
+}
/**
@} (end of group Query_Optimizer)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 9cda68bd434..950c48d6ea1 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1855,4 +1855,17 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
double prev_record_reads(POSITION *positions, uint idx, table_map found_ref);
void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
+struct st_cond_statistic
+{
+ Item *cond;
+ Field *field_arg;
+ ulong positive;
+};
+typedef struct st_cond_statistic COND_STATISTIC;
+
+ulong check_selectivity(THD *thd,
+ ulong rows_to_read,
+ TABLE *table,
+ List<COND_STATISTIC> *conds);
+
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 4393809a6fe..24009cb5a99 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -56,6 +56,7 @@
#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
#include "threadpool.h"
#include "sql_repl.h"
+#include "opt_range.h"
/*
The rule for this file: everything should be 'static'. When a sys_var
@@ -1533,6 +1534,14 @@ static Sys_var_ulong Sys_optimizer_prune_level(
SESSION_VAR(optimizer_prune_level), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, 1), DEFAULT(1), BLOCK_SIZE(1));
+static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
+ "optimizer_selectivity_sampling_limit",
+ "Controls number of record samples to check condition selectivity",
+ SESSION_VAR(optimizer_selectivity_sampling_limit),
+ CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
+ DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
+
static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"optimizer_use_condition_selectivity",
"Controls selectivity of which conditions the optimizer takes into "
@@ -1548,9 +1557,11 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"not backed by any index to calculate the cardinality of a partial join, "
"4 - use histograms to calculate selectivity of range conditions that "
"are not backed by any index to calculate the cardinality of "
- "a partial join.",
+ "a partial join."
+ "5 - additionally use selectivity of certain non-range predicates "
+ "calculated on record samples",
SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(1, 4), DEFAULT(1), BLOCK_SIZE(1));
+ VALID_RANGE(1, 5), DEFAULT(1), BLOCK_SIZE(1));
/** Warns about deprecated value 63 */
static bool fix_optimizer_search_depth(sys_var *self, THD *thd,
diff --git a/sql/table.cc b/sql/table.cc
index e88b3453ce9..caed8ed4107 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -3891,6 +3891,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
reginfo.impossible_range= 0;
created= TRUE;
cond_selectivity= 1.0;
+ cond_selectivity_sampling_explain= NULL;
/* Catch wrong handling of the auto_increment_field_not_null. */
DBUG_ASSERT(!auto_increment_field_not_null);
diff --git a/sql/table.h b/sql/table.h
index e721d60f892..af79bffd437 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1038,6 +1038,8 @@ enum index_hint_type
INDEX_HINT_FORCE
};
+struct st_cond_statistic;
+
#define CHECK_ROW_FOR_NULLS_TO_REJECT (1 << 0)
#define REJECT_ROW_DUE_TO_NULL_FIELDS (1 << 1)
@@ -1163,6 +1165,7 @@ public:
ha_rows quick_condition_rows;
double cond_selectivity;
+ List<st_cond_statistic> *cond_selectivity_sampling_explain;
table_map map; /* ID bit of table (1,2,4,8,16...) */