path: root/sql
author     Varun Gupta <varun.gupta@mariadb.com>  2019-02-13 11:22:16 +0530
committer  Varun Gupta <varun.gupta@mariadb.com>  2019-02-13 11:52:36 +0530
commit     be8709eb7bdf2a68a1c04fd8ab368113f5f39b63 (patch)
tree       35585804a654ec75089a92e90d546f73e7f60b68 /sql
parent     6b979416e0e4eac0a036ca5f2b81b748a3d2e680 (diff)
download   mariadb-git-be8709eb7bdf2a68a1c04fd8ab368113f5f39b63.tar.gz
MDEV-6111 Optimizer Trace
This task implements the optimizer trace. The feature produces a trace for any SELECT/UPDATE/DELETE query, containing information about decisions taken by the optimizer during the optimization phase (choice of table access method, various costs, transformations, etc). It helps explain why some decisions were taken by the optimizer and why others were rejected.

The trace is session-local, controlled by the @@optimizer_trace variable. To enable the optimizer trace, run:

  set @@optimizer_trace='enabled=on';

To display the trace, run:

  SELECT trace FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;

This task also involves MDEV-18489 (Limit the memory used by the optimizer trace), which introduces the optimizer_trace_max_mem_size switch to limit the memory used by the trace. That part was implemented by Sergei Petrunia.
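As an orientation aid, here is a minimal sketch of the RAII pattern this patch uses throughout the optimizer to emit trace nodes (not part of the commit; trace_table_scan is a hypothetical helper, modelled on sites like test_quick_select() below). When the trace is disabled, the writer obtained from the THD is NULL and every call degenerates to a no-op:

    // Hypothetical illustration of the Json_writer_object usage pattern
    // introduced by this patch.
    static void trace_table_scan(THD *thd, double rows, double cost)
    {
      Json_writer_object obj(thd, "table_scan"); // opens  "table_scan": {
      obj.add("rows", rows).add("cost", cost);   // adds the two members
    }                                            // destructor closes  }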
Diffstat (limited to 'sql')
-rw-r--r--  sql/CMakeLists.txt              1
-rw-r--r--  sql/handler.h                   1
-rw-r--r--  sql/my_json_writer.cc          16
-rw-r--r--  sql/my_json_writer.h          228
-rw-r--r--  sql/opt_range.cc              845
-rw-r--r--  sql/opt_range.h                 2
-rw-r--r--  sql/opt_subselect.cc           66
-rw-r--r--  sql/opt_table_elimination.cc   30
-rw-r--r--  sql/opt_trace.cc              722
-rw-r--r--  sql/opt_trace.h               201
-rw-r--r--  sql/opt_trace_context.h        92
-rw-r--r--  sql/set_var.h                   9
-rw-r--r--  sql/sp_head.cc                 13
-rw-r--r--  sql/sp_head.h                   1
-rw-r--r--  sql/sql_class.cc               14
-rw-r--r--  sql/sql_class.h                15
-rw-r--r--  sql/sql_derived.cc             49
-rw-r--r--  sql/sql_explain.cc              2
-rw-r--r--  sql/sql_parse.cc                8
-rw-r--r--  sql/sql_prepare.cc             12
-rw-r--r--  sql/sql_select.cc             632
-rw-r--r--  sql/sql_show.cc                 7
-rw-r--r--  sql/sql_test.cc                27
-rw-r--r--  sql/sql_test.h                  2
-rw-r--r--  sql/sql_view.cc                10
-rw-r--r--  sql/sys_vars.cc                18
-rw-r--r--  sql/table.cc                    2
27 files changed, 2744 insertions, 281 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index a22ce694805..69ca326c1fd 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -139,6 +139,7 @@ SET (SQL_SOURCE
sql_sequence.cc sql_sequence.h ha_sequence.h
sql_tvc.cc sql_tvc.h
opt_split.cc
+ opt_trace.cc
${WSREP_SOURCES}
table_cache.cc encryption.cc temporary_tables.cc
proxy_protocol.cc backup.cc
diff --git a/sql/handler.h b/sql/handler.h
index dfb2333b24e..afe62123170 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -993,6 +993,7 @@ enum enum_schema_tables
SCH_KEY_CACHES,
SCH_KEY_COLUMN_USAGE,
SCH_OPEN_TABLES,
+ SCH_OPT_TRACE,
SCH_PARAMETERS,
SCH_PARTITIONS,
SCH_PLUGINS,
diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc
index 9c4d63f9c4a..7ae0c58bd7d 100644
--- a/sql/my_json_writer.cc
+++ b/sql/my_json_writer.cc
@@ -219,19 +219,20 @@ void Json_writer::add_str(const String &str)
add_str(str.ptr(), str.length());
}
-Json_writer_object::Json_writer_object(Json_writer *writer):Json_writer_struct(writer)
+Json_writer_object::Json_writer_object(THD *thd) :
+ Json_writer_struct(thd)
{
if (my_writer)
my_writer->start_object();
}
-Json_writer_object::Json_writer_object(Json_writer *writer, const char *str)
- :Json_writer_struct(writer)
+Json_writer_object::Json_writer_object(THD* thd, const char *str)
+ : Json_writer_struct(thd)
{
if (my_writer)
my_writer->add_member(str).start_object();
-
}
+
Json_writer_object::~Json_writer_object()
{
if (!closed && my_writer)
@@ -239,14 +240,15 @@ Json_writer_object::~Json_writer_object()
closed= TRUE;
}
-Json_writer_array::Json_writer_array(Json_writer *writer):Json_writer_struct(writer)
+Json_writer_array::Json_writer_array(THD *thd) :
+ Json_writer_struct(thd)
{
if (my_writer)
my_writer->start_array();
}
-Json_writer_array::Json_writer_array(Json_writer *writer, const char *str)
- :Json_writer_struct(writer)
+Json_writer_array::Json_writer_array(THD *thd, const char *str)
+ :Json_writer_struct(thd)
{
if (my_writer)
my_writer->add_member(str).start_array();
diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h
index fcf853b0b62..4701cb4165b 100644
--- a/sql/my_json_writer.h
+++ b/sql/my_json_writer.h
@@ -101,6 +101,80 @@ public:
/*
+ Something that looks like class String, but has an internal limit of
+ how many bytes one can append to it.
+
+ Bytes that were truncated due to the size limitation are counted.
+*/
+
+class String_with_limit
+{
+public:
+
+ String_with_limit() : size_limit(SIZE_MAX), truncated_len(0)
+ {
+ str.length(0);
+ }
+
+ size_t get_truncated_bytes() const { return truncated_len; }
+ size_t get_size_limit() { return size_limit; }
+
+ void set_size_limit(size_t limit_arg)
+ {
+ // Setting size limit to be shorter than length will not have the desired
+ // effect
+ DBUG_ASSERT(str.length() < limit_arg);
+ size_limit= limit_arg;
+ }
+
+ void append(const char *s, size_t size)
+ {
+ if (str.length() + size <= size_limit)
+ {
+ // Whole string can be added, just do it
+ str.append(s, size);
+ }
+ else
+ {
+ // We cannot add the whole string
+ if (str.length() < size_limit)
+ {
+ // But we can still add something
+ size_t bytes_to_add = size_limit - str.length();
+ str.append(s, bytes_to_add);
+ truncated_len += size - bytes_to_add;
+ }
+ else
+ truncated_len += size;
+ }
+ }
+
+ void append(const char *s)
+ {
+ append(s, strlen(s));
+ }
+
+ void append(char c)
+ {
+ if (str.length() + 1 > size_limit)
+ truncated_len++;
+ else
+ str.append(c);
+ }
+
+ const String *get_string() { return &str; }
+ size_t length() { return str.length(); }
+private:
+ String str;
+
+ // str must not get longer than this many bytes.
+ size_t size_limit;
+
+ // How many bytes were truncated from the string
+ size_t truncated_len;
+};
+
+/*
A class to write well-formed JSON documents. The documents are also formatted
for human readability.
*/
@@ -116,7 +190,8 @@ public:
void add_str(const char* val, size_t num_bytes);
void add_str(const String &str);
void add_str(Item *item);
- void add_table_name(JOIN_TAB *tab);
+ void add_table_name(const JOIN_TAB *tab);
+ void add_table_name(const TABLE* table);
void add_ll(longlong val);
void add_size(longlong val);
@@ -134,9 +209,18 @@ public:
void end_object();
void end_array();
+ /*
+ One can set a limit of how large a JSON document should be.
+ Writes beyond that size will be counted, but will not be collected.
+ */
+ void set_size_limit(size_t mem_size) { output.set_size_limit(mem_size); }
+
+ // Report how many bytes were cut off because of the size limit
+ size_t get_truncated_bytes() { return output.get_truncated_bytes(); }
+
Json_writer() :
indent_level(0), document_start(true), element_started(false),
- first_child(true)
+ first_child(true), allowed_mem_size(0)
{
fmt_helper.init(this);
}
@@ -151,22 +235,28 @@ private:
bool element_started;
bool first_child;
+ /*
+ Limit on how much memory the JSON document may use, in bytes.
+ 0 means no limit has been set.
+ */
+ size_t allowed_mem_size;
+
Single_line_formatting_helper fmt_helper;
void append_indent();
void start_element();
void start_sub_element();
- //const char *new_member_name;
public:
- String output;
+ String_with_limit output;
};
/* A class to add values to Json_writer_object and Json_writer_array */
-class Json_value_context
+class Json_value_helper
{
Json_writer* writer;
- public:
+
+public:
void init(Json_writer *my_writer) { writer= my_writer; }
void add_str(const char* val)
{
@@ -219,11 +309,16 @@ class Json_value_context
if (writer)
writer->add_null();
}
- void add_table_name(JOIN_TAB *tab)
+ void add_table_name(const JOIN_TAB *tab)
{
if (writer)
writer->add_table_name(tab);
}
+ void add_table_name(const TABLE* table)
+ {
+ if (writer)
+ writer->add_table_name(table);
+ }
};
/* A common base for Json_writer_object and Json_writer_array */
@@ -231,17 +326,17 @@ class Json_writer_struct
{
protected:
Json_writer* my_writer;
- Json_value_context context;
+ Json_value_helper context;
/*
Tells when a json_writer_struct has been closed or not
*/
bool closed;
public:
- Json_writer_struct(Json_writer* writer)
+ explicit Json_writer_struct(THD *thd)
{
- my_writer= writer;
- context.init(writer);
+ my_writer= thd->opt_trace.get_current_json();
+ context.init(my_writer);
closed= false;
}
};
@@ -249,9 +344,13 @@ public:
/*
RAII-based class to start/end writing a JSON object into the JSON document
+
+ There is an "ignore mode": when the THD has no active optimizer trace,
+ the underlying Json_writer is NULL and all calls on this object do
+ nothing. This is how the optimizer trace is switched on and off.
*/
-class Json_writer_object:public Json_writer_struct
+class Json_writer_object : public Json_writer_struct
{
private:
void add_member(const char *name)
@@ -260,46 +359,49 @@ private:
my_writer->add_member(name);
}
public:
- Json_writer_object(Json_writer *w);
- Json_writer_object(Json_writer *w, const char *str);
+ explicit Json_writer_object(THD *thd);
+ explicit Json_writer_object(THD *thd, const char *str);
+
Json_writer_object& add(const char *name, bool value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_bool(value);
return *this;
}
- Json_writer_object& add(const char* name, uint value)
- {
- add_member(name);
- context.add_ll(value);
- return *this;
- }
- Json_writer_object& add(const char* name, ha_rows value)
+ Json_writer_object& add(const char *name, ulonglong value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
- context.add_ll(value);
+ context.add_ll(static_cast<longlong>(value));
return *this;
}
Json_writer_object& add(const char *name, longlong value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_ll(value);
return *this;
}
Json_writer_object& add(const char *name, double value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_double(value);
return *this;
}
+ #ifndef _WIN64
Json_writer_object& add(const char *name, size_t value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
- context.add_ll(value);
+ context.add_ll(static_cast<longlong>(value));
return *this;
}
+ #endif
Json_writer_object& add(const char *name, const char *value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_str(value);
return *this;
@@ -310,38 +412,54 @@ public:
context.add_str(value, num_bytes);
return *this;
}
- Json_writer_object& add(const char *name, const String &value)
- {
- add_member(name);
- context.add_str(value);
- return *this;
- }
Json_writer_object& add(const char *name, LEX_CSTRING value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_str(value.str);
return *this;
}
Json_writer_object& add(const char *name, Item *value)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_str(value);
return *this;
}
Json_writer_object& add_null(const char*name)
{
+ DBUG_ASSERT(!closed);
add_member(name);
context.add_null();
return *this;
}
- Json_writer_object& add_table_name(JOIN_TAB *tab)
+ Json_writer_object& add_table_name(const JOIN_TAB *tab)
{
+ DBUG_ASSERT(!closed);
add_member("table");
context.add_table_name(tab);
return *this;
}
+ Json_writer_object& add_table_name(const TABLE *table)
+ {
+ DBUG_ASSERT(!closed);
+ add_member("table");
+ context.add_table_name(table);
+ return *this;
+ }
+ Json_writer_object& add_select_number(uint select_number)
+ {
+ DBUG_ASSERT(!closed);
+ add_member("select_id");
+ if (unlikely(select_number >= INT_MAX))
+ context.add_str("fake");
+ else
+ context.add_ll(static_cast<longlong>(select_number));
+ return *this;
+ }
void end()
{
+ DBUG_ASSERT(!closed);
if (my_writer)
my_writer->end_object();
closed= TRUE;
@@ -352,83 +470,99 @@ public:
/*
RAII-based class to start/end writing a JSON array into the JSON document
+
+ There is an "ignore mode": when the THD has no active optimizer trace,
+ the underlying Json_writer is NULL and all calls on this object do
+ nothing. This is how the optimizer trace is switched on and off.
*/
-class Json_writer_array:public Json_writer_struct
+
+class Json_writer_array : public Json_writer_struct
{
public:
- Json_writer_array(Json_writer *w);
- Json_writer_array(Json_writer *w, const char *str);
+ Json_writer_array(THD *thd);
+ Json_writer_array(THD *thd, const char *str);
void end()
{
+ DBUG_ASSERT(!closed);
if (my_writer)
my_writer->end_array();
closed= TRUE;
}
+
Json_writer_array& add(bool value)
{
+ DBUG_ASSERT(!closed);
context.add_bool(value);
return *this;
}
- Json_writer_array& add(uint value)
+ Json_writer_array& add(ulonglong value)
{
- context.add_ll(value);
- return *this;
- }
- Json_writer_array& add(ha_rows value)
- {
- context.add_ll(value);
+ DBUG_ASSERT(!closed);
+ context.add_ll(static_cast<longlong>(value));
return *this;
}
Json_writer_array& add(longlong value)
{
+ DBUG_ASSERT(!closed);
context.add_ll(value);
return *this;
}
Json_writer_array& add(double value)
{
+ DBUG_ASSERT(!closed);
context.add_double(value);
return *this;
}
+ #ifndef _WIN64
Json_writer_array& add(size_t value)
{
- context.add_ll(value);
+ DBUG_ASSERT(!closed);
+ context.add_ll(static_cast<longlong>(value));
return *this;
}
+ #endif
Json_writer_array& add(const char *value)
{
+ DBUG_ASSERT(!closed);
context.add_str(value);
return *this;
}
Json_writer_array& add(const char *value, size_t num_bytes)
{
+ DBUG_ASSERT(!closed);
context.add_str(value, num_bytes);
return *this;
}
- Json_writer_array& add(const String &value)
- {
- context.add_str(value);
- return *this;
- }
Json_writer_array& add(LEX_CSTRING value)
{
+ DBUG_ASSERT(!closed);
context.add_str(value.str);
return *this;
}
Json_writer_array& add(Item *value)
{
+ DBUG_ASSERT(!closed);
context.add_str(value);
return *this;
}
Json_writer_array& add_null()
{
+ DBUG_ASSERT(!closed);
context.add_null();
return *this;
}
- Json_writer_array& add_table_name(JOIN_TAB *tab)
+ Json_writer_array& add_table_name(const JOIN_TAB *tab)
{
+ DBUG_ASSERT(!closed);
context.add_table_name(tab);
return *this;
}
+ Json_writer_array& add_table_name(const TABLE *table)
+ {
+ DBUG_ASSERT(!closed);
+ context.add_table_name(table);
+ return *this;
+ }
~Json_writer_array();
};
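To make the String_with_limit contract above concrete, a small editorial illustration (not test code from the patch): appends beyond the limit are dropped, wholly or partially, and the dropped bytes are counted. This is what lets the trace honor optimizer_trace_max_mem_size while still reporting, via get_truncated_bytes(), how much output was cut off:

    String_with_limit s;
    s.set_size_limit(8);  // collect at most 8 bytes
    s.append("abcdef");   // fits entirely: length() == 6
    s.append("ghij");     // partial fit: "gh" kept, "ij" counted as truncated
    s.append('!');        // no room left: dropped, counter becomes 3
    // Now s.length() == 8 and s.get_truncated_bytes() == 3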
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index eca51b54dab..c87a0595a7d 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -119,6 +119,7 @@
#include "sql_select.h"
#include "sql_statistics.h"
#include "uniques.h"
+#include "my_json_writer.h"
#ifndef EXTRA_DEBUG
#define test_rb_tree(A,B) {}
@@ -429,6 +430,18 @@ static int and_range_trees(RANGE_OPT_PARAM *param,
SEL_TREE *result);
static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);
+static void print_key_value(String *out, const KEY_PART_INFO *key_part,
+ const uchar *key);
+
+static void append_range_all_keyparts(Json_writer_array *range_trace,
+ String *range_string,
+ String *range_so_far, const SEL_ARG *keypart,
+ const KEY_PART_INFO *key_parts);
+
+static
+void append_range(String *out, const KEY_PART_INFO *key_parts,
+ const uchar *min_key, const uchar *max_key, const uint flag);
+
/*
SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
@@ -2191,6 +2204,14 @@ public:
static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); }
static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */
+ /**
+ Add basic info for this TABLE_READ_PLAN to the optimizer trace.
+
+ @param param Parameters for range analysis of this table
+ @param trace_object The optimizer trace object the info is appended to
+ */
+ virtual void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const = 0;
};
@@ -2232,8 +2253,34 @@ public:
}
DBUG_RETURN(quick);
}
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
};
+void TRP_RANGE::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ DBUG_ASSERT(param->using_real_indexes);
+ const uint keynr_in_table = param->real_keynr[key_idx];
+
+ const KEY &cur_key = param->table->key_info[keynr_in_table];
+ const KEY_PART_INFO *key_part = cur_key.key_part;
+
+ trace_object->add("type", "range_scan")
+ .add("index", cur_key.name)
+ .add("rows", records);
+
+ Json_writer_array trace_range(param->thd, "ranges");
+
+ // TRP_RANGE should not be created if there are no range intervals
+ DBUG_ASSERT(key);
+
+ String range_info;
+ range_info.length(0);
+ range_info.set_charset(system_charset_info);
+ append_range_all_keyparts(&trace_range, NULL, &range_info, key, key_part);
+}
+
/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
@@ -2251,9 +2298,12 @@ public:
struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
bool is_covering; /* TRUE if no row retrieval phase is necessary */
double index_scan_costs; /* SUM(cost(index_scan)) */
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
};
+
/*
Plan for QUICK_ROR_UNION_SELECT scan.
QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
@@ -2269,8 +2319,22 @@ public:
MEM_ROOT *parent_alloc);
TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
TABLE_READ_PLAN **last_ror; /* end of the above array */
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
};
+void TRP_ROR_UNION::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ THD *thd= param->thd;
+ trace_object->add("type", "index_roworder_union");
+ Json_writer_array smth_trace(thd, "union_of");
+ for (TABLE_READ_PLAN **current = first_ror; current != last_ror; current++)
+ {
+ Json_writer_object trp_info(thd);
+ (*current)->trace_basic_info(param, &trp_info);
+ }
+}
/*
Plan for QUICK_INDEX_INTERSECT_SELECT scan.
@@ -2288,9 +2352,25 @@ public:
TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
TRP_RANGE **range_scans_end; /* end of the array */
/* keys whose scans are to be filtered by cpk conditions */
- key_map filtered_scans;
+ key_map filtered_scans;
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
+
};
+void TRP_INDEX_INTERSECT::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ THD *thd= param->thd;
+ trace_object->add("type", "index_sort_intersect");
+ Json_writer_array smth_trace(thd, "index_sort_intersect_of");
+ for (TRP_RANGE **current = range_scans; current != range_scans_end;
+ current++)
+ {
+ Json_writer_object trp_info(thd);
+ (*current)->trace_basic_info(param, &trp_info);
+ }
+}
/*
Plan for QUICK_INDEX_MERGE_SELECT scan.
@@ -2307,8 +2387,22 @@ public:
MEM_ROOT *parent_alloc);
TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
TRP_RANGE **range_scans_end; /* end of the array */
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
};
+void TRP_INDEX_MERGE::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ THD *thd= param->thd;
+ trace_object->add("type", "index_merge");
+ Json_writer_array smth_trace(thd, "index_merge_of");
+ for (TRP_RANGE **current= range_scans; current != range_scans_end; current++)
+ {
+ Json_writer_object trp_info(thd);
+ (*current)->trace_basic_info(param, &trp_info);
+ }
+}
/*
Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
@@ -2360,9 +2454,51 @@ public:
QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
MEM_ROOT *parent_alloc);
void use_index_scan() { is_index_scan= TRUE; }
+ void trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const;
};
+void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ THD *thd= param->thd;
+ trace_object->add("type", "index_group").add("index", index_info->name);
+
+ if (min_max_arg_part)
+ trace_object->add("group_attribute", min_max_arg_part->field->field_name);
+ else
+ trace_object->add_null("group_attribute");
+
+ trace_object->add("min_aggregate", have_min)
+ .add("max_aggregate", have_max)
+ .add("distinct_aggregate", have_agg_distinct)
+ .add("rows", records)
+ .add("cost", read_cost);
+
+ const KEY_PART_INFO *key_part = index_info->key_part;
+ {
+ Json_writer_array trace_keyparts(thd, "key_parts_used_for_access");
+ for (uint partno = 0; partno < used_key_parts; partno++)
+ {
+ const KEY_PART_INFO *cur_key_part = key_part + partno;
+ trace_keyparts.add(cur_key_part->field->field_name);
+ }
+ }
+
+ Json_writer_array trace_range(thd, "ranges");
+
+ // can have group quick without ranges
+ if (index_tree)
+ {
+ String range_info;
+ range_info.set_charset(system_charset_info);
+ append_range_all_keyparts(&trace_range, NULL, &range_info, index_tree,
+ key_part);
+ }
+}
+
+
typedef struct st_index_scan_info
{
uint idx; /* # of used key in param->keys */
@@ -2538,6 +2674,17 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
DBUG_PRINT("info",("Time to scan table: %g", read_time));
+ Json_writer_object table_records(thd);
+ if (head->reginfo.join_tab)
+ table_records.add_table_name(head->reginfo.join_tab);
+ else
+ table_records.add_table_name(head);
+ Json_writer_object trace_range(thd, "range_analysis");
+ {
+ Json_writer_object table_rec(thd, "table_scan");
+ table_rec.add("rows", records).add("cost", read_time);
+ }
+
keys_to_use.intersect(head->keys_in_use_for_query);
if (!keys_to_use.is_clear_all())
{
@@ -2591,19 +2738,33 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
*/
key_info= head->key_info;
uint max_key_len= 0;
+
+ Json_writer_array trace_idx(thd, "potential_range_indexes");
+
for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
{
+ Json_writer_object trace_idx_details(thd);
+ trace_idx_details.add("index", key_info->name);
KEY_PART_INFO *key_part_info;
uint n_key_parts= head->actual_n_key_parts(key_info);
if (!keys_to_use.is_set(idx))
- continue;
+ {
+ trace_idx_details.add("usable", false)
+ .add("cause", "not applicable");
+ continue;
+ }
if (key_info->flags & HA_FULLTEXT)
- continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
+ {
+ trace_idx_details.add("usable", false).add("cause", "fulltext");
+ continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
+ }
+ trace_idx_details.add("usable", true);
param.key[param.keys]=key_parts;
key_part_info= key_info->key_part;
uint cur_key_len= 0;
+ Json_writer_array trace_keypart(thd, "key_parts");
for (uint part= 0 ; part < n_key_parts ;
part++, key_parts++, key_part_info++)
{
@@ -2618,11 +2779,14 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
(key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
/* Only HA_PART_KEY_SEG is used */
key_parts->flag= (uint8) key_part_info->key_part_flag;
+ trace_keypart.add(key_parts->field->field_name);
}
param.real_keynr[param.keys++]=idx;
if (cur_key_len > max_key_len)
max_key_len= cur_key_len;
}
+ trace_idx.end();
+
param.key_parts_end=key_parts;
param.alloced_sel_args= 0;
@@ -2644,8 +2808,18 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
(double) records / TIME_FOR_COMPARE;
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
"read time %g", key_for_use, key_read_time));
+
+ Json_writer_object trace_cov(thd, "best_covering_index_scan");
+ bool chosen= FALSE;
if (key_read_time < read_time)
+ {
read_time= key_read_time;
+ chosen= TRUE;
+ }
+ trace_cov.add("index", head->key_info[key_for_use].name)
+ .add("cost", key_read_time).add("chosen", chosen);
+ if (!chosen)
+ trace_cov.add("cause", "cost");
}
TABLE_READ_PLAN *best_trp= NULL;
@@ -2654,12 +2828,18 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
if (cond)
{
- if ((tree= cond->get_mm_tree(&param, &cond)))
+ {
+ Json_writer_array trace_range_summary(thd,
+ "setup_range_conditions");
+ tree= cond->get_mm_tree(&param, &cond);
+ }
+ if (tree)
{
if (tree->type == SEL_TREE::IMPOSSIBLE)
{
records=0L; /* Return -1 from this function. */
read_time= (double) HA_POS_ERROR;
+ trace_range.add("impossible_range", true);
goto free_mem;
}
/*
@@ -2667,7 +2847,10 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
can construct a group-min-max quick select
*/
if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
+ {
+ trace_range.add("range_scan_possible", false);
tree= NULL;
+ }
}
}
@@ -2680,11 +2863,19 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
{
param.table->quick_condition_rows= MY_MIN(group_trp->records,
head->stat_records());
+ Json_writer_object grp_summary(thd, "best_group_range_summary");
+
+ if (unlikely(thd->trace_started()))
+ group_trp->trace_basic_info(&param, &grp_summary);
+
if (group_trp->read_cost < best_read_time)
{
+ grp_summary.add("chosen", true);
best_trp= group_trp;
best_read_time= best_trp->read_cost;
}
+ else
+ grp_summary.add("chosen", false).add("cause", "cost");
}
if (tree)
@@ -2697,7 +2888,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
TRP_ROR_INTERSECT *rori_trp;
TRP_INDEX_INTERSECT *intersect_trp;
bool can_build_covering= FALSE;
-
+ Json_writer_object trace_range(thd, "analyzing_range_alternatives");
+
remove_nonrange_trees(&param, tree);
/* Get best 'range' plan and prepare data for making other plans */
@@ -2764,6 +2956,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
DBUG_PRINT("info",("No range reads possible,"
" trying to construct index_merge"));
List_iterator_fast<SEL_IMERGE> it(tree->merges);
+ Json_writer_array trace_idx_merge(thd, "analyzing_index_merge_union");
while ((imerge= it++))
{
new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
@@ -2798,6 +2991,19 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
possible_keys= param.possible_keys;
free_mem:
+ if (unlikely(quick && best_trp && thd->trace_started()))
+ {
+ Json_writer_object trace_range_summary(thd,
+ "chosen_range_access_summary");
+ {
+ Json_writer_object trace_range_plan(thd, "range_access_plan");
+ best_trp->trace_basic_info(&param, &trace_range_plan);
+ }
+ trace_range_summary.add("rows_for_plan", quick->records)
+ .add("cost_for_plan", quick->read_time)
+ .add("chosen", true);
+ }
+
free_root(&alloc,MYF(0)); // Return memory & allocator
thd->mem_root= param.old_root;
thd->no_errors=0;
@@ -3060,6 +3266,9 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
estimate sources.
*/
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes");
+
for (keynr= 0; keynr < table->s->keys; keynr++)
{
if (table->quick_keys.is_set(keynr))
@@ -3109,6 +3318,10 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
not yet been accounted for.
*/
table->cond_selectivity*= quick_cond_selectivity;
+ Json_writer_object selectivity_for_index(thd);
+ selectivity_for_index.add("index_name", key_info->name)
+ .add("selectivity_from_index",
+ quick_cond_selectivity);
if (i != used_key_parts)
{
/*
@@ -3128,7 +3341,9 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
*/
selectivity_mult= ((double)(i+1)) / i;
}
- table->cond_selectivity*= selectivity_mult;
+ table->cond_selectivity*= selectivity_mult;
+ selectivity_for_index.add("selectivity_multiplier",
+ selectivity_mult);
}
/*
We need to set selectivity for fields supported by indexes.
@@ -3149,12 +3364,14 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
}
}
}
+ selectivity_for_indexes.end();
/*
Second step: calculate the selectivity of the range conditions not
supported by any index and selectivity of the range condition
over the fields whose selectivity has not been set yet.
*/
+ Json_writer_array selectivity_for_columns(thd, "selectivity_for_columns");
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
!bitmap_is_clear_all(used_fields) &&
@@ -3214,17 +3431,25 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
SEL_ARG *key= tree->keys[idx];
if (key)
{
+ Json_writer_object selectivity_for_column(thd);
+ selectivity_for_column.add("column_name", key->field->field_name);
if (key->type == SEL_ARG::IMPOSSIBLE)
- {
+ {
rows= 0;
table->reginfo.impossible_range= 1;
+ selectivity_for_column.add("selectivity_from_histograms", rows);
+ selectivity_for_column.add("cause", "impossible range");
goto free_alloc;
}
else
{
rows= records_in_column_ranges(&param, idx, key);
if (rows != DBL_MAX)
+ {
key->field->cond_selectivity= rows/table_records;
+ selectivity_for_column.add("selectivity_from_histograms",
+ key->field->cond_selectivity);
+ }
}
}
}
@@ -4791,6 +5016,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
ha_rows roru_total_records;
double roru_intersect_part= 1.0;
size_t n_child_scans;
+ THD *thd= param->thd;
DBUG_ENTER("get_best_disjunct_quick");
DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
@@ -4816,6 +5042,8 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
sizeof(TRP_RANGE*)*
n_child_scans)))
DBUG_RETURN(NULL);
+ Json_writer_object trace_best_disjunct(thd);
+ Json_writer_array to_merge(thd, "indexes_to_merge");
/*
Collect best 'range' scan for each of disjuncts, and, while doing so,
analyze possibility of ROR scans. Also calculate some values needed by
@@ -4827,6 +5055,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
{
DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
"tree in SEL_IMERGE"););
+ Json_writer_object trace_idx(thd);
if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time)))
{
/*
@@ -4838,8 +5067,11 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
imerge_too_expensive= TRUE;
}
if (imerge_too_expensive)
+ {
+ trace_idx.add("chosen", false).add("cause", "cost");
continue;
-
+ }
+ const uint keynr_in_table = param->real_keynr[(*cur_child)->key_idx];
imerge_cost += (*cur_child)->read_cost;
all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
all_scans_rors &= (*cur_child)->is_ror;
@@ -4852,9 +5084,16 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
}
else
non_cpk_scan_records += (*cur_child)->records;
+ trace_idx.add("index_to_merge",
+ param->table->key_info[keynr_in_table].name)
+ .add("cumulated_cost", imerge_cost);
}
+ to_merge.end();
+
DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
+ trace_best_disjunct.add("cost_of_reading_ranges", imerge_cost);
+
if (imerge_too_expensive || (imerge_cost > read_time) ||
((non_cpk_scan_records+cpk_scan_records >=
param->table->stat_records()) &&
@@ -4866,6 +5105,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
*/
DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
"full table scan, bailing out"));
+ trace_best_disjunct.add("chosen", false).add("cause", "cost");
DBUG_RETURN(NULL);
}
@@ -4878,6 +5118,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
{
roru_read_plans= (TABLE_READ_PLAN**)range_scans;
+ trace_best_disjunct.add("use_roworder_union", true)
+ .add("cause",
+ "always cheaper than non roworder retrieval");
goto skip_to_ror_scan;
}
@@ -4887,16 +5130,26 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
Add one ROWID comparison for each row retrieved on non-CPK scan. (it
is done in QUICK_RANGE_SELECT::row_in_ranges)
*/
- imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
+ double rid_comp_cost= static_cast<double>(non_cpk_scan_records) /
+ TIME_FOR_COMPARE_ROWID;
+ imerge_cost += rid_comp_cost;
+ trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
+ rid_comp_cost);
}
/* Calculate cost(rowid_to_row_scan) */
- imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
+ {
+ double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records);
+ imerge_cost += sweep_cost;
+ trace_best_disjunct.add("cost_sort_rowid_and_read_disk", sweep_cost);
+ }
DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
imerge_cost));
if (imerge_cost > read_time ||
!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
{
+ trace_best_disjunct.add("use_roworder_index_merge", true);
+ trace_best_disjunct.add("cause", "cost");
goto build_ror_index_merge;
}
@@ -4913,12 +5166,18 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
param->imerge_cost_buff_size= unique_calc_buff_size;
}
- imerge_cost +=
- Unique::get_use_cost(param->imerge_cost_buff, (uint)non_cpk_scan_records,
- param->table->file->ref_length,
- (size_t)param->thd->variables.sortbuff_size,
- TIME_FOR_COMPARE_ROWID,
- FALSE, NULL);
+ {
+ const double dup_removal_cost = Unique::get_use_cost(
+ param->imerge_cost_buff, (uint)non_cpk_scan_records,
+ param->table->file->ref_length,
+ (size_t)param->thd->variables.sortbuff_size,
+ TIME_FOR_COMPARE_ROWID,
+ FALSE, NULL);
+ imerge_cost+= dup_removal_cost;
+ trace_best_disjunct.add("cost_duplicate_removal", dup_removal_cost)
+ .add("total_cost", imerge_cost);
+ }
+
DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
imerge_cost, read_time));
if (imerge_cost < read_time)
@@ -4961,11 +5220,16 @@ skip_to_ror_scan:
roru_total_records= 0;
cur_roru_plan= roru_read_plans;
+ Json_writer_array trace_analyze_ror(thd, "analyzing_roworder_scans");
+
/* Find 'best' ROR scan for each of trees in disjunction */
for (ptree= imerge->trees, cur_child= range_scans;
ptree != imerge->trees_next;
ptree++, cur_child++, cur_roru_plan++)
{
+ Json_writer_object trp_info(thd);
+ if (unlikely(thd->trace_started()))
+ (*cur_child)->trace_basic_info(param, &trp_info);
/*
Assume the best ROR scan is the one that has cheapest full-row-retrieval
scan cost.
@@ -5001,7 +5265,7 @@ skip_to_ror_scan:
roru_intersect_part *= (*cur_roru_plan)->records /
param->table->stat_records();
}
-
+ trace_analyze_ror.end();
/*
rows to retrieve=
SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
@@ -5027,11 +5291,14 @@ skip_to_ror_scan:
DBUG_PRINT("info", ("ROR-union: cost %g, %zu members",
roru_total_cost, n_child_scans));
+ trace_best_disjunct.add("index_roworder_union_cost", roru_total_cost)
+ .add("members", n_child_scans);
TRP_ROR_UNION* roru;
if (roru_total_cost < read_time)
{
if ((roru= new (param->mem_root) TRP_ROR_UNION))
{
+ trace_best_disjunct.add("chosen", true);
roru->first_ror= roru_read_plans;
roru->last_ror= roru_read_plans + n_child_scans;
roru->read_cost= roru_total_cost;
@@ -5039,7 +5306,9 @@ skip_to_ror_scan:
DBUG_RETURN(roru);
}
}
- DBUG_RETURN(imerge_trp);
+ else
+ trace_best_disjunct.add("chosen", false);
+ DBUG_RETURN(imerge_trp);
}
@@ -5300,6 +5569,15 @@ ha_rows get_table_cardinality_for_index_intersect(TABLE *table)
}
}
+static
+void print_keyparts(THD *thd, KEY *key, uint key_parts)
+{
+ KEY_PART_INFO *part= key->key_part;
+ Json_writer_array keyparts(thd, "keyparts");
+ for(uint i= 0; i < key_parts; i++, part++)
+ keyparts.add(part->field->field_name);
+}
+
static
ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
@@ -5352,8 +5630,9 @@ bool prepare_search_best_index_intersect(PARAM *param,
INDEX_SCAN_INFO *cpk_scan= NULL;
TABLE *table= param->table;
uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans);
+ THD *thd= param->thd;
- if (!n_index_scans)
+ if (n_index_scans <= 1)
return 1;
bzero(init, sizeof(*init));
@@ -5370,9 +5649,6 @@ bool prepare_search_best_index_intersect(PARAM *param,
common->table_cardinality=
get_table_cardinality_for_index_intersect(table);
- if (n_index_scans <= 1)
- return TRUE;
-
if (table->file->primary_key_is_clustered())
{
INDEX_SCAN_INFO **index_scan_end;
@@ -5397,23 +5673,38 @@ bool prepare_search_best_index_intersect(PARAM *param,
bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i);
INDEX_SCAN_INFO **selected_index_scans= common->search_scans;
-
+ Json_writer_array potential_idx_scans(thd, "potential_index_scans");
for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++)
{
+ Json_writer_object idx_scan(thd);
uint used_key_parts= (*index_scan)->used_key_parts;
KEY *key_info= (*index_scan)->key_info;
+ idx_scan.add("index", key_info->name);
if (*index_scan == cpk_scan)
+ {
+ idx_scan.add("chosen", "false")
+ .add("cause", "clustered index used for filtering");
continue;
+ }
if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts &&
same_index_prefix(cpk_scan->key_info, key_info, used_key_parts))
+ {
+ idx_scan.add("chosen", "false")
+ .add("cause", "clustered index used for filtering");
continue;
+ }
cost= table->file->keyread_time((*index_scan)->keynr,
(*index_scan)->range_count,
(*index_scan)->records);
+ idx_scan.add("cost", cost);
if (cost >= cutoff_cost)
+ {
+ idx_scan.add("chosen", false);
+ idx_scan.add("cause", "cost");
continue;
+ }
for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++)
{
@@ -5430,10 +5721,20 @@ bool prepare_search_best_index_intersect(PARAM *param,
}
if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost)
{
+ idx_scan.add("chosen", true);
+ if (!*scan_ptr)
+ idx_scan.add("cause", "first occurence of index prefix");
+ else
+ idx_scan.add("cause", "better cost for same idx prefix");
*scan_ptr= *index_scan;
(*scan_ptr)->index_read_cost= cost;
}
- }
+ else
+ {
+ idx_scan.add("chosen", false).add("cause", "cost");
+ }
+ }
+ potential_idx_scans.end();
ha_rows records_in_scans= 0;
@@ -5443,6 +5744,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
return TRUE;
records_in_scans+= (*scan_ptr)->records;
}
+
n_search_scans= i;
if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields))
@@ -5472,6 +5774,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *),
(qsort_cmp) cmp_intersect_index_scan);
+ Json_writer_array selected_idx_scans(thd, "selected_index_scans");
if (cpk_scan)
{
PARTIAL_INDEX_INTERSECT_INFO curr;
@@ -5484,16 +5787,36 @@ bool prepare_search_best_index_intersect(PARAM *param,
curr.length= 1;
for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
{
+ KEY *key_info= (*scan_ptr)->key_info;
ha_rows scan_records= (*scan_ptr)->records;
ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr);
(*scan_ptr)->filtered_out= records >= scan_records ?
- 0 : scan_records-records;
+ 0 : scan_records-records;
+ if (thd->trace_started())
+ {
+ Json_writer_object selected_idx(thd);
+ selected_idx.add("index", key_info->name);
+ print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
+ selected_idx.add("records", (*scan_ptr)->records)
+ .add("filtered_records", (*scan_ptr)->filtered_out);
+ }
}
}
else
{
for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
+ {
+ KEY *key_info= (*scan_ptr)->key_info;
(*scan_ptr)->filtered_out= 0;
+ if (thd->trace_started())
+ {
+ Json_writer_object selected_idx(thd);
+ selected_idx.add("index", key_info->name);
+ print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
+ selected_idx.add("records", (*scan_ptr)->records)
+ .add("filtered_records", (*scan_ptr)->filtered_out);
+ }
+ }
}
return FALSE;
@@ -5950,10 +6273,12 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
PARTIAL_INDEX_INTERSECT_INFO init;
TRP_INDEX_INTERSECT *intersect_trp= NULL;
TABLE *table= param->table;
-
+ THD *thd= param->thd;
DBUG_ENTER("get_best_index_intersect");
+ Json_writer_object trace_idx_intersect(thd, "analyzing_sort_intersect");
+
if (prepare_search_best_index_intersect(param, tree, &common, &init,
read_time))
DBUG_RETURN(NULL);
@@ -6015,11 +6340,15 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT))
{
+
intersect_trp->read_cost= common.best_cost;
intersect_trp->records= common.best_records;
intersect_trp->range_scans= range_scans;
intersect_trp->range_scans_end= cur_range;
intersect_trp->filtered_scans= common.filtered_scans;
+ trace_idx_interect.add("rows", intersect_trp->records)
+ .add("cost", intersect_trp->read_cost)
+ .add("chosen",true);
}
DBUG_RETURN(intersect_trp);
}
@@ -6029,6 +6358,46 @@ typedef struct st_ror_scan_info : INDEX_SCAN_INFO
{
} ROR_SCAN_INFO;
+void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
+ Json_writer_object *trace_object) const
+{
+ THD *thd= param->thd;
+ trace_object->add("type", "index_roworder_intersect");
+ trace_object->add("rows", records);
+ trace_object->add("cost", read_cost);
+ trace_object->add("covering", is_covering);
+ trace_object->add("clustered_pk_scan", cpk_scan != NULL);
+
+ Json_writer_array smth_trace(thd, "intersect_of");
+ for (ROR_SCAN_INFO **cur_scan = first_scan; cur_scan != last_scan;
+ cur_scan++)
+ {
+ const KEY &cur_key = param->table->key_info[(*cur_scan)->keynr];
+ const KEY_PART_INFO *key_part = cur_key.key_part;
+
+ Json_writer_object trace_isect_idx(thd);
+ trace_isect_idx.add("type", "range_scan");
+ trace_isect_idx.add("index", cur_key.name);
+ trace_isect_idx.add("rows", (*cur_scan)->records);
+
+ Json_writer_array trace_range(thd, "ranges");
+ for (const SEL_ARG *current = (*cur_scan)->sel_arg->first(); current;
+ current = current->next)
+ {
+ String range_info;
+ range_info.set_charset(system_charset_info);
+ for (const SEL_ARG *part = current; part; part = part->next_key_part)
+ {
+ const KEY_PART_INFO *cur_key_part = key_part + part->part;
+ append_range(&range_info, cur_key_part, part->min_value,
+ part->max_value, part->min_flag | part->max_flag);
+ }
+ trace_range.add(range_info.ptr(), range_info.length());
+ }
+ }
+}
+
/*
Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
@@ -6414,7 +6783,9 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
*/
static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
- ROR_SCAN_INFO* ror_scan, bool is_cpk_scan)
+ ROR_SCAN_INFO* ror_scan,
+ Json_writer_object *trace_costs,
+ bool is_cpk_scan)
{
double selectivity_mult= 1.0;
@@ -6441,13 +6812,16 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
per check this gives us:
*/
- info->index_scan_costs += rows2double(info->index_records) /
+ const double idx_cost= rows2double(info->index_records) /
TIME_FOR_COMPARE_ROWID;
+ info->index_scan_costs += idx_cost;
+ trace_costs->add("index_scan_cost", idx_cost);
}
else
{
info->index_records += info->param->quick_rows[ror_scan->keynr];
info->index_scan_costs += ror_scan->index_read_cost;
+ trace_costs->add("index_scan_cost", ror_scan->index_read_cost);
bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
&info->covered_fields))
@@ -6458,13 +6832,19 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
}
info->total_cost= info->index_scan_costs;
+ trace_costs->add("cumulateed_index_scan_cost", info->index_scan_costs);
DBUG_PRINT("info", ("info->total_cost: %g", info->total_cost));
if (!info->is_covering)
{
- info->total_cost +=
- get_sweep_read_cost(info->param, double2rows(info->out_rows));
+ double sweep_cost= get_sweep_read_cost(info->param,
+ double2rows(info->out_rows));
+ info->total_cost += sweep_cost;
+ trace_costs->add("disk_sweep_cost", sweep_cost);
DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
}
+ else
+ trace_costs->add("disk_sweep_cost", static_cast<longlong>(0));
+
DBUG_PRINT("info", ("New out_rows: %g", info->out_rows));
DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost,
info->is_covering?"" : "non-"));
@@ -6544,10 +6924,16 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
uint idx;
double min_cost= DBL_MAX;
DBUG_ENTER("get_best_ror_intersect");
+ THD *thd= param->thd;
+ Json_writer_object trace_ror(thd, "analyzing_roworder_intersect");
if ((tree->n_ror_scans < 2) || !param->table->stat_records() ||
!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT))
- DBUG_RETURN(NULL);
+ {
+ if (tree->n_ror_scans < 2)
+ trace_ror.add("cause", "too few roworder scans");
+ DBUG_RETURN(NULL);
+ }
/*
Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of
@@ -6622,15 +7008,27 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
ROR_SCAN_INFO **intersect_scans_best;
cur_ror_scan= tree->ror_scans;
intersect_scans_best= intersect_scans;
+ Json_writer_array trace_isect_idx(thd, "intersecting_indexes");
while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
{
+ Json_writer_object trace_idx(thd);
+ trace_idx.add("index",
+ param->table->key_info[(*cur_ror_scan)->keynr].name);
+
/* S= S + first(R); R= R - first(R); */
- if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE))
+ if (!ror_intersect_add(intersect, *cur_ror_scan, &trace_idx, FALSE))
{
+ trace_idx.add("usable", false)
+ .add("cause", "does not reduce cost of intersect");
cur_ror_scan++;
continue;
}
+ trace_idx.add("cumulative_total_cost", intersect->total_cost)
+ .add("usable", true)
+ .add("matching_rows_now", intersect->out_rows)
+ .add("intersect_covering_with_this_index", intersect->is_covering);
+
*(intersect_scans_end++)= *(cur_ror_scan++);
if (intersect->total_cost < min_cost)
@@ -6639,12 +7037,21 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
ror_intersect_cpy(intersect_best, intersect);
intersect_scans_best= intersect_scans_end;
min_cost = intersect->total_cost;
+ trace_idx.add("chosen", true);
+ }
+ else
+ {
+ trace_idx.add("chosen", false)
+ .add("cause", "does not reduce cost");
}
}
+ trace_isect_idx.end();
if (intersect_scans_best == intersect_scans)
{
DBUG_PRINT("info", ("None of scans increase selectivity"));
+ trace_ror.add("chosen", false)
+ .add("cause","does not increase selectivity");
DBUG_RETURN(NULL);
}
@@ -6662,16 +7069,31 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
Check if we should add a CPK scan. If the obtained ROR-intersection is
covering, it doesn't make sense to add CPK scan.
*/
+ Json_writer_object trace_cpk(thd, "clustered_pk");
if (cpk_scan && !intersect->is_covering)
{
- if (ror_intersect_add(intersect, cpk_scan, TRUE) &&
+ if (ror_intersect_add(intersect, cpk_scan, &trace_cpk, TRUE) &&
(intersect->total_cost < min_cost))
+ {
+ trace_cpk.add("clustered_pk_scan_added_to_intersect", true)
+ .add("cumulated_cost", intersect->total_cost);
intersect_best= intersect; //just set pointer here
+ }
else
+ {
+ trace_cpk.add("clustered_pk_added_to_intersect", false)
+ .add("cause", "cost");
cpk_scan= 0; // Don't use cpk_scan
+ }
}
else
+ {
+ trace_cpk.add("clustered_pk_added_to_intersect", false)
+ .add("cause", cpk_scan ? "roworder is covering"
+ : "no clustered pk index");
cpk_scan= 0; // Don't use cpk_scan
+ }
+ trace_cpk.end();
/* Ok, return ROR-intersect plan if we have found one */
TRP_ROR_INTERSECT *trp= NULL;
@@ -6698,6 +7120,17 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
"cost %g, records %lu",
trp->read_cost, (ulong) trp->records));
+ trace_ror.add("rows", trp->records)
+ .add("cost", trp->read_cost)
+ .add("covering", trp->is_covering)
+ .add("chosen", true);
+ }
+ else
+ {
+ trace_ror.add("chosen", false)
+ .add("cause", (read_time > min_cost)
+ ? "too few indexes to merge"
+ : "cost");
}
DBUG_RETURN(trp);
}
@@ -6885,6 +7318,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
UNINIT_VAR(best_buf_size); /* protected by key_to_read */
TRP_RANGE* read_plan= NULL;
DBUG_ENTER("get_key_scans_params");
+ THD *thd= param->thd;
/*
Note that there may be trees that have type SEL_TREE::KEY but contain no
key reads at all, e.g. tree for expression "key1 is not null" where key1
@@ -6892,6 +7326,8 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
*/
DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
"tree scans"););
+ Json_writer_array range_scan_alt(thd, "range_scan_alternatives");
+
tree->ror_scans_map.clear_all();
tree->n_ror_scans= 0;
tree->index_scans= 0;
@@ -6920,6 +7356,9 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
bool read_index_only= index_read_must_be_used ? TRUE :
(bool) param->table->covering_keys.is_set(keynr);
+ Json_writer_object trace_idx(thd);
+ trace_idx.add("index", param->table->key_info[keynr].name);
+
found_records= check_quick_select(param, idx, read_index_only, key,
update_tbl_stats, &mrr_flags,
&buf_size, &cost);
@@ -6928,6 +7367,14 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
(index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root,
sizeof(INDEX_SCAN_INFO))))
{
+ Json_writer_array trace_range(thd, "ranges");
+
+ const KEY &cur_key = param->table->key_info[keynr];
+ const KEY_PART_INFO *key_part = cur_key.key_part;
+
+ String range_info;
+ range_info.set_charset(system_charset_info);
+
index_scan->idx= idx;
index_scan->keynr= keynr;
index_scan->key_info= &param->table->key_info[keynr];
@@ -6936,6 +7383,17 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
index_scan->records= found_records;
index_scan->sel_arg= key;
*tree->index_scans_end++= index_scan;
+
+ if (unlikely(thd->trace_started()))
+ append_range_all_keyparts(&trace_range, NULL, &range_info, key,
+ key_part);
+ trace_range.end();
+
+ trace_idx.add("rowid_ordered", param->is_ror_scan)
+ .add("using_mrr", !(mrr_flags & HA_MRR_USE_DEFAULT_IMPL))
+ .add("index_only", read_index_only)
+ .add("rows", found_records)
+ .add("cost", cost.total_cost());
}
if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
{
@@ -6951,6 +7409,18 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
best_idx= idx;
best_mrr_flags= mrr_flags;
best_buf_size= buf_size;
+ trace_idx.add("chosen", true);
+ }
+ else
+ {
+ trace_idx.add("chosen", false);
+ if (found_records == HA_POS_ERROR)
+ {
+   if (key->type == SEL_ARG::Type::MAYBE_KEY)
+     trace_idx.add("cause", "depends on unread values");
+   else
+     trace_idx.add("cause", "unknown");
+ }
+ else
+   trace_idx.add("cause", "cost");
}
}
}
@@ -7055,10 +7525,11 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
"creating ROR-intersect",
first_scan, last_scan););
alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
- for (; first_scan != last_scan;++first_scan)
+ for (ROR_SCAN_INFO **curr_scan= first_scan; curr_scan != last_scan;
+ ++curr_scan)
{
- if (!(quick= get_quick_select(param, (*first_scan)->idx,
- (*first_scan)->sel_arg,
+ if (!(quick= get_quick_select(param, (*curr_scan)->idx,
+ (*curr_scan)->sel_arg,
HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
0, alloc)) ||
quick_intrsect->push_quick_back(alloc, quick))
@@ -12584,16 +13055,27 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
DBUG_ENTER("get_best_group_min_max");
+ Json_writer_object trace_group(thd, "group_index_range");
+ const char* cause= NULL;
+
/* Perform few 'cheap' tests whether this access method is applicable. */
- if (!join)
- DBUG_RETURN(NULL); /* This is not a select statement. */
- if ((join->table_count != 1) || /* The query must reference one table. */
- (join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
+ if (!join) /* This is not a select statement. */
+ cause= "no join";
+ else if (join->table_count != 1) /* The query must reference one table. */
+ cause= "not single_table";
+ else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
+ cause= "rollup";
+ else if (table->s->keys == 0) /* There are no indexes to use. */
+ cause= "no index";
+ else if (join->conds && join->conds->used_tables()
+ & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */
+ cause= "correlated conditions";
+
+ if (cause)
+ {
+ trace_group.add("chosen", false).add("cause", cause);
DBUG_RETURN(NULL);
- if (table->s->keys == 0) /* There are no indexes to use. */
- DBUG_RETURN(NULL);
- if (join->conds && join->conds->used_tables() & OUTER_REF_TABLE_BIT)
- DBUG_RETURN(NULL); /* Cannot execute with correlated conditions. */
+ }
/* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
List_iterator<Item> select_items_it(join->fields_list);
@@ -12602,7 +13084,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
(!join->select_distinct) &&
!is_agg_distinct)
+ {
+ trace_group.add("chosen", false).add("cause","no group by or distinct");
DBUG_RETURN(NULL);
+ }
/* Analyze the query in more detail. */
if (join->sum_funcs[0])
@@ -12621,7 +13106,11 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC))
continue;
else
+ {
+ trace_group.add("chosen", false)
+ .add("cause", "not applicable aggregate function");
DBUG_RETURN(NULL);
+ }
/* The argument of MIN/MAX. */
Item *expr= min_max_item->get_arg(0)->real_item();
@@ -12630,26 +13119,41 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
if (! min_max_arg_item)
min_max_arg_item= (Item_field*) expr;
else if (! min_max_arg_item->eq(expr, 1))
+ {
+ trace_group.add("chosen", false)
+ .add("cause", "arguments different in min max function");
DBUG_RETURN(NULL);
+ }
}
else
+ {
+ trace_group.add("chosen", false)
+ .add("cause", "no field item in min max function");
DBUG_RETURN(NULL);
+ }
}
}
/* Check (SA7). */
if (is_agg_distinct && (have_max || have_min))
{
+ trace_group.add("chosen", false)
+ .add("cause", "have both agg distinct and min max");
DBUG_RETURN(NULL);
}
/* Check (SA5). */
if (join->select_distinct)
{
+ trace_group.add("distinct_query", true);
while ((item= select_items_it++))
{
if (item->real_item()->type() != Item::FIELD_ITEM)
+ {
+ trace_group.add("chosen", false)
+ .add("cause", "distinct field is expression");
DBUG_RETURN(NULL);
+ }
}
}
@@ -12658,7 +13162,11 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
{
if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
+ {
+ trace_group.add("chosen", false)
+ .add("cause", "group field is expression");
DBUG_RETURN(NULL);
+ }
elements_in_group++;
}
@@ -12680,10 +13188,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
ha_rows cur_quick_prefix_records= 0;
// We go through allowed indexes
+ Json_writer_array trace_indexes(thd, "potential_group_range_indexes");
+
for (uint cur_param_idx= 0; cur_param_idx < param->keys ; ++cur_param_idx)
{
const uint cur_index= param->real_keynr[cur_param_idx];
KEY *const cur_index_info= &table->key_info[cur_index];
+
+ Json_writer_object trace_idx(thd);
+ trace_idx.add("index", cur_index_info->name);
+
KEY_PART_INFO *cur_part;
KEY_PART_INFO *end_part; /* Last part for loops. */
/* Last index part. */
@@ -12708,7 +13222,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
*/
if (!table->covering_keys.is_set(cur_index) ||
!table->keys_in_use_for_group_by.is_set(cur_index))
- continue;
+ {
+ cause= "not covering";
+ goto next_index;
+ }
/*
This function is called on the precondition that the index is covering.
@@ -12716,7 +13233,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
these are duplicates. The GROUP BY list cannot be a prefix of the index.
*/
if (elements_in_group > table->actual_n_key_parts(cur_index_info))
- continue;
+ {
+ cause= "group key parts greater than index key parts";
+ goto next_index;
+ }
/*
Unless extended keys can be used for cur_index:
@@ -12742,10 +13262,15 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
*/
if (bitmap_is_set(table->read_set, cur_field->field_index) &&
!cur_field->part_of_key_not_clustered.is_set(cur_index))
+ {
+ cause= "not covering";
goto next_index; // Field was not part of key
+ }
}
}
+ trace_idx.add("covering", true);
+
max_key_part= 0;
used_key_parts_map.clear_all();
@@ -12776,7 +13301,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
used_key_parts_map.set_bit(max_key_part);
}
else
+ {
+ cause= "group attribute not prefix in index";
goto next_index;
+ }
}
}
/*
@@ -12805,7 +13333,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
/* not doing loose index scan for derived tables */
if (!item_field->field)
+ {
+ cause= "derived table";
goto next_index;
+ }
/* Find the order of the key part in the index. */
key_part_nr= get_field_keypart(cur_index_info, item_field->field);
@@ -12817,7 +13348,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
continue;
if (key_part_nr < 1 ||
(!is_agg_distinct && key_part_nr > join->fields_list.elements))
+ {
+ cause= "select attribute not prefix in index";
goto next_index;
+ }
cur_part= cur_index_info->key_part + key_part_nr - 1;
cur_group_prefix_len+= cur_part->store_length;
used_key_parts_map.set_bit(key_part_nr);
@@ -12842,7 +13376,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
{
key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
if (key_part_nr <= cur_group_key_parts)
+ {
+        cause= "aggregate column not suffix in idx";
goto next_index;
+ }
min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
}
@@ -12853,6 +13390,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
if (cur_index_info->flags & HA_NOSAME &&
cur_group_key_parts == cur_index_info->user_defined_key_parts)
{
+ cause= "using unique index";
goto next_index;
}
@@ -12892,7 +13430,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
last_part, thd, cur_key_infix,
&cur_key_infix_len,
&first_non_infix_part))
+ {
+        cause= "nonconst equality gap attribute";
goto next_index;
+ }
}
else if (min_max_arg_part &&
(min_max_arg_part - first_non_group_part > 0))
@@ -12901,6 +13442,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
There is a gap but no range tree, thus no predicates at all for the
non-group keyparts.
*/
+      cause= "no nongroup keypart predicate";
goto next_index;
}
else if (first_non_group_part && join->conds)
@@ -12924,7 +13466,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
/* Check if cur_part is referenced in the WHERE clause. */
if (join->conds->walk(&Item::find_item_in_field_list_processor, 0,
key_part_range))
+ {
+        cause= "keypart reference from where clause";
goto next_index;
+ }
}
}
@@ -12939,7 +13484,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
for (; cur_part != last_part; cur_part++)
{
if (bitmap_is_set(table->read_set, cur_part->field->field_index))
+ {
+        cause= "keypart after infix in query";
goto next_index;
+ }
}
}
@@ -12956,6 +13504,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
index_range_tree, &cur_range) ||
(cur_range && cur_range->type != SEL_ARG::KEY_RANGE))
{
+      cause= "minmax keypart in disjunctive query";
goto next_index;
}
}
@@ -12978,6 +13527,17 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
cur_index_tree, TRUE,
&mrr_flags, &mrr_bufsize,
&dummy_cost);
+ if (unlikely(cur_index_tree && thd->trace_started()))
+ {
+ Json_writer_array trace_range(thd, "ranges");
+
+ const KEY_PART_INFO *key_part = cur_index_info->key_part;
+
+ String range_info;
+ range_info.set_charset(system_charset_info);
+ append_range_all_keyparts(&trace_range, NULL, &range_info,
+ cur_index_tree, key_part);
+ }
}
cost_group_min_max(table, cur_index_info, cur_used_key_parts,
cur_group_key_parts, tree, cur_index_tree,
@@ -12988,6 +13548,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
Do not compare doubles directly because they may have different
representations (64 vs. 80 bits).
*/
+ trace_idx.add("rows", cur_records).add("cost", cur_read_cost);
+
if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
{
index_info= cur_index_info;
@@ -13005,8 +13567,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
used_key_parts= cur_used_key_parts;
}
- next_index:;
+ next_index:
+ if (cause)
+ {
+ trace_idx.add("usable", false).add("cause", cause);
+      cause= NULL;
+ }
}
+
+ trace_indexes.end();
+
if (!index_info) /* No usable index found. */
DBUG_RETURN(NULL);
@@ -13017,14 +13587,22 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
(index_info->flags & HA_SPATIAL) ?
Field::itMBR : Field::itRAW,
&has_min_max_fld, &has_other_fld))
+ {
+ trace_group.add("usable", false)
+ .add("cause", "unsupported predicate on agg attribute");
DBUG_RETURN(NULL);
+ }
/*
Check (SA6) if clustered key is used
*/
if (is_agg_distinct && index == table->s->primary_key &&
table->file->primary_key_is_clustered())
+ {
+ trace_group.add("usable", false)
+ .add("cause", "index is clustered");
DBUG_RETURN(NULL);
+ }
/* The query passes all tests, so construct a new TRP object. */
read_plan= new (param->mem_root)
@@ -13045,6 +13623,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
read_plan->records= best_records;
if (read_time < best_read_cost && is_agg_distinct)
{
+ trace_group.add("index_scan", true);
read_plan->read_cost= 0;
read_plan->use_index_scan();
}
@@ -15002,7 +15581,6 @@ static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
DBUG_VOID_RETURN;
}
-
void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
{
/* purecov: begin inspected */
@@ -15130,3 +15708,174 @@ void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
}
#endif /* !DBUG_OFF */
+static
+void append_range(String *out, const KEY_PART_INFO *key_part,
+ const uchar *min_key, const uchar *max_key, const uint flag)
+{
+ if (out->length() > 0)
+ out->append(STRING_WITH_LEN(" AND "));
+
+ if (flag & GEOM_FLAG)
+ {
+ /*
+ The flags of GEOM ranges do not work the same way as for other
+ range types, so printing "col < some_geom" doesn't make sense.
+ Just print the column name, not operator.
+ */
+ out->append(key_part->field->field_name);
+ out->append(STRING_WITH_LEN(" "));
+ print_key_value(out, key_part, min_key);
+ return;
+ }
+
+ if (!(flag & NO_MIN_RANGE))
+ {
+ print_key_value(out, key_part, min_key);
+ if (flag & NEAR_MIN)
+ out->append(STRING_WITH_LEN(" < "));
+ else
+ out->append(STRING_WITH_LEN(" <= "));
+ }
+
+ out->append(key_part->field->field_name);
+
+ if (!(flag & NO_MAX_RANGE))
+ {
+ if (flag & NEAR_MAX)
+ out->append(STRING_WITH_LEN(" < "));
+ else
+ out->append(STRING_WITH_LEN(" <= "));
+ print_key_value(out, key_part, max_key);
+ }
+}
+
+/*
+  Add the constructed ranges to the optimizer trace.
+
+  For example, for the query
+    SELECT * FROM t1 WHERE a=2;
+  with an index on column a, we create the range
+    2 <= a <= 2
+  and add it to the trace.
+*/
+
+static void append_range_all_keyparts(Json_writer_array *range_trace,
+ String *range_string,
+ String *range_so_far, const SEL_ARG *keypart,
+ const KEY_PART_INFO *key_parts)
+{
+  DBUG_ASSERT(keypart);
+  DBUG_ASSERT(keypart != &null_element);
+
+ // Navigate to first interval in red-black tree
+ const KEY_PART_INFO *cur_key_part = key_parts + keypart->part;
+ const SEL_ARG *keypart_range = keypart->first();
+ const size_t save_range_so_far_length = range_so_far->length();
+
+ while (keypart_range)
+ {
+ // Append the current range predicate to the range String
+ switch (keypart->type)
+ {
+ case SEL_ARG::Type::KEY_RANGE:
+ append_range(range_so_far, cur_key_part, keypart_range->min_value,
+ keypart_range->max_value,
+ keypart_range->min_flag | keypart_range->max_flag);
+ break;
+ case SEL_ARG::Type::MAYBE_KEY:
+ range_so_far->append("MAYBE_KEY");
+ break;
+ case SEL_ARG::Type::IMPOSSIBLE:
+ range_so_far->append("IMPOSSIBLE");
+ break;
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+
+ if (keypart_range->next_key_part &&
+ keypart_range->next_key_part->part ==
+ keypart_range->part + 1 &&
+ keypart_range->is_singlepoint())
+ {
+ append_range_all_keyparts(range_trace, range_string, range_so_far,
+ keypart_range->next_key_part, key_parts);
+ }
+ else
+ range_trace->add(range_so_far->c_ptr_safe(), range_so_far->length());
+ keypart_range= keypart_range->next;
+ range_so_far->length(save_range_so_far_length);
+ }
+}
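+
+/*
+  A sketch of the recursion above, assuming an index on (a, b)
+  (hypothetical example, not from any test suite):
+
+    SELECT MAX(b) FROM t1 WHERE a = 1 AND b > 10;
+
+  The first keypart produces the singlepoint range "1 <= a <= 1"; since
+  it has a next_key_part for b, we recurse and append "10 < b", so the
+  trace receives the entry:
+
+    "ranges": ["1 <= a <= 1 AND 10 < b"]
+*/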
+
+/**
+ Print a key to a string
+
+ @param[out] out String the key is appended to
+ @param[in] key_part Index components description
+ @param[in] key Key tuple
+*/
+static void print_key_value(String *out, const KEY_PART_INFO *key_part,
+ const uchar *key)
+{
+ Field *field = key_part->field;
+
+ if (field->flags & BLOB_FLAG) {
+ // Byte 0 of a nullable key is the null-byte. If set, key is NULL.
+ if (field->real_maybe_null() && *key)
+ out->append(STRING_WITH_LEN("NULL"));
+ else
+ (field->type() == MYSQL_TYPE_GEOMETRY)
+ ? out->append(STRING_WITH_LEN("unprintable_geometry_value"))
+ : out->append(STRING_WITH_LEN("unprintable_blob_value"));
+ return;
+ }
+
+ uint store_length = key_part->store_length;
+
+ if (field->real_maybe_null())
+ {
+ /*
+ Byte 0 of key is the null-byte. If set, key is NULL.
+ Otherwise, print the key value starting immediately after the
+ null-byte
+ */
+ if (*key) {
+ out->append(STRING_WITH_LEN("NULL"));
+ return;
+ }
+ key++; // Skip null byte
+ store_length--;
+ }
+
+ /*
+ Binary data cannot be converted to UTF8 which is what the
+ optimizer trace expects. If the column is binary, the hex
+ representation is printed to the trace instead.
+ */
+ if (field->flags & BINARY_FLAG) {
+ out->append("0x");
+ for (uint i = 0; i < store_length; i++) {
+ out->append(_dig_vec_lower[*(key + i) >> 4]);
+ out->append(_dig_vec_lower[*(key + i) & 0x0F]);
+ }
+ return;
+ }
+
+ StringBuffer<128> tmp(system_charset_info);
+ TABLE *table = field->table;
+ my_bitmap_map *old_sets[2];
+
+ dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set);
+
+ field->set_key_image(key, key_part->length);
+ if (field->type() == MYSQL_TYPE_BIT)
+ (void)field->val_int_as_str(&tmp, 1); // may change tmp's charset
+ else
+ field->val_str(&tmp); // may change tmp's charset
+ out->append(tmp.ptr(), tmp.length(), tmp.charset());
+
+ dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
+}
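+
+/*
+  For example (a sketch, hypothetical values): for a BINARY(2) keypart
+  holding the bytes 0xDE 0xAD, the value is hex-dumped into the trace
+  as "0xdead"; for a nullable keypart whose null-byte is set, it is
+  printed as "NULL".
+*/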
diff --git a/sql/opt_range.h b/sql/opt_range.h
index d5416988b88..7e92e1f54ee 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -566,7 +566,7 @@ public:
FALSE Otherwise
*/
- bool is_singlepoint()
+ bool is_singlepoint() const
{
/*
Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index c4c30c9b50d..228fcd0f7e6 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -33,6 +33,7 @@
#include "opt_subselect.h"
#include "sql_test.h"
#include <my_bit.h>
+#include "opt_trace.h"
/*
This file contains optimizations for semi-join subqueries.
@@ -437,7 +438,7 @@ Currently, solution #2 is implemented.
LEX_CSTRING weedout_key= {STRING_WITH_LEN("weedout_key")};
static
-bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
+bool subquery_types_allow_materialization(THD *thd, Item_in_subselect *in_subs);
static bool replace_where_subcondition(JOIN *, Item **, Item *, Item *, bool);
static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
void *arg);
@@ -521,7 +522,7 @@ bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs,
parent_unit->first_select()->leaf_tables.elements && // 2
child_select->outer_select() &&
child_select->outer_select()->leaf_tables.elements && // 2A
- subquery_types_allow_materialization(in_subs) &&
+ subquery_types_allow_materialization(thd, in_subs) &&
(in_subs->is_top_level_item() || //3
optimizer_flag(thd,
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
@@ -682,7 +683,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
{
DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
- (void)subquery_types_allow_materialization(in_subs);
+ (void)subquery_types_allow_materialization(thd, in_subs);
in_subs->is_flattenable_semijoin= TRUE;
@@ -696,6 +697,9 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
if (arena)
thd->restore_active_arena(arena, &backup);
in_subs->is_registered_semijoin= TRUE;
+ OPT_TRACE_TRANSFORM(thd, oto0, oto1, select_lex->select_number,
+ "IN (SELECT)", "semijoin");
+ oto1.add("chosen", true);
}
}
else
@@ -823,7 +827,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
*/
static
-bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
+bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs)
{
DBUG_ENTER("subquery_types_allow_materialization");
@@ -831,9 +835,14 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
List_iterator<Item> it(in_subs->unit->first_select()->item_list);
uint elements= in_subs->unit->first_select()->item_list.elements;
+ const char* cause= NULL;
in_subs->types_allow_materialization= FALSE; // Assign default values
in_subs->sjm_scan_allowed= FALSE;
+
+ OPT_TRACE_TRANSFORM(thd, oto0, oto1,
+ in_subs->get_select_lex()->select_number,
+ "IN (SELECT)", "materialization");
bool all_are_fields= TRUE;
uint32 total_key_length = 0;
@@ -846,7 +855,11 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
total_key_length += inner->max_length;
if (!inner->type_handler()->subquery_type_allows_materialization(inner,
outer))
+ {
+ oto1.add("possible", false);
+ oto1.add("cause", "types mismatch");
DBUG_RETURN(FALSE);
+ }
}
/*
@@ -856,14 +869,23 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
    Make sure that the length of the key for the temp_table is
    greater than 0.
*/
- if (!total_key_length || total_key_length > tmp_table_max_key_length() ||
- elements > tmp_table_max_key_parts())
- DBUG_RETURN(FALSE);
-
- in_subs->types_allow_materialization= TRUE;
- in_subs->sjm_scan_allowed= all_are_fields;
- DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
- DBUG_RETURN(TRUE);
+ if (!total_key_length)
+ cause= "zero length key for materialized table";
+ else if (total_key_length > tmp_table_max_key_length())
+ cause= "length of key greater than allowed key length for materialized tables";
+ else if (elements > tmp_table_max_key_parts())
+ cause= "#keyparts greater than allowed key parts for materialized tables";
+ else
+ {
+ in_subs->types_allow_materialization= TRUE;
+ in_subs->sjm_scan_allowed= all_are_fields;
+ oto1.add("sjm_scan_allowed", all_are_fields)
+ .add("possible", true);
+ DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
+ DBUG_RETURN(TRUE);
+ }
+ oto1.add("possible", false).add("cause", cause);
+ DBUG_RETURN(FALSE);
}
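+
+/*
+  A sketch of the trace object written above, for a query like
+
+    SELECT * FROM t1 WHERE a IN (SELECT b FROM t2);
+
+  when materialization is possible (hypothetical select number 2):
+
+    { "transformation": { "select_id": 2, "from": "IN (SELECT)",
+                          "to": "materialization",
+                          "sjm_scan_allowed": true, "possible": true } }
+*/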
@@ -1213,15 +1235,30 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
/* Stop processing if we've reached a subquery that's attached to the ON clause */
if (in_subq->do_not_convert_to_sj)
+ {
+ OPT_TRACE_TRANSFORM(thd, oto0, oto1,
+ in_subq->get_select_lex()->select_number,
+ "IN (SELECT)", "semijoin");
+ oto1.add("converted_to_semi_join", false)
+ .add("cause", "subquery attached to the ON clause");
break;
+ }
if (in_subq->is_flattenable_semijoin)
{
+ OPT_TRACE_TRANSFORM(thd, oto0, oto1,
+ in_subq->get_select_lex()->select_number,
+ "IN (SELECT)", "semijoin");
if (join->table_count +
in_subq->unit->first_select()->join->table_count >= MAX_TABLES)
+ {
+ oto1.add("converted_to_semi_join", false);
+ oto1.add("cause", "table in parent join now exceeds MAX_TABLES");
break;
+ }
if (convert_subq_to_sj(join, in_subq))
goto restore_arena_and_fail;
+ oto1.add("converted_to_semi_join", true);
}
else
{
@@ -2340,8 +2377,13 @@ int pull_out_semijoin_tables(JOIN *join)
bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
{
DBUG_ENTER("optimize_semijoin_nests");
+ THD *thd= join->thd;
List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
TABLE_LIST *sj_nest;
+ Json_writer_object wrapper(thd);
+ Json_writer_object trace_semijoin_nest(thd,
+ "execution_plan_for_potential_materialization");
+ Json_writer_array trace_steps_array(thd, "steps");
while ((sj_nest= sj_list_it++))
{
/* semi-join nests with only constant tables are not valid */
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index 74d1e775c43..03516146de2 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -31,6 +31,8 @@
#include "mariadb.h"
#include "my_bit.h"
#include "sql_select.h"
+#include "opt_trace.h"
+#include "my_json_writer.h"
/*
OVERVIEW
@@ -522,7 +524,8 @@ eliminate_tables_for_list(JOIN *join,
List<TABLE_LIST> *join_list,
table_map tables_in_list,
Item *on_expr,
- table_map tables_used_elsewhere);
+ table_map tables_used_elsewhere,
+ Json_writer_array* eliminate_tables);
static
bool check_func_dependency(JOIN *join,
table_map dep_tables,
@@ -541,7 +544,8 @@ static
Dep_module_expr *merge_eq_mods(Dep_module_expr *start,
Dep_module_expr *new_fields,
Dep_module_expr *end, uint and_level);
-static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl);
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
+ Json_writer_array* eliminate_tables);
static
void add_module_expr(Dep_analysis_context *dac, Dep_module_expr **eq_mod,
uint and_level, Dep_value_field *field_val, Item *right,
@@ -608,6 +612,8 @@ void eliminate_tables(JOIN *join)
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION))
DBUG_VOID_RETURN; /* purecov: inspected */
+ Json_writer_object trace_wrapper(thd);
+
/* Find the tables that are referred to from WHERE/HAVING */
used_tables= (join->conds? join->conds->used_tables() : 0) |
(join->having? join->having->used_tables() : 0);
@@ -663,13 +669,14 @@ void eliminate_tables(JOIN *join)
}
}
}
-
+
table_map all_tables= join->all_tables_map();
+  Json_writer_array eliminated_tables(thd, "eliminated_tables");
if (all_tables & ~used_tables)
{
/* There are some tables that we probably could eliminate. Try it. */
eliminate_tables_for_list(join, join->join_list, all_tables, NULL,
- used_tables);
+ used_tables, &eliminated_tables);
}
DBUG_VOID_RETURN;
}
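+
+/*
+  A sketch of the resulting trace entry when, say, table t2 is found to
+  be eliminable (hypothetical):
+
+    "eliminated_tables": ["t2"]
+*/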
@@ -712,7 +719,8 @@ void eliminate_tables(JOIN *join)
static bool
eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
table_map list_tables, Item *on_expr,
- table_map tables_used_elsewhere)
+ table_map tables_used_elsewhere,
+ Json_writer_array *eliminate_tables)
{
TABLE_LIST *tbl;
List_iterator<TABLE_LIST> it(*join_list);
@@ -734,9 +742,9 @@ eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
&tbl->nested_join->join_list,
tbl->nested_join->used_tables,
tbl->on_expr,
- outside_used_tables))
+ outside_used_tables, eliminate_tables))
{
- mark_as_eliminated(join, tbl);
+ mark_as_eliminated(join, tbl, eliminate_tables);
}
else
all_eliminated= FALSE;
@@ -748,7 +756,7 @@ eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
check_func_dependency(join, tbl->table->map, NULL, tbl,
tbl->on_expr))
{
- mark_as_eliminated(join, tbl);
+ mark_as_eliminated(join, tbl, eliminate_tables);
}
else
all_eliminated= FALSE;
@@ -1788,7 +1796,8 @@ Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
Mark one table or the whole join nest as eliminated.
*/
-static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl)
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
+ Json_writer_array* eliminate_tables)
{
TABLE *table;
/*
@@ -1801,7 +1810,7 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl)
TABLE_LIST *child;
List_iterator<TABLE_LIST> it(tbl->nested_join->join_list);
while ((child= it++))
- mark_as_eliminated(join, child);
+ mark_as_eliminated(join, child, eliminate_tables);
}
else if ((table= tbl->table))
{
@@ -1812,6 +1821,7 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl)
tab->type= JT_CONST;
tab->table->const_table= 1;
join->eliminated_tables |= table->map;
+ eliminate_tables->add(table->alias.c_ptr_safe());
join->const_table_map|= table->map;
set_position(join, join->const_tables++, tab, (KEYUSE*)0);
}
diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc
new file mode 100644
index 00000000000..ca05f36579a
--- /dev/null
+++ b/sql/opt_trace.cc
@@ -0,0 +1,722 @@
+/* This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "mariadb.h"
+#include "sql_array.h"
+#include "sql_string.h"
+#include "sql_class.h"
+#include "sql_show.h"
+#include "field.h"
+#include "table.h"
+#include "opt_trace.h"
+#include "sql_parse.h"
+#include "set_var.h"
+#include "my_json_writer.h"
+#include "sp_head.h"
+
+const char I_S_table_name[] = "OPTIMIZER_TRACE";
+
+/**
+ Whether a list of tables contains information_schema.OPTIMIZER_TRACE.
+ @param tbl list of tables
+
+  @todo Can we do better than this here?
+ @note this does not catch that a stored routine or view accesses
+ the OPTIMIZER_TRACE table. So using a stored routine or view to read
+ OPTIMIZER_TRACE will overwrite OPTIMIZER_TRACE as it runs and provide
+ uninteresting info.
+*/
+bool list_has_optimizer_trace_table(const TABLE_LIST *tbl)
+{
+ for (; tbl; tbl = tbl->next_global)
+ {
+ if (tbl->schema_table &&
+ 0 == strcmp(tbl->schema_table->table_name, I_S_table_name))
+ return true;
+ }
+ return false;
+}
+
+/*
+  Returns true if the query is a SET statement that switches
+  optimizer_trace on or off; such a query is not traced (uninteresting).
+*/
+
+bool sets_var_optimizer_trace(enum enum_sql_command sql_command,
+ List<set_var_base> *set_vars)
+{
+ if (sql_command == SQLCOM_SET_OPTION)
+ {
+ List_iterator_fast<set_var_base> it(*set_vars);
+ const set_var_base *var;
+ while ((var = it++))
+ if (var->is_var_optimizer_trace()) return true;
+ }
+ return false;
+}
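+
+/*
+  For example (a sketch), the statement
+
+    SET optimizer_trace='enabled=on';
+
+  is itself never traced: tracing it would only overwrite a previous,
+  possibly interesting trace with an uninteresting one.
+*/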
+
+
+ST_FIELD_INFO optimizer_trace_info[] = {
+ /* name, length, type, value, maybe_null, old_name, open_method */
+ {"QUERY", 65535, MYSQL_TYPE_STRING, 0, false, NULL, SKIP_OPEN_TABLE},
+ {"TRACE", 65535, MYSQL_TYPE_STRING, 0, false, NULL, SKIP_OPEN_TABLE},
+ {"MISSING_BYTES_BEYOND_MAX_MEM_SIZE", 20, MYSQL_TYPE_LONG, 0, false, NULL,
+ SKIP_OPEN_TABLE},
+ {"INSUFFICIENT_PRIVILEGES", 1, MYSQL_TYPE_TINY, 0, false, NULL,
+ SKIP_OPEN_TABLE},
+ {NULL, 0, MYSQL_TYPE_STRING, 0, true, NULL, 0}};
+
+/*
+  TODO: the one_line flag needs to be implemented separately
+*/
+const char *Opt_trace_context::flag_names[] = {"enabled", "default",
+ NullS};
+
+/*
+  Returns whether a particular command will be traced.
+*/
+
+inline bool sql_command_can_be_traced(enum enum_sql_command sql_command)
+{
+  /*
+    For the first iteration we only allow SELECT, UPDATE and DELETE
+    statements.
+    TODO: allow other commands as well.
+  */
+ return sql_command == SQLCOM_SELECT ||
+ sql_command == SQLCOM_UPDATE ||
+ sql_command == SQLCOM_DELETE ||
+ sql_command == SQLCOM_DELETE_MULTI ||
+ sql_command == SQLCOM_UPDATE_MULTI;
+}
+
+void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex,
+ Json_writer_object *writer)
+
+{
+ if (!thd->trace_started())
+ return;
+ char buff[1024];
+ String str(buff, sizeof(buff), system_charset_info);
+ str.length(0);
+ select_lex->print(thd, &str,
+ enum_query_type(QT_TO_SYSTEM_CHARSET |
+ QT_SHOW_SELECT_NUMBER |
+ QT_ITEM_IDENT_SKIP_DB_NAMES |
+ QT_VIEW_INTERNAL
+ ));
+  /*
+    The output is not very pretty: it contains lots of back-ticks, just
+    like the output of EXPLAIN EXTENDED. Let's try to improve it here.
+  */
+ writer->add("expanded_query", str.c_ptr_safe(), str.length());
+}
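+
+/*
+  A sketch of the result (exact quoting may differ): for the query
+
+    SELECT * FROM t1 WHERE a=1;
+
+  the trace receives something like:
+
+    "expanded_query": "select t1.a AS a from t1 where t1.a = 1"
+*/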
+
+void opt_trace_disable_if_no_security_context_access(THD *thd)
+{
+ if (likely(!(thd->variables.optimizer_trace &
+ Opt_trace_context::FLAG_ENABLED)) || // (1)
+ thd->system_thread) // (2)
+ {
+ /*
+ (1) We know that the routine's execution starts with "enabled=off".
+ If it stays so until the routine ends, we needn't do security checks on
+ the routine.
+ If it does not stay so, it means the definer sets it to "on" somewhere
+ in the routine's body. Then it is his conscious decision to generate
+ traces, thus it is still correct to skip the security check.
+
+ (2) Threads of the Events Scheduler have an unusual security context
+ (thd->m_main_security_ctx.priv_user==NULL, see comment in
+ Security_context::change_security_context()).
+ */
+ return;
+ }
+ Opt_trace_context *const trace = &thd->opt_trace;
+ if (!thd->trace_started())
+ {
+ /*
+ @@optimizer_trace has "enabled=on" but trace is not started.
+ Either Opt_trace_start ctor was not called for our statement (3), or it
+ was called but at that time, the variable had "enabled=off" (4).
+
+ There are no known cases of (3).
+
+ (4) suggests that the user managed to change the variable during
+ execution of the statement, and this statement is using
+ view/routine (note that we have not been able to provoke this, maybe
+ this is impossible). If it happens it is suspicious.
+
+ We disable I_S output. And we cannot do otherwise: we have no place to
+ store a possible "missing privilege" information (no Opt_trace_stmt, as
+ is_started() is false), so cannot do security checks, so cannot safely
+ do tracing, so have to disable I_S output. And even then, we don't know
+ when to re-enable I_S output, as we have no place to store the
+ information "re-enable tracing at the end of this statement", and we
+ don't even have a notion of statement here (statements in the optimizer
+ trace world mean an Opt_trace_stmt object, and there is none here). So
+ we must disable for the session's life.
+
+ COM_FIELD_LIST opens views, thus used to be a case of (3). To avoid
+ disabling I_S output for the session's life when this command is issued
+ (like in: "SET OPTIMIZER_TRACE='ENABLED=ON';USE somedb;" in the 'mysql'
+ command-line client), we have decided to create a Opt_trace_start for
+ this command. The command itself is not traced though
+ (SQLCOM_SHOW_FIELDS does not have CF_OPTIMIZER_TRACE).
+ */
+ return;
+ }
+ /*
+ Note that thd->main_security_ctx.master_access is probably invariant
+  across the life of THD: GRANT/REVOKE don't affect global privileges of an
+ existing connection, per the manual.
+ */
+ if (!(thd->main_security_ctx.check_access(GLOBAL_ACLS & ~GRANT_ACL)) &&
+ (0 != strcmp(thd->main_security_ctx.priv_user,
+ thd->security_context()->priv_user) ||
+ 0 != my_strcasecmp(system_charset_info,
+ thd->main_security_ctx.priv_host,
+ thd->security_context()->priv_host)))
+ trace->missing_privilege();
+ return;
+}
+
+void opt_trace_disable_if_no_stored_proc_func_access(THD *thd, sp_head *sp)
+{
+ if (likely(!(thd->variables.optimizer_trace &
+ Opt_trace_context::FLAG_ENABLED)) ||
+ thd->system_thread)
+ return;
+
+ Opt_trace_context *const trace = &thd->opt_trace;
+ if (!thd->trace_started())
+ return;
+ bool full_access;
+ Security_context *const backup_thd_sctx = thd->security_context();
+ thd->set_security_context(&thd->main_security_ctx);
+ const bool rc = check_show_routine_access(thd, sp, &full_access) || !full_access;
+ thd->set_security_context(backup_thd_sctx);
+ if (rc)
+ trace->missing_privilege();
+ return;
+}
+
+/**
+ If tracing is on, checks additional privileges on a list of tables/views,
+ to make sure that the user has the right to do SHOW CREATE TABLE/VIEW and
+ "SELECT *". For that:
+  - this function checks table-level SELECT
+  - which is sufficient for SHOW CREATE TABLE and "SELECT *", if a base table
+  - if a view, and the view has not been identified as such, then
+  opt_trace_disable_if_no_view_access() will be called later and check SHOW
+  VIEW; otherwise we check SHOW VIEW here; SHOW VIEW + SELECT is sufficient
+  for SHOW CREATE VIEW.
+ If a privilege is missing, notifies the trace system.
+
+ @param thd
+ @param tbl list of tables to check
+*/
+
+void opt_trace_disable_if_no_tables_access(THD *thd, TABLE_LIST *tbl)
+{
+ if (likely(!(thd->variables.optimizer_trace &
+ Opt_trace_context::FLAG_ENABLED)) || thd->system_thread)
+ return;
+ Opt_trace_context *const trace = &thd->opt_trace;
+
+ if (!thd->trace_started())
+ return;
+
+ Security_context *const backup_thd_sctx = thd->security_context();
+ thd->set_security_context(&thd->main_security_ctx);
+ const TABLE_LIST *const first_not_own_table = thd->lex->first_not_own_table();
+ for (TABLE_LIST *t = tbl; t != NULL && t != first_not_own_table;
+ t = t->next_global)
+ {
+ /*
+ Anonymous derived tables (as in
+ "SELECT ... FROM (SELECT ...)") don't have their grant.privilege set.
+ */
+ if (!t->is_anonymous_derived_table())
+ {
+ const GRANT_INFO backup_grant_info = t->grant;
+ Security_context *const backup_table_sctx = t->security_ctx;
+ t->security_ctx = NULL;
+ /*
+ (1) check_table_access() fills t->grant.privilege.
+ (2) Because SELECT privileges can be column-based,
+ check_table_access() will return 'false' as long as there is SELECT
+ privilege on one column. But we want a table-level privilege.
+ */
+
+ bool rc =
+ check_table_access(thd, SELECT_ACL, t, false, 1, true) || // (1)
+ ((t->grant.privilege & SELECT_ACL) == 0); // (2)
+ if (t->is_view())
+ {
+ /*
+ It's a view which has already been opened: we are executing a
+ prepared statement. The view has been unfolded in the global list of
+ tables. So underlying tables will be automatically checked in the
+ present function, but we need an explicit check of SHOW VIEW:
+ */
+ rc |= check_table_access(thd, SHOW_VIEW_ACL, t, false, 1, true);
+ }
+ t->security_ctx = backup_table_sctx;
+ t->grant = backup_grant_info;
+ if (rc)
+ {
+ trace->missing_privilege();
+ break;
+ }
+ }
+ }
+ thd->set_security_context(backup_thd_sctx);
+ return;
+}
+
+void opt_trace_disable_if_no_view_access(THD *thd, TABLE_LIST *view,
+ TABLE_LIST *underlying_tables)
+{
+
+ if (likely(!(thd->variables.optimizer_trace &
+ Opt_trace_context::FLAG_ENABLED)) ||
+ thd->system_thread)
+ return;
+ Opt_trace_context *const trace = &thd->opt_trace;
+ if (!thd->trace_started())
+ return;
+
+ Security_context *const backup_table_sctx = view->security_ctx;
+ Security_context *const backup_thd_sctx = thd->security_context();
+ const GRANT_INFO backup_grant_info = view->grant;
+
+ view->security_ctx = NULL; // no SUID context for view
+ // no SUID context for THD
+ thd->set_security_context(&thd->main_security_ctx);
+ const int rc = check_table_access(thd, SHOW_VIEW_ACL, view, false, 1, true);
+
+ view->security_ctx = backup_table_sctx;
+ thd->set_security_context(backup_thd_sctx);
+ view->grant = backup_grant_info;
+
+ if (rc)
+ {
+ trace->missing_privilege();
+ return;
+ }
+ /*
+ We needn't check SELECT privilege on this view. Some
+ opt_trace_disable_if_no_tables_access() call has or will check it.
+
+ Now we check underlying tables/views of our view:
+ */
+ opt_trace_disable_if_no_tables_access(thd, underlying_tables);
+ return;
+}
+
+
+/**
+ @class Opt_trace_stmt
+
+ The trace of one statement.
+*/
+
+class Opt_trace_stmt {
+ public:
+ /**
+ Constructor, starts a trace for information_schema and dbug.
+ @param ctx_arg context
+ */
+ Opt_trace_stmt(Opt_trace_context *ctx_arg)
+ {
+ ctx= ctx_arg;
+ current_json= new Json_writer();
+ missing_priv= false;
+ I_S_disabled= 0;
+ }
+ ~Opt_trace_stmt()
+ {
+ delete current_json;
+ missing_priv= false;
+ ctx= NULL;
+ I_S_disabled= 0;
+ }
+ void set_query(const char *query_ptr, size_t length, const CHARSET_INFO *charset);
+ void open_struct(const char *key, char opening_bracket);
+ void close_struct(const char *saved_key, char closing_bracket);
+ void fill_info(Opt_trace_info* info);
+ void add(const char *key, char *opening_bracket, size_t val_length);
+ Json_writer* get_current_json(){return current_json;}
+ void missing_privilege();
+ void disable_tracing_for_children();
+ void enable_tracing_for_children();
+ bool is_enabled();
+
+ void set_allowed_mem_size(size_t mem_size);
+ size_t get_length() { return current_json->output.length(); }
+ size_t get_truncated_bytes() { return current_json->get_truncated_bytes(); }
+ bool get_missing_priv() { return missing_priv; }
+
+private:
+ Opt_trace_context *ctx;
+ String query; // store the query sent by the user
+ Json_writer *current_json; // stores the trace
+ bool missing_priv; ///< whether user lacks privilege to see this trace
+ uint I_S_disabled;
+};
+
+void Opt_trace_stmt::set_query(const char *query_ptr, size_t length,
+ const CHARSET_INFO *charset)
+{
+ query.append(query_ptr, length, charset);
+}
+
+Json_writer* Opt_trace_context::get_current_json()
+{
+ if (!is_started())
+ return NULL;
+ return current_trace->get_current_json();
+}
+
+void Opt_trace_context::missing_privilege()
+{
+ if (current_trace)
+ current_trace->missing_privilege();
+}
+
+void Opt_trace_context::set_allowed_mem_size(size_t mem_size)
+{
+ current_trace->set_allowed_mem_size(mem_size);
+}
+
+/*
+  TODO: In the future, when we save multiple traces, this function
+  should return
+  max_mem_size - memory_occupied_by_the_saved_traces
+*/
+
+size_t Opt_trace_context::remaining_mem_size()
+{
+ return max_mem_size;
+}
+
+bool Opt_trace_context::disable_tracing_if_required()
+{
+ if (current_trace)
+ {
+ current_trace->disable_tracing_for_children();
+ return true;
+ }
+ return false;
+}
+
+bool Opt_trace_context::enable_tracing_if_required()
+{
+ if (current_trace)
+ {
+ current_trace->enable_tracing_for_children();
+ return true;
+ }
+ return false;
+}
+
+bool Opt_trace_context::is_enabled()
+{
+ if (current_trace)
+ return current_trace->is_enabled();
+ return false;
+}
+
+Opt_trace_context::Opt_trace_context()
+{
+ current_trace= NULL;
+ inited= FALSE;
+ traces= NULL;
+ max_mem_size= 0;
+}
+Opt_trace_context::~Opt_trace_context()
+{
+ inited= FALSE;
+  /*
+    TODO: move this cleanup into a helper function; the same loop
+    appears in flush_optimizer_trace().
+  */
+ if (traces)
+ {
+ while (traces->elements())
+ {
+ Opt_trace_stmt *prev= traces->at(0);
+ delete prev;
+ traces->del(0);
+ }
+ delete traces;
+ traces= NULL;
+ }
+ max_mem_size= 0;
+}
+
+void Opt_trace_context::set_query(const char *query, size_t length, const CHARSET_INFO *charset)
+{
+ current_trace->set_query(query, length, charset);
+}
+
+void Opt_trace_context::start(THD *thd, TABLE_LIST *tbl,
+ enum enum_sql_command sql_command,
+ const char *query,
+ size_t query_length,
+ const CHARSET_INFO *query_charset,
+ ulong max_mem_size_arg)
+{
+ /*
+ This is done currently because we don't want to have multiple
+ traces open at the same time, so as soon as a new trace is created
+ we forcefully end the previous one, if it has not ended by itself.
+ This would mostly happen with stored functions or procedures.
+
+ TODO: handle multiple traces
+ */
+ DBUG_ASSERT(!current_trace);
+ current_trace= new Opt_trace_stmt(this);
+ max_mem_size= max_mem_size_arg;
+ if (!inited)
+ {
+ traces= new Dynamic_array<Opt_trace_stmt*>();
+ inited= TRUE;
+ }
+ set_allowed_mem_size(remaining_mem_size());
+}
+
+void Opt_trace_context::end()
+{
+ if (current_trace)
+ traces->push(current_trace);
+
+ if (!traces->elements())
+ return;
+ if (traces->elements() > 1)
+ {
+ Opt_trace_stmt *prev= traces->at(0);
+ delete prev;
+ traces->del(0);
+ }
+ current_trace= NULL;
+}
+
+Opt_trace_start::Opt_trace_start(THD *thd, TABLE_LIST *tbl,
+ enum enum_sql_command sql_command,
+ List<set_var_base> *set_vars,
+ const char *query,
+ size_t query_length,
+ const CHARSET_INFO *query_charset):ctx(&thd->opt_trace)
+{
+ /*
+    If optimizer trace is enabled and the statement we have is traceable,
+    then we start the context.
+ */
+ const ulonglong var = thd->variables.optimizer_trace;
+ traceable= FALSE;
+ if (unlikely(var & Opt_trace_context::FLAG_ENABLED) &&
+ sql_command_can_be_traced(sql_command) &&
+ !list_has_optimizer_trace_table(tbl) &&
+ !sets_var_optimizer_trace(sql_command, set_vars) &&
+ !thd->system_thread &&
+ !ctx->disable_tracing_if_required())
+ {
+ ctx->start(thd, tbl, sql_command, query, query_length, query_charset,
+ thd->variables.optimizer_trace_max_mem_size);
+ ctx->set_query(query, query_length, query_charset);
+ traceable= TRUE;
+ opt_trace_disable_if_no_tables_access(thd, tbl);
+ }
+}
+
+Opt_trace_start::~Opt_trace_start()
+{
+ if (traceable)
+ {
+ ctx->end();
+ traceable= FALSE;
+ }
+ else
+ {
+ ctx->enable_tracing_if_required();
+ }
+}
+
+void Opt_trace_stmt::fill_info(Opt_trace_info* info)
+{
+ if (unlikely(info->missing_priv = get_missing_priv()))
+ {
+ info->trace_ptr = info->query_ptr = "";
+ info->trace_length = info->query_length = 0;
+ info->query_charset = &my_charset_bin;
+ info->missing_bytes = 0;
+ }
+ else
+ {
+ info->trace_ptr = current_json->output.get_string()->ptr();
+ info->trace_length = get_length();
+ info->query_ptr = query.ptr();
+ info->query_length = query.length();
+ info->query_charset = query.charset();
+ info->missing_bytes = get_truncated_bytes();
+ info->missing_priv= get_missing_priv();
+ }
+}
+
+void Opt_trace_stmt::missing_privilege()
+{
+ missing_priv= true;
+}
+
+void Opt_trace_stmt::disable_tracing_for_children()
+{
+ ++I_S_disabled;
+}
+
+void Opt_trace_stmt::enable_tracing_for_children()
+{
+ if (I_S_disabled)
+ --I_S_disabled;
+}
+
+bool Opt_trace_stmt::is_enabled()
+{
+ return I_S_disabled == 0;
+}
+
+void Opt_trace_stmt::set_allowed_mem_size(size_t mem_size)
+{
+ current_json->set_size_limit(mem_size);
+}
+
+/*
+  Prefer this overload when iterating over JOIN_TABs.
+*/
+
+void Json_writer::add_table_name(const JOIN_TAB *tab)
+{
+ if (tab != NULL)
+ {
+ char table_name_buffer[SAFE_NAME_LEN];
+ if (tab->table && tab->table->derived_select_number)
+ {
+ /* Derived table name generation */
+ size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1,
+ "<derived%u>",
+ tab->table->derived_select_number);
+ add_str(table_name_buffer, len);
+ }
+ else if (tab->bush_children)
+ {
+ JOIN_TAB *ctab= tab->bush_children->start;
+ size_t len= my_snprintf(table_name_buffer,
+ sizeof(table_name_buffer)-1,
+ "<subquery%d>",
+ ctab->emb_sj_nest->sj_subq_pred->get_identifier());
+ add_str(table_name_buffer, len);
+ }
+ else
+ {
+ TABLE_LIST *real_table= tab->table->pos_in_table_list;
+ add_str(real_table->alias.str, real_table->alias.length);
+ }
+ }
+ else
+ DBUG_ASSERT(0);
+}
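+
+/*
+  For example (a sketch): a materialized derived table created for the
+  third SELECT of the query is printed as "<derived3>", a materialized
+  semi-join subquery as "<subquery2>", and a regular table by its alias.
+*/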
+
+void Json_writer::add_table_name(const TABLE *table)
+{
+ add_str(table->pos_in_table_list->alias.str);
+}
+
+
+void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab)
+{
+ Json_writer_object table_records(thd);
+ table_records.add_table_name(tab);
+ Json_writer_object table_rec(thd, "table_scan");
+ table_rec.add("rows", tab->found_records)
+ .add("cost", tab->read_time);
+}
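+
+/*
+  A sketch of the object produced above, for a table t1 with
+  hypothetical statistics:
+
+    { "table": "t1", "table_scan": { "rows": 1000, "cost": 2.35 } }
+*/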
+
+/*
+  TODO: Introduce an enum_query_type flags parameter, and maybe also
+  allow EXPLAIN to use this function.
+*/
+
+void Json_writer::add_str(Item *item)
+{
+ if (item)
+ {
+ THD *thd= current_thd;
+ char buff[256];
+ String str(buff, sizeof(buff), system_charset_info);
+ str.length(0);
+
+ ulonglong save_option_bits= thd->variables.option_bits;
+ thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE;
+ item->print(&str,
+ enum_query_type(QT_TO_SYSTEM_CHARSET | QT_SHOW_SELECT_NUMBER
+ | QT_ITEM_IDENT_SKIP_DB_NAMES));
+ thd->variables.option_bits= save_option_bits;
+ add_str(str.c_ptr_safe());
+ }
+ else
+ add_null();
+}
+
+void Opt_trace_context::flush_optimizer_trace()
+{
+ inited= false;
+ if (traces)
+ {
+ while (traces->elements())
+ {
+ Opt_trace_stmt *prev= traces->at(0);
+ delete prev;
+ traces->del(0);
+ }
+ delete traces;
+ traces= NULL;
+ }
+}
+
+
+int fill_optimizer_trace_info(THD *thd, TABLE_LIST *tables, Item *)
+{
+ TABLE *table = tables->table;
+ Opt_trace_info info;
+
+  /*
+    Get the values of the trace, query, missing bytes and missing_priv.
+
+    @todo: Need an iterator here to walk over all the traces.
+  */
+ Opt_trace_context* ctx= &thd->opt_trace;
+
+  if (!thd->opt_trace.empty())
+ {
+ Opt_trace_stmt *stmt= ctx->get_top_trace();
+ stmt->fill_info(&info);
+
+ table->field[0]->store(info.query_ptr, static_cast<uint>(info.query_length),
+ info.query_charset);
+ table->field[1]->store(info.trace_ptr, static_cast<uint>(info.trace_length),
+ system_charset_info);
+ table->field[2]->store(info.missing_bytes, true);
+ table->field[3]->store(info.missing_priv, true);
+ // Store in IS
+ if (schema_table_store_record(thd, table))
+ return 1;
+ }
+ return 0;
+}
diff --git a/sql/opt_trace.h b/sql/opt_trace.h
new file mode 100644
index 00000000000..0e2d0146a49
--- /dev/null
+++ b/sql/opt_trace.h
@@ -0,0 +1,201 @@
+#ifndef OPT_TRACE_INCLUDED
+#define OPT_TRACE_INCLUDED
+/* This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "opt_trace_context.h" // Opt_trace_context
+#include "sql_lex.h"
+#include "my_json_writer.h"
+#include "sql_select.h"
+class Item;
+class THD;
+struct TABLE_LIST;
+
+class Opt_trace_stmt;
+
+/*
+ User-visible information about a trace.
+*/
+
+struct Opt_trace_info
+{
+ /**
+ String containing trace.
+ If trace has been end()ed, this is 0-terminated, which is only to aid
+ debugging or unit testing; this property is not relied upon in normal
+ server usage.
+ If trace has not been ended, this is not 0-terminated. That rare case can
+ happen when a substatement reads OPTIMIZER_TRACE (at that stage, the top
+ statement is still executing so its trace is not ended yet, but may still
+ be read by the sub-statement).
+ */
+ const char *trace_ptr;
+ size_t trace_length;
+  /// String containing original query.
+ const char *query_ptr;
+ size_t query_length;
+ const CHARSET_INFO *query_charset; ///< charset of query string
+ /**
+ How many bytes this trace is missing (for traces which were truncated
+ because of @@@@optimizer-trace-max-mem-size).
+ The trace is not extended beyond trace-max-mem-size.
+ */
+ size_t missing_bytes;
+ /*
+ Whether user lacks privilege to see this trace.
+ If this is set to TRUE, then we return an empty trace
+ */
+ bool missing_priv;
+};
+
+/**
+ Instantiate this class to start tracing a THD's actions (generally at a
+ statement's start), and to set the "original" query (not transformed, as
+ sent by client) for the new trace. Destructor will end the trace.
+
+ @param thd the THD
+ @param tbl list of tables read/written by the statement.
+ @param sql_command SQL command being prepared or executed
+ @param set_vars what variables are set by this command (only used if
+ sql_command is SQLCOM_SET_OPTION)
+ @param query query
+ @param length query's length
+ @param charset charset which was used to encode this query
+*/
+
+
+class Opt_trace_start {
+ public:
+ Opt_trace_start(THD *thd_arg, TABLE_LIST *tbl,
+ enum enum_sql_command sql_command,
+ List<set_var_base> *set_vars,
+ const char *query,
+ size_t query_length,
+ const CHARSET_INFO *query_charset);
+ ~Opt_trace_start();
+
+ private:
+ Opt_trace_context *const ctx;
+ /*
+ True: the query will be traced
+ False: otherwise
+ */
+ bool traceable;
+};
+
+/**
+ Prints SELECT query to optimizer trace. It is not the original query (as in
+ @c Opt_trace_context::set_query()) but a printout of the parse tree
+ (Item-s).
+ @param thd the THD
+ @param select_lex query's parse tree
+ @param trace_object Json_writer object to which the query will be added
+*/
+void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex,
+ Json_writer_object *trace_object);
+
+void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab);
+
+/*
+  Security-related checks for the optimizer trace follow below.
+*/
+
+/**
+ If the security context is not that of the connected user, inform the trace
+ system that a privilege is missing. With one exception: see below.
+
+ @param thd
+
+ This serves to eliminate the following issue.
+ Any information readable by a SELECT may theoretically end up in
+ the trace. And a SELECT may read information from other places than tables:
+ - from views (reading their bodies)
+ - from stored routines (reading their bodies)
+ - from files (reading their content), with LOAD_FILE()
+ - from the list of connections (reading their queries...), with
+ I_S.PROCESSLIST.
+ If the connected user has EXECUTE privilege on a routine which does a
+ security context change, the routine can retrieve information internally
+ (if allowed by the SUID context's privileges), and present only a portion
+ of it to the connected user. But with tracing on, all information is
+ possibly in the trace. So the connected user receives more information than
+ the routine's definer intended to provide. Fixing this issue would require
+ adding, near many privilege checks in the server, a new
+ optimizer-trace-specific check done against the connected user's context,
+ to verify that the connected user has the right to see the retrieved
+ information.
+
+ Instead, our chosen simpler solution is that if we see a security context
+ change where SUID user is not the connected user, we disable tracing. With
+ only one safe exception: if the connected user has all global privileges
+ (because then she/he can find any information anyway). By "all global
+ privileges" we mean everything but WITH GRANT OPTION (that latter one isn't
+ related to information gathering).
+
+ Read access to I_S.OPTIMIZER_TRACE by another user than the connected user
+ is restricted: @see fill_optimizer_trace_info().
+*/
+void opt_trace_disable_if_no_security_context_access(THD *thd);
+
+void opt_trace_disable_if_no_tables_access(THD *thd, TABLE_LIST *tbl);
+
+/**
+ If tracing is on, checks additional privileges for a view, to make sure
+ that the user has the right to do SHOW CREATE VIEW. For that:
+ - this function checks SHOW VIEW
+ - SELECT is tested in opt_trace_disable_if_no_tables_access()
+ - SELECT + SHOW VIEW is sufficient for SHOW CREATE VIEW.
+ We also check underlying tables.
+ If a privilege is missing, notifies the trace system.
+ This function should be called when the view's underlying tables have not
+ yet been merged.
+
+ @param thd THD context
+ @param view view to check
+ @param underlying_tables underlying tables/views of 'view'
+ */
+
+void opt_trace_disable_if_no_view_access(THD *thd, TABLE_LIST *view,
+ TABLE_LIST *underlying_tables);
+
+/**
+ If tracing is on, checks additional privileges on a stored routine, to make
+ sure that the user has the right to do SHOW CREATE PROCEDURE/FUNCTION. For
+ that, we use the same checks as in those SHOW commands.
+ If a privilege is missing, notifies the trace system.
+
+ This function is not redundant with
+ opt_trace_disable_if_no_security_context_access().
+ Indeed, for a SQL SECURITY INVOKER routine, there is no context change, but
+ we must still verify that the invoker can do SHOW CREATE.
+
+ For triggers, see note in sp_head::execute_trigger().
+
+ @param thd
+ @param sp routine to check
+ */
+void opt_trace_disable_if_no_stored_proc_func_access(THD *thd, sp_head *sp);
+
+/**
+ Fills information_schema.OPTIMIZER_TRACE with rows (one per trace)
+ @retval 0 ok
+ @retval 1 error
+*/
+int fill_optimizer_trace_info(THD *thd, TABLE_LIST *tables, Item *);
+
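+/*
+  A sketch of what the macro below writes to the trace: for, e.g.
+  (hypothetical select number 2)
+
+    OPT_TRACE_TRANSFORM(thd, oto0, oto1, 2, "IN (SELECT)", "semijoin");
+    oto1.add("chosen", true);
+
+  the trace gets an object roughly like:
+
+    { "transformation": { "select_id": 2, "from": "IN (SELECT)",
+                          "to": "semijoin", "chosen": true } }
+*/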
+#define OPT_TRACE_TRANSFORM(thd, object_level0, object_level1,  \
+                            select_number, from, to)            \
+  Json_writer_object object_level0(thd);                        \
+  Json_writer_object object_level1(thd, "transformation");      \
+  object_level1.add_select_number(select_number).add("from", from).add("to", to);
+#endif
\ No newline at end of file
diff --git a/sql/opt_trace_context.h b/sql/opt_trace_context.h
new file mode 100644
index 00000000000..87317f67e22
--- /dev/null
+++ b/sql/opt_trace_context.h
@@ -0,0 +1,92 @@
+#ifndef OPT_TRACE_CONTEXT_INCLUDED
+#define OPT_TRACE_CONTEXT_INCLUDED
+
+#include "sql_array.h"
+
+class Opt_trace_stmt;
+
+class Opt_trace_context
+{
+public:
+ Opt_trace_context();
+ ~Opt_trace_context();
+
+ void start(THD *thd, TABLE_LIST *tbl,
+ enum enum_sql_command sql_command,
+ const char *query,
+ size_t query_length,
+ const CHARSET_INFO *query_charset,
+ ulong max_mem_size_arg);
+ void end();
+ void set_query(const char *query, size_t length, const CHARSET_INFO *charset);
+ void flush_optimizer_trace();
+ void set_allowed_mem_size(size_t mem_size);
+ size_t remaining_mem_size();
+
+private:
+ Opt_trace_stmt* top_trace()
+ {
+ return *(traces->front());
+ }
+
+public:
+
+ /*
+ This returns the top trace from the list of traces. This function
+    is used when we want to see the contents of the
+    INFORMATION_SCHEMA.OPTIMIZER_TRACE table.
+ */
+
+ Opt_trace_stmt* get_top_trace()
+ {
+ if (!traces || !traces->elements())
+ return NULL;
+ return top_trace();
+ }
+
+ /*
+    This returns the current trace, which is still being written to and
+    has not been finished yet.
+ */
+
+ Json_writer* get_current_json();
+
+  bool empty()
+  {
+    return !traces || traces->elements() == 0;
+  }
+
+ bool is_started()
+ {
+ return current_trace && is_enabled();
+ }
+
+ bool disable_tracing_if_required();
+
+ bool enable_tracing_if_required();
+
+ bool is_enabled();
+
+ void missing_privilege();
+
+ static const char *flag_names[];
+ enum
+ {
+ FLAG_DEFAULT = 0,
+ FLAG_ENABLED = 1 << 0
+ };
+
+private:
+ /*
+ List of traces (currently it stores only 1 trace)
+ */
+ Dynamic_array<Opt_trace_stmt*> *traces;
+ Opt_trace_stmt *current_trace;
+ /*
+    TRUE if we have allocated memory for the list of traces,
+    FALSE otherwise.
+ */
+ bool inited;
+ size_t max_mem_size;
+};
+
+#endif /* OPT_TRACE_CONTEXT_INCLUDED */
diff --git a/sql/set_var.h b/sql/set_var.h
index 6097b28e76f..8a82e07fd6b 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -275,6 +275,10 @@ public:
virtual int update(THD *thd)=0; /* To set the value */
virtual int light_check(THD *thd) { return check(thd); } /* for PS */
virtual bool is_system() { return FALSE; }
+ /**
+ @returns whether this variable is @@@@optimizer_trace.
+ */
+ virtual bool is_var_optimizer_trace() const { return false; }
};
@@ -306,6 +310,11 @@ public:
int check(THD *thd);
int update(THD *thd);
int light_check(THD *thd);
+ virtual bool is_var_optimizer_trace() const
+ {
+ extern sys_var *Sys_optimizer_trace_ptr;
+ return var == Sys_optimizer_trace_ptr;
+ }
};
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index 8345a9efe61..11c1234e2db 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -29,6 +29,8 @@
#include "sql_derived.h" // mysql_handle_derived
#include "sql_cte.h"
#include "sql_select.h" // Virtual_tmp_table
+#include "opt_trace.h"
+#include "my_json_writer.h"
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation
@@ -1146,6 +1148,8 @@ sp_head::execute(THD *thd, bool merge_da_on_success)
if (check_stack_overrun(thd, 7 * STACK_MIN_SIZE, (uchar*)&old_packet))
DBUG_RETURN(TRUE);
+ opt_trace_disable_if_no_security_context_access(thd);
+
/* init per-instruction memroot */
init_sql_alloc(&execute_mem_root, "per_instruction_memroot",
MEM_ROOT_BLOCK_SIZE, 0, MYF(0));
@@ -1982,6 +1986,7 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount,
thd->variables.option_bits&= ~OPTION_BIN_LOG;
}
+ opt_trace_disable_if_no_stored_proc_func_access(thd, this);
/*
Switch to call arena/mem_root so objects like sp_cursor or
Item_cache holders for case expressions can be allocated on it.
@@ -2272,6 +2277,7 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
err_status= set_routine_security_ctx(thd, this, &save_security_ctx);
#endif
+ opt_trace_disable_if_no_stored_proc_func_access(thd, this);
if (!err_status)
{
err_status= execute(thd, TRUE);
@@ -3297,6 +3303,13 @@ sp_lex_keeper::reset_lex_and_exec_core(THD *thd, uint *nextp,
thd->lex->safe_to_cache_query= 0;
#endif
+ Opt_trace_start ots(thd, m_lex->query_tables,
+ SQLCOM_SELECT, &m_lex->var_list,
+ NULL, 0,
+ thd->variables.character_set_client);
+
+ Json_writer_object trace_command(thd);
+ Json_writer_array trace_command_steps(thd, "steps");
if (open_tables)
res= check_dependencies_in_with_clauses(m_lex->with_clauses_list) ||
instr->exec_open_and_lock_tables(thd, m_lex->query_tables);
diff --git a/sql/sp_head.h b/sql/sp_head.h
index 8db6ecac9e7..f59da93d8aa 100644
--- a/sql/sp_head.h
+++ b/sql/sp_head.h
@@ -2024,6 +2024,7 @@ private:
}; // class sp_instr_set_case_expr : public sp_instr_opt_meta
+bool check_show_routine_access(THD *thd, sp_head *sp, bool *full_access);
#ifndef NO_EMBEDDED_ACCESS_CHECKS
bool
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 81db3c286ad..97e3bde97dd 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -71,6 +71,7 @@
#include "wsrep_thd.h"
#include "wsrep_trans_observer.h"
#endif /* WITH_WSREP */
+#include "opt_trace.h"
#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
@@ -1410,6 +1411,7 @@ void THD::change_user(void)
sp_cache_clear(&sp_func_cache);
sp_cache_clear(&sp_package_spec_cache);
sp_cache_clear(&sp_package_body_cache);
+ opt_trace.flush_optimizer_trace();
}
/**
@@ -2187,6 +2189,11 @@ void THD::reset_globals()
net.thd= 0;
}
+bool THD::trace_started()
+{
+ return opt_trace.is_started();
+}
+
/*
Cleanup after query.
@@ -4337,6 +4344,13 @@ bool Security_context::set_user(char *user_arg)
return user == 0;
}
+bool Security_context::check_access(ulong want_access, bool match_any)
+{
+ DBUG_ENTER("Security_context::check_access");
+ DBUG_RETURN((match_any ? (master_access & want_access)
+ : ((master_access & want_access) == want_access)));
+}
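+
+/*
+  For example (a sketch): with want_access= SELECT_ACL | INSERT_ACL,
+  match_any=false requires both privileges to be held, while
+  match_any=true is satisfied by either one of them.
+*/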
+
#ifndef NO_EMBEDDED_ACCESS_CHECKS
/**
Initialize this security context from the passed in credentials
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 402a114aadd..d6da0566bbe 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -28,6 +28,7 @@
#include "rpl_tblmap.h"
#include "mdl.h"
#include "field.h" // Create_field
+#include "opt_trace_context.h"
#include "probes_mysql.h"
#include "sql_locale.h" /* my_locale_st */
#include "sql_profile.h" /* PROFILING */
@@ -569,6 +570,8 @@ typedef struct system_variables
ulonglong long_query_time;
ulonglong max_statement_time;
ulonglong optimizer_switch;
+ ulonglong optimizer_trace;
+ ulong optimizer_trace_max_mem_size;
sql_mode_t sql_mode; ///< which non-standard SQL behaviour should be enabled
sql_mode_t old_behavior; ///< which old SQL behaviour should be enabled
ulonglong option_bits; ///< OPTION_xxx constants, e.g. OPTION_PROFILING
@@ -1350,6 +1353,14 @@ public:
restore_security_context(THD *thd, Security_context *backup);
#endif
bool user_matches(Security_context *);
+ /**
+ Check global access
+ @param want_access The required privileges
+    @param match_any if true, the security context may match any of the
+                     requested privileges; otherwise it must match all of them.
+ @return True if the security context fulfills the access requirements.
+ */
+ bool check_access(ulong want_access, bool match_any = false);
};
@@ -2306,6 +2317,8 @@ public:
Security_context main_security_ctx;
Security_context *security_ctx;
+ Security_context *security_context() const { return security_ctx; }
+ void set_security_context(Security_context *sctx) { security_ctx = sctx; }
/*
Points to info-string that we show in SHOW PROCESSLIST
@@ -2989,6 +3002,7 @@ public:
ulonglong bytes_sent_old;
ulonglong affected_rows; /* Number of changed rows */
+ Opt_trace_context opt_trace;
pthread_t real_id; /* For debugging */
my_thread_id thread_id, thread_dbug_id;
uint32 os_thread_id;
@@ -3297,6 +3311,7 @@ public:
void reset_for_reuse();
bool store_globals();
void reset_globals();
+ bool trace_started();
#ifdef SIGNAL_WITH_VIO_CLOSE
inline void set_active_vio(Vio* vio)
{
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 878aa715b84..28d28ca2425 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -32,6 +32,7 @@
#include "sql_acl.h" // SELECT_ACL
#include "sql_class.h"
#include "sql_cte.h"
+#include "my_json_writer.h"
typedef bool (*dt_processor)(THD *thd, LEX *lex, TABLE_LIST *derived);
@@ -198,6 +199,7 @@ mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
if ((res= (*processors[phase])(lex->thd, lex, derived)))
break;
}
+
lex->thd->derived_tables_processing= FALSE;
DBUG_RETURN(res);
}
@@ -368,6 +370,7 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_PRINT("enter", ("Alias: '%s' Unit: %p",
(derived->alias.str ? derived->alias.str : "<NULL>"),
derived->get_unit()));
+ const char *cause= NULL;
if (derived->merged)
{
@@ -379,6 +382,7 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
if (dt_select->uncacheable & UNCACHEABLE_RAND)
{
/* There is random function => fall back to materialization. */
+ cause= "Random function in the select";
derived->change_refs_to_fields();
derived->set_materialized_derived();
DBUG_RETURN(FALSE);
@@ -401,15 +405,11 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
and small subqueries, and the bigger one can't be merged it wouldn't
block the smaller one.
*/
- if (parent_lex->get_free_table_map(&map, &tablenr))
- {
- /* There is no enough table bits, fall back to materialization. */
- goto unconditional_materialization;
- }
-
- if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES)
+ if (parent_lex->get_free_table_map(&map, &tablenr) ||
+ dt_select->leaf_tables.elements + tablenr > MAX_TABLES)
{
    /* There are not enough table bits; fall back to materialization. */
+ cause= "Not enough table bits to merge subquery";
goto unconditional_materialization;
}
@@ -486,6 +486,24 @@ exit_merge:
DBUG_RETURN(res);
unconditional_materialization:
+
+ if (unlikely(thd->trace_started()))
+ {
+ /*
+      Record in the optimizer trace that the choice for this derived
+      table/view changed from merged to materialized.
+ */
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_derived(thd, derived->is_derived() ?
+ "derived" : "view");
+ trace_derived.add("table", derived->alias.str ? derived->alias.str : "<NULL>")
+ .add_select_number(derived->get_unit()->
+ first_select()->select_number)
+ .add("initial_choice", "merged")
+ .add("final_choice", "materialized")
+ .add("cause", cause);
+ }
+
derived->change_refs_to_fields();
derived->set_materialized_derived();
if (!derived->table || !derived->table->is_created())
@@ -654,7 +672,6 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_ENTER("mysql_derived_prepare");
DBUG_PRINT("enter", ("unit: %p table_list: %p alias: '%s'",
unit, derived, derived->alias.str));
-
if (!unit)
DBUG_RETURN(FALSE);
@@ -747,6 +764,22 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
}
}
+ if (unlikely(thd->trace_started()))
+ {
+ /*
+ Add to optimizer trace whether a derived table/view
+ is merged into the parent select or not.
+ */
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_derived(thd, derived->is_derived() ?
+ "derived" : "view");
+ trace_derived.add("table", derived->alias.str ? derived->alias.str : "<NULL>")
+ .add_select_number(derived->get_unit()->first_select()->select_number);
+ if (derived->is_materialized_derived())
+ trace_derived.add("materialized", true);
+ if (derived->is_merged_derived())
+ trace_derived.add("merged", true);
+ }
/*
Above cascade call of prepare is important for PS protocol, but after it
is called we can check if we really need prepare for this derived
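Taken together, the two hunks above record one JSON object per derived table
or view. A sketch of the fragment they would emit when a merge falls back to
materialization (table name and select number are illustrative; "select_id"
is an assumption about the member name add_select_number() writes):

    {
      "derived": {
        "table": "dt",
        "select_id": 2,
        "initial_choice": "merged",
        "final_choice": "materialized",
        "cause": "Not enough table bits to merge subquery"
      }
    }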
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index 1c45b05ccc5..022a8beb6d6 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -233,7 +233,7 @@ void Explain_query::print_explain_json(select_result_sink *output,
CHARSET_INFO *cs= system_charset_info;
List<Item> item_list;
- String *buf= &writer.output;
+ const String *buf= writer.output.get_string();
item_list.push_back(new (thd->mem_root)
Item_string(thd, buf->ptr(), buf->length(), cs),
thd->mem_root);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 7b6029aaa12..25dfdbcbc71 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -100,6 +100,7 @@
#include "set_var.h"
#include "sql_bootstrap.h"
#include "sql_sequence.h"
+#include "opt_trace.h"
#include "my_json_writer.h"
@@ -3412,6 +3413,13 @@ mysql_execute_command(THD *thd)
#ifdef HAVE_REPLICATION
} /* endif unlikely slave */
#endif
+ Opt_trace_start ots(thd, all_tables, lex->sql_command, &lex->var_list,
+ thd->query(), thd->query_length(),
+ thd->variables.character_set_client);
+
+ Json_writer_object trace_command(thd);
+ Json_writer_array trace_command_steps(thd, "steps");
+
#ifdef WITH_WSREP
if (WSREP(thd))
{
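Opt_trace_start is an RAII object: constructing it begins the trace for this
statement (subject to the enabled/privilege checks), and it presumably ends
the trace when it leaves scope at the end of mysql_execute_command(). The two
Json_writer objects then open the skeleton every trace shares, so each traced
statement starts out as:

    {
      "steps": [
        ...
      ]
    }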
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index c8cc64dba7e..6c3ad9c6924 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -112,6 +112,7 @@ When one supplies long data for a placeholder:
#include "sp_cache.h"
#include "sql_handler.h" // mysql_ha_rm_tables
#include "probes_mysql.h"
+#include "opt_trace.h"
#ifdef EMBEDDED_LIBRARY
/* include MYSQL_BIND headers */
#include <mysql.h>
@@ -2273,6 +2274,17 @@ static bool check_prepared_statement(Prepared_statement *stmt)
lex->first_select_lex()->context.resolve_in_table_list_only(select_lex->
get_table_list());
+ /*
+ For the optimizer trace, this is the counterpart, at statement preparation,
+ of what is done at statement execution (in mysql_execute_command()).
+ */
+ Opt_trace_start ots(thd, tables, lex->sql_command, &lex->var_list,
+ thd->query(), thd->query_length(),
+ thd->variables.character_set_client);
+
+ Json_writer_object trace_command(thd);
+ Json_writer_array trace_command_steps(thd, "steps");
+
/* Reset warning count for each query that uses tables */
if (tables)
thd->get_stmt_da()->opt_clear_warning_info(thd->query_id);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 1478dd93af4..06205a46f40 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -64,6 +64,8 @@
#include "sys_vars_shared.h"
#include "sp_head.h"
#include "sp_rcontext.h"
+#include "my_json_writer.h"
+#include "opt_trace.h"
/*
A key part number that means we're using a fulltext scan.
@@ -295,6 +297,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab);
static Item **get_sargable_cond(JOIN *join, TABLE *table);
+static void trace_plan_prefix(JOIN *join, uint idx, table_map remaining_tables);
+
#ifndef DBUG_OFF
/*
@@ -345,6 +349,40 @@ bool dbug_user_var_equals_int(THD *thd, const char *name, int value)
}
#endif
+static void trace_table_dependencies(THD *thd,
+ JOIN_TAB *join_tabs, uint table_count)
+{
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_array trace_dep(thd, "table_dependencies");
+ for (uint i = 0; i < table_count; i++)
+ {
+ TABLE_LIST *table_ref = join_tabs[i].tab_list;
+ Json_writer_object trace_one_table(thd);
+ trace_one_table.add_table_name(&join_tabs[i]);
+ trace_one_table.add("row_may_be_null",
+ (bool)table_ref->table->maybe_null);
+ const table_map map = table_ref->get_map();
+ DBUG_ASSERT(map < (1ULL << table_count));
+ for (uint j = 0; j < table_count; j++)
+ {
+ if (map & (1ULL << j))
+ {
+ trace_one_table.add("map_bit", static_cast<longlong>(j));
+ break;
+ }
+ }
+ Json_writer_array depends_on(thd, "depends_on_map_bits");
+ static_assert(sizeof(table_ref->get_map()) * 8 <= 64,
+ "RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test "
+ "all 64 bits.");
+ for (uint j = 0; j < 64; j++)
+ {
+ if (join_tabs[i].dependent & (1ULL << j))
+ depends_on.add(static_cast<longlong>(j));
+ }
+ }
+}
+
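A sketch of the "table_dependencies" fragment this helper emits for a
two-table join where t2 is on the inner side of an outer join (table names
and bit numbers are illustrative; member names are taken from the add()
calls above):

    "table_dependencies": [
      {
        "table": "t1",
        "row_may_be_null": false,
        "map_bit": 0,
        "depends_on_map_bits": []
      },
      {
        "table": "t2",
        "row_may_be_null": true,
        "map_bit": 1,
        "depends_on_map_bits": [0]
      }
    ]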
/**
This handles SELECT with and without UNION.
@@ -994,6 +1032,11 @@ JOIN::prepare(TABLE_LIST *tables_init,
join_list= &select_lex->top_join_list;
union_part= unit_arg->is_unit_op();
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_prepare(thd, "join_preparation");
+ trace_prepare.add_select_number(select_lex->select_number);
+ Json_writer_array trace_steps(thd, "steps");
+
// simple check that we got usable conds
dbug_print_item(conds);
@@ -1336,6 +1379,11 @@ JOIN::prepare(TABLE_LIST *tables_init,
}
}
+ {
+ Json_writer_object trace_wrapper(thd);
+ opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper);
+ }
+
if (!procedure && result && result->prepare(fields_list, unit_arg))
goto err; /* purecov: inspected */
@@ -1517,6 +1565,11 @@ JOIN::optimize_inner()
set_allowed_join_cache_types();
need_distinct= TRUE;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_prepare(thd, "join_optimization");
+ trace_prepare.add_select_number(select_lex->select_number);
+ Json_writer_array trace_steps(thd, "steps");
+
/*
Needed in case optimizer short-cuts,
set properly in make_aggr_tables_info()
@@ -3914,6 +3967,12 @@ void JOIN::exec_inner()
limit in order to produce the partial query result stored in the
UNION temp table.
*/
+
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_exec(thd, "join_execution");
+ trace_exec.add_select_number(select_lex->select_number);
+ Json_writer_array trace_steps(thd, "steps");
+
if (!select_lex->outer_select() && // (1)
select_lex != select_lex->master_unit()->fake_select_lex) // (2)
thd->lex->set_limit_rows_examined();
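With the three hunks above (prepare(), optimize_inner(), exec_inner()), every
select contributes up to three entries to the enclosing "steps" array, each
tagged with its select number. Roughly (again assuming add_select_number()
writes "select_id"):

    "steps": [
      { "join_preparation":  { "select_id": 1, "steps": [ ... ] } },
      { "join_optimization": { "select_id": 1, "steps": [ ... ] } },
      { "join_execution":    { "select_id": 1, "steps": [ ... ] } }
    ]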
@@ -4464,6 +4523,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
SARGABLE_PARAM *sargables= 0;
List_iterator<TABLE_LIST> ti(tables_list);
TABLE_LIST *tables;
+ THD *thd= join->thd;
DBUG_ENTER("make_join_statistics");
table_count=join->table_count;
@@ -4659,9 +4719,12 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
}
}
+ if (thd->trace_started())
+ trace_table_dependencies(thd, stat, join->table_count);
+
if (join->conds || outer_join)
{
- if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count,
+ if (update_ref_and_keys(thd, keyuse_array, stat, join->table_count,
join->conds, ~outer_join, join->select_lex, &sargables))
goto error;
/*
@@ -4673,10 +4736,12 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
((Item_in_subselect*)join->unit->item)->test_strategy(SUBS_IN_TO_EXISTS));
if (keyuse_array->elements &&
- sort_and_filter_keyuse(join->thd, keyuse_array,
+ sort_and_filter_keyuse(thd, keyuse_array,
skip_unprefixed_keyparts))
goto error;
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+ if (thd->trace_started())
+ print_keyuse_array_for_trace(thd, keyuse_array);
}
join->const_table_map= no_rows_const_tables;
@@ -4961,143 +5026,160 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
/* Calc how many (possible) matched records in each table */
- for (s=stat ; s < stat_end ; s++)
+ /*
+ Todo: add a helper function so that these Json_writer objects can
+ be added more easily. Another way would be to enclose them in a
+ scope {}, as is done below.
+ */
{
- s->startup_cost= 0;
- if (s->type == JT_SYSTEM || s->type == JT_CONST)
- {
- /* Only one matching row */
- s->found_records= s->records= 1;
- s->read_time=1.0;
- s->worst_seeks=1.0;
- continue;
- }
- /* Approximate found rows and time to read them */
- if (s->table->is_filled_at_execution())
- {
- get_delayed_table_estimates(s->table, &s->records, &s->read_time,
- &s->startup_cost);
- s->found_records= s->records;
- table->quick_condition_rows=s->records;
- }
- else
- {
- s->scan_time();
- }
-
- if (s->table->is_splittable())
- s->add_keyuses_for_splitting();
+ Json_writer_object rows_estimation_wrapper(thd);
+ Json_writer_array rows_estimation(thd, "rows_estimation");
+ for (s=stat ; s < stat_end ; s++)
+ {
+ s->startup_cost= 0;
+ if (s->type == JT_SYSTEM || s->type == JT_CONST)
+ {
+ Json_writer_object table_records(thd);
+ /* Only one matching row */
+ s->found_records= s->records= 1;
+ s->read_time=1.0;
+ s->worst_seeks=1.0;
+ table_records.add_table_name(s)
+ .add("rows", s->found_records)
+ .add("cost", s->read_time)
+ .add("table_type", s->type == JT_CONST ?
+ "const" :
+ "system");
+ continue;
+ }
+ /* Approximate found rows and time to read them */
+ if (s->table->is_filled_at_execution())
+ {
+ get_delayed_table_estimates(s->table, &s->records, &s->read_time,
+ &s->startup_cost);
+ s->found_records= s->records;
+ table->quick_condition_rows=s->records;
+ }
+ else
+ s->scan_time();
- /*
- Set a max range of how many seeks we can expect when using keys
- This is can't be to high as otherwise we are likely to use
- table scan.
- */
- s->worst_seeks= MY_MIN((double) s->found_records / 10,
- (double) s->read_time*3);
- if (s->worst_seeks < 2.0) // Fix for small tables
- s->worst_seeks=2.0;
+ if (s->table->is_splittable())
+ s->add_keyuses_for_splitting();
- /*
- Add to stat->const_keys those indexes for which all group fields or
- all select distinct fields participate in one index.
- */
- add_group_and_distinct_keys(join, s);
+ /*
+ Set a max range of how many seeks we can expect when using keys.
+ This can't be too high as otherwise we are likely to use a
+ table scan.
+ */
+ s->worst_seeks= MY_MIN((double) s->found_records / 10,
+ (double) s->read_time*3);
+ if (s->worst_seeks < 2.0) // Fix for small tables
+ s->worst_seeks=2.0;
- s->table->cond_selectivity= 1.0;
-
- /*
- Perform range analysis if there are keys it could use (1).
- Don't do range analysis for materialized subqueries (2).
- Don't do range analysis for materialized derived tables (3)
- */
- if ((!s->const_keys.is_clear_all() ||
- !bitmap_is_clear_all(&s->table->cond_set)) && // (1)
- !s->table->is_filled_at_execution() && // (2)
- !(s->table->pos_in_table_list->derived && // (3)
- s->table->pos_in_table_list->is_materialized_derived())) // (3)
- {
- bool impossible_range= FALSE;
- ha_rows records= HA_POS_ERROR;
- SQL_SELECT *select= 0;
- Item **sargable_cond= NULL;
- if (!s->const_keys.is_clear_all())
- {
- sargable_cond= get_sargable_cond(join, s->table);
-
- select= make_select(s->table, found_const_table_map,
- found_const_table_map,
- *sargable_cond,
- (SORT_INFO*) 0,
- 1, &error);
- if (!select)
- goto error;
- records= get_quick_record_count(join->thd, select, s->table,
- &s->const_keys, join->row_limit);
+ /*
+ Add to stat->const_keys those indexes for which all group fields or
+ all select distinct fields participate in one index.
+ */
+ add_group_and_distinct_keys(join, s);
- /*
- Range analyzer might have modified the condition. Put it the new
- condition to where we got it from.
- */
- *sargable_cond= select->cond;
+ s->table->cond_selectivity= 1.0;
- s->quick=select->quick;
- s->needed_reg=select->needed_reg;
- select->quick=0;
- impossible_range= records == 0 && s->table->reginfo.impossible_range;
- }
- if (!impossible_range)
- {
- if (!sargable_cond)
+ /*
+ Perform range analysis if there are keys it could use (1).
+ Don't do range analysis for materialized subqueries (2).
+ Don't do range analysis for materialized derived tables (3)
+ */
+ if ((!s->const_keys.is_clear_all() ||
+ !bitmap_is_clear_all(&s->table->cond_set)) && // (1)
+ !s->table->is_filled_at_execution() && // (2)
+ !(s->table->pos_in_table_list->derived && // (3)
+ s->table->pos_in_table_list->is_materialized_derived())) // (3)
+ {
+ bool impossible_range= FALSE;
+ ha_rows records= HA_POS_ERROR;
+ SQL_SELECT *select= 0;
+ Item **sargable_cond= NULL;
+ if (!s->const_keys.is_clear_all())
+ {
sargable_cond= get_sargable_cond(join, s->table);
- if (join->thd->variables.optimizer_use_condition_selectivity > 1)
- calculate_cond_selectivity_for_table(join->thd, s->table,
- sargable_cond);
- if (s->table->reginfo.impossible_range)
- {
- impossible_range= TRUE;
- records= 0;
+
+ select= make_select(s->table, found_const_table_map,
+ found_const_table_map,
+ *sargable_cond,
+ (SORT_INFO*) 0, 1, &error);
+ if (!select)
+ goto error;
+ records= get_quick_record_count(join->thd, select, s->table,
+ &s->const_keys, join->row_limit);
+
+ /*
+ Range analyzer might have modified the condition. Put the new
+ condition back where we got it from.
+ */
+ *sargable_cond= select->cond;
+
+ s->quick=select->quick;
+ s->needed_reg=select->needed_reg;
+ select->quick=0;
+ impossible_range= records == 0 && s->table->reginfo.impossible_range;
}
- }
- if (impossible_range)
- {
- /*
- Impossible WHERE or ON expression
- In case of ON, we mark that the we match one empty NULL row.
- In case of WHERE, don't set found_const_table_map to get the
- caller to abort with a zero row result.
- */
- TABLE_LIST *emb= s->table->pos_in_table_list->embedding;
- if (emb && !emb->sj_on_expr)
+ if (!impossible_range)
{
- /* Mark all tables in a multi-table join nest as const */
- mark_join_nest_as_const(join, emb, &found_const_table_map,
- &const_count);
+ if (!sargable_cond)
+ sargable_cond= get_sargable_cond(join, s->table);
+ if (join->thd->variables.optimizer_use_condition_selectivity > 1)
+ calculate_cond_selectivity_for_table(join->thd, s->table,
+ sargable_cond);
+ if (s->table->reginfo.impossible_range)
+ {
+ impossible_range= TRUE;
+ records= 0;
+ }
}
- else
+ if (impossible_range)
{
- join->const_table_map|= s->table->map;
- set_position(join,const_count++,s,(KEYUSE*) 0);
- s->type= JT_CONST;
- s->table->const_table= 1;
- if (*s->on_expr_ref)
+ /*
+ Impossible WHERE or ON expression.
+ In case of ON, we mark that we match one empty NULL row.
+ In case of WHERE, don't set found_const_table_map to get the
+ caller to abort with a zero row result.
+ */
+ TABLE_LIST *emb= s->table->pos_in_table_list->embedding;
+ if (emb && !emb->sj_on_expr)
+ {
+ /* Mark all tables in a multi-table join nest as const */
+ mark_join_nest_as_const(join, emb, &found_const_table_map,
+ &const_count);
+ }
+ else
{
- /* Generate empty row */
- s->info= ET_IMPOSSIBLE_ON_CONDITION;
- found_const_table_map|= s->table->map;
- mark_as_null_row(s->table); // All fields are NULL
+ join->const_table_map|= s->table->map;
+ set_position(join,const_count++,s,(KEYUSE*) 0);
+ s->type= JT_CONST;
+ s->table->const_table= 1;
+ if (*s->on_expr_ref)
+ {
+ /* Generate empty row */
+ s->info= ET_IMPOSSIBLE_ON_CONDITION;
+ found_const_table_map|= s->table->map;
+ mark_as_null_row(s->table); // All fields are NULL
+ }
}
}
+ if (records != HA_POS_ERROR)
+ {
+ s->found_records=records;
+ s->read_time= s->quick ? s->quick->read_time : 0.0;
+ }
+ if (select)
+ delete select;
+ else
+ add_table_scan_values_to_trace(thd, s);
}
- if (records != HA_POS_ERROR)
- {
- s->found_records=records;
- s->read_time= s->quick ? s->quick->read_time : 0.0;
- }
- if (select)
- delete select;
+ else
+ add_table_scan_values_to_trace(thd, s);
}
-
}
if (pull_out_semijoin_tables(join))
@@ -6856,11 +6938,15 @@ best_access_path(JOIN *join,
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
SplM_plan_info *spl_plan= 0;
+ const char* cause= NULL;
disable_jbuf= disable_jbuf || idx == join->const_tables;
Loose_scan_opt loose_scan_opt;
DBUG_ENTER("best_access_path");
+
+ Json_writer_object trace_wrapper(thd, "best_access_path");
+ Json_writer_array trace_paths(thd, "considered_access_paths");
bitmap_clear_all(eq_join_set);
@@ -6975,6 +7061,7 @@ best_access_path(JOIN *join,
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
+ Json_writer_object trace_access_idx(thd);
/*
ft-keys require special treatment
*/
@@ -6986,6 +7073,8 @@ best_access_path(JOIN *join,
*/
tmp= prev_record_reads(join->positions, idx, found_ref);
records= 1.0;
+ trace_access_idx.add("access_type", "fulltext")
+ .add("index", keyinfo->name);
}
else
{
@@ -7000,11 +7089,15 @@ best_access_path(JOIN *join,
if ((key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
MY_TEST(key_flags & HA_EXT_NOSAME))
{
+ trace_access_idx.add("access_type", "eq_ref")
+ .add("index", keyinfo->name);
tmp = prev_record_reads(join->positions, idx, found_ref);
records=1.0;
}
else
{
+ trace_access_idx.add("access_type", "ref")
+ .add("index", keyinfo->name);
if (!found_ref)
{ /* We found a const key */
/*
@@ -7025,11 +7118,16 @@ best_access_path(JOIN *join,
empty interval we wouldn't have got here).
*/
if (table->quick_keys.is_set(key))
+ {
records= (double) table->quick_rows[key];
+ trace_access_idx.add("used_range_estimates", true);
+ }
else
{
/* quick_range couldn't use key! */
records= (double) s->records/rec;
+ trace_access_idx.add("used_range_estimates", false)
+ .add("cause", "not available");
}
}
else
@@ -7061,7 +7159,23 @@ best_access_path(JOIN *join,
table->quick_n_ranges[key] == 1 &&
records > (double) table->quick_rows[key])
{
records= (double) table->quick_rows[key];
+ trace_access_idx.add("used_range_estimates", true);
+ }
+ else
+ {
+ if (table->quick_keys.is_set(key))
+ {
+ trace_access_idx.add("used_range_estimates",false)
+ .add("cause",
+ "not better than ref estimates");
+ }
+ else
+ {
+ trace_access_idx.add("used_range_estimates", false)
+ .add("cause", "not available");
+ }
}
}
/* Limit the number of matched rows */
@@ -7077,6 +7191,9 @@ best_access_path(JOIN *join,
}
else
{
+ trace_access_idx.add("access_type",
+ ref_or_null_part ? "ref_or_null" : "ref")
+ .add("index", keyinfo->name);
/*
Use as much key-parts as possible and a uniq key is better
than a not unique key
@@ -7131,6 +7248,7 @@ best_access_path(JOIN *join,
table->quick_n_ranges[key] == 1 + MY_TEST(ref_or_null_part)) //(C3)
{
tmp= records= (double) table->quick_rows[key];
+ trace_access_idx.add("used_range_estimates", true);
}
else
{
@@ -7156,7 +7274,19 @@ best_access_path(JOIN *join,
if (!found_ref && table->quick_keys.is_set(key) && // (1)
table->quick_key_parts[key] > max_key_part && // (2)
records < (double)table->quick_rows[key]) // (3)
+ {
+ trace_access_idx.add("used_range_estimates", true);
records= (double)table->quick_rows[key];
+ }
+ else
+ {
+ if (table->quick_keys.is_set(key) &&
+ table->quick_key_parts[key] < max_key_part)
+ {
+ trace_access_idx.add("chosen", false);
+ cause= "range uses more keyparts";
+ }
+ }
tmp= records;
}
@@ -7240,15 +7370,22 @@ best_access_path(JOIN *join,
tmp*= record_count;
}
else
+ {
+ if (!(found_part & 1))
+ cause= "no predicate for first keypart";
tmp= best_time; // Do nothing
+ }
}
tmp += s->startup_cost;
loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
} /* not ft_key */
+ trace_access_idx.add("rows", records).add("cost", tmp);
+
if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
{
+ trace_access_idx.add("chosen", true);
best_time= tmp + records/(double) TIME_FOR_COMPARE;
best= tmp;
best_records= records;
@@ -7256,6 +7393,12 @@ best_access_path(JOIN *join,
best_max_key_part= max_key_part;
best_ref_depends_map= found_ref;
}
+ else
+ {
+ trace_access_idx.add("chosen", false)
+ .add("cause", cause ? cause : "cost");
+ }
+ cause= NULL;
} /* for each key */
records= best_records;
}
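Each iteration of the "for each key" loop above closes one element of the
"considered_access_paths" array opened at the top of best_access_path(). A
sketch of two such elements (index names, row and cost figures are
illustrative; member names come from the add() calls above):

    "considered_access_paths": [
      {
        "access_type": "ref",
        "index": "k1",
        "used_range_estimates": false,
        "cause": "not available",
        "rows": 100,
        "cost": 21.9,
        "chosen": true
      },
      {
        "access_type": "eq_ref",
        "index": "PRIMARY",
        "rows": 1,
        "cost": 30.2,
        "chosen": false,
        "cause": "cost"
      }
    ]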
@@ -7277,6 +7420,7 @@ best_access_path(JOIN *join,
(!(s->table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
+ Json_writer_object trace_access_hash(thd);
double join_sel= 0.1;
/* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, found_constraint,
@@ -7296,7 +7440,12 @@ best_access_path(JOIN *join,
best_key= hj_start_key;
best_ref_depends_map= 0;
best_uses_jbuf= TRUE;
- }
+ trace_access_hash.add("type", "hash");
+ trace_access_hash.add("index", "hj-key");
+ trace_access_hash.add("cost", rnd_records);
+ trace_access_hash.add("cost", best);
+ trace_access_hash.add("chosen", true);
+ }
/*
Don't test table scan if it can't be better.
@@ -7331,6 +7480,7 @@ best_access_path(JOIN *join,
can be [considered to be] more expensive, which causes lookups not to
be used for cases with small datasets, which is annoying.
*/
+ Json_writer_object trace_access_scan(thd);
if ((records >= s->found_records || best > s->read_time) && // (1)
!(s->quick && best_key && s->quick->index == best_key->key && // (2)
best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2)
@@ -7350,6 +7500,10 @@ best_access_path(JOIN *join,
if (s->quick)
{
+ trace_access_scan.add("access_type", "range");
+ /*
+ TODO: add information about the QUICK_SELECT type in use here.
+ */
/*
For each record we:
- read record range through 'quick'
@@ -7367,6 +7521,7 @@ best_access_path(JOIN *join,
}
else
{
+ trace_access_scan.add("access_type", "scan");
/* Estimate cost of reading table. */
if (s->table->force_index && !best_key) // index scan
tmp= s->table->file->read_time(s->ref.key, 1, s->records);
@@ -7411,6 +7566,8 @@ best_access_path(JOIN *join,
as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
tmp give us total cost of using TABLE SCAN
*/
+ trace_access_scan.add("resulting_rows", rnd_records);
+ trace_access_scan.add("cost", tmp);
if (best == DBL_MAX ||
(tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records <
(best_key->is_for_hash_join() ? best_time :
@@ -7429,6 +7586,13 @@ best_access_path(JOIN *join,
join->outer_join)));
spl_plan= 0;
}
+ trace_access_scan.add("chosen", best_key == NULL);
+ }
+ else
+ {
+ trace_access_scan.add("type", "scan");
+ trace_access_scan.add("chosen", false);
+ trace_access_scan.add("cause", "cost");
}
/* Update the cost information for the current partial plan */
@@ -7447,7 +7611,10 @@ best_access_path(JOIN *join,
idx == join->const_tables &&
s->table == join->sort_by_table &&
join->unit->select_limit_cnt >= records)
+ {
+ trace_access_scan.add("use_tmp_table", true);
join->sort_by_table= (TABLE*) 1; // Must use temporary table
+ }
DBUG_VOID_RETURN;
}
@@ -7590,6 +7757,7 @@ choose_plan(JOIN *join, table_map join_tables)
uint use_cond_selectivity=
join->thd->variables.optimizer_use_condition_selectivity;
bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
+ THD *thd= join->thd;
DBUG_ENTER("choose_plan");
join->cur_embedding_map= 0;
@@ -7626,6 +7794,9 @@ choose_plan(JOIN *join, table_map join_tables)
join->table_count - join->const_tables, sizeof(JOIN_TAB*),
jtab_sort_func, (void*)join->emb_sjm_nest);
+ Json_writer_object wrapper(thd);
+ Json_writer_array trace_plan(thd,"considered_execution_plans");
+
if (!join->emb_sjm_nest)
{
choose_initial_table_order(join);
@@ -7919,9 +8090,16 @@ optimize_straight_join(JOIN *join, table_map join_tables)
uint use_cond_selectivity=
join->thd->variables.optimizer_use_condition_selectivity;
POSITION loose_scan_pos;
+ THD *thd= join->thd;
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
+ Json_writer_object trace_one_table(thd);
+ if (unlikely(thd->trace_started()))
+ {
+ trace_plan_prefix(join, idx, join_tables);
+ trace_one_table.add_table_name(s);
+ }
/* Find the best access method from 's' to the current partial plan */
best_access_path(join, s, join_tables, idx, disable_jbuf, record_count,
join->positions + idx, &loose_scan_pos);
@@ -8650,6 +8828,18 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
+static void trace_plan_prefix(JOIN *join, uint idx, table_map remaining_tables)
+{
+ THD *const thd = join->thd;
+ Json_writer_array plan_prefix(thd, "plan_prefix");
+ for (uint i = 0; i < idx; i++)
+ {
+ TABLE_LIST *const tr = join->positions[i].table->tab_list;
+ if (!(tr->map & remaining_tables))
+ plan_prefix.add_table_name(join->positions[i].table);
+ }
+}
+
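In the join-order search, each candidate extension of the partial plan
becomes one object in "considered_execution_plans", combining
trace_plan_prefix() with the best_access_path trace and, when the search
recurses, a nested "rest_of_plan" array. Schematically (assuming
add_table_name() writes a "table" member):

    {
      "plan_prefix": ["t1", "t2"],
      "table": "t3",
      "best_access_path": { "considered_access_paths": [ ... ] },
      "rest_of_plan": [ ... ]
    }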
/**
Find a good, possibly optimal, query execution plan (QEP) by a possibly
exhaustive search.
@@ -8827,6 +9017,13 @@ best_extension_by_limited_search(JOIN *join,
double current_record_count, current_read_time;
POSITION *position= join->positions + idx;
+ Json_writer_object trace_one_table(thd);
+ if (unlikely(thd->trace_started()))
+ {
+ trace_plan_prefix(join, idx, remaining_tables);
+ trace_one_table.add_table_name(s);
+ }
+
/* Find the best access method from 's' to the current partial plan */
POSITION loose_scan_pos;
best_access_path(join, s, remaining_tables, idx, disable_jbuf,
@@ -8840,6 +9037,9 @@ best_extension_by_limited_search(JOIN *join,
current_read_time=read_time + position->read_time +
current_record_count / (double) TIME_FOR_COMPARE;
+ /*
+ TODO add filtering estimates here
+ */
advance_sj_state(join, remaining_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
@@ -8851,6 +9051,7 @@ best_extension_by_limited_search(JOIN *join,
read_time,
current_read_time,
"prune_by_cost"););
+ trace_one_table.add("pruned_by_cost", true);
restore_prev_nj_state(s);
restore_prev_sj_state(remaining_tables, s, idx);
continue;
@@ -8884,6 +9085,7 @@ best_extension_by_limited_search(JOIN *join,
read_time,
current_read_time,
"pruned_by_heuristic"););
+ trace_one_table.add("pruned_by_heuristic", true);
restore_prev_nj_state(s);
restore_prev_sj_state(remaining_tables, s, idx);
continue;
@@ -8901,6 +9103,7 @@ best_extension_by_limited_search(JOIN *join,
if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
{ /* Recursively expand the current partial plan */
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
+ Json_writer_array trace_rest(thd, "rest_of_plan");
if (best_extension_by_limited_search(join,
remaining_tables & ~real_table_bit,
idx + 1,
@@ -10502,23 +10705,40 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
there inside the triggers.
*/
{ // Check const tables
- join->exec_const_cond=
- make_cond_for_table(thd, cond,
+ Item* const_cond= NULL;
+ const_cond= make_cond_for_table(thd, cond,
join->const_table_map,
(table_map) 0, -1, FALSE, FALSE);
/* Add conditions added by add_not_null_conds(). */
for (uint i= 0 ; i < join->const_tables ; i++)
- add_cond_and_fix(thd, &join->exec_const_cond,
+ add_cond_and_fix(thd, &const_cond,
join->join_tab[i].select_cond);
- DBUG_EXECUTE("where",print_where(join->exec_const_cond,"constants",
+ DBUG_EXECUTE("where",print_where(const_cond,"constants",
QT_ORDINARY););
- if (join->exec_const_cond && !join->exec_const_cond->is_expensive() &&
- !join->exec_const_cond->val_int())
+
+ if (const_cond)
{
- DBUG_PRINT("info",("Found impossible WHERE condition"));
- join->exec_const_cond= NULL;
- DBUG_RETURN(1); // Impossible const condition
+ Json_writer_object trace_const_cond(thd);
+ trace_const_cond.add("condition_on_constant_tables", const_cond);
+ if (const_cond->is_expensive())
+ {
+ trace_const_cond.add("evalualted", "false")
+ .add("cause", "expensive cond");
+ }
+ else
+ {
+ const bool const_cond_result = const_cond->val_int() != 0;
+ if (!const_cond_result)
+ {
+ DBUG_PRINT("info",("Found impossible WHERE condition"));
+ trace_const_cond.add("evalualted", "true")
+ .add("found", "impossible where");
+ join->exec_const_cond= NULL;
+ DBUG_RETURN(1);
+ }
+ }
+ join->exec_const_cond= const_cond;
}
if (join->table_count != join->const_tables)
@@ -10555,6 +10775,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
/*
Step #2: Extract WHERE/ON parts
*/
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables");
+ trace_conditions.add("original_condition", cond);
+ Json_writer_array trace_attached_comp(thd,
+ "attached_conditions_computation");
uint i;
for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--)
{
@@ -10617,10 +10842,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
(!is_hash_join_key_no(tab->ref.key) &&
tab->table->intersect_keys.is_set(tab->ref.key))))
{
- /* Range uses longer key; Use this instead of ref on key */
- tab->type=JT_ALL;
- use_quick_range=1;
- tab->use_quick=1;
+ /* Range uses longer key; Use this instead of ref on key */
+
+ /*
+ Trace the change from ref access to range access here: the
+ range uses a longer key.
+ TODO: get @spetrunia's opinion on this.
+ */
+ Json_writer_object ref_to_range(thd);
+ ref_to_range.add("ref_to_range", true);
+ ref_to_range.add("cause", "range uses longer key");
+ tab->type=JT_ALL;
+ use_quick_range=1;
+ tab->use_quick=1;
tab->ref.key= -1;
tab->ref.key_parts=0; // Don't use ref key.
join->best_positions[i].records_read= rows2double(tab->quick->records);
@@ -11058,6 +11292,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (!tab->bush_children)
i++;
}
+
+ trace_attached_comp.end();
+ Json_writer_array trace_attached_summary(thd,
+ "attached_conditions_summary");
+ for (tab= first_depth_first_tab(join); tab;
+ tab= next_depth_first_tab(join, tab))
+ {
+ if (!tab->table)
+ continue;
+ Item *const cond = tab->select_cond;
+ Json_writer_object trace_one_table(thd);
+ trace_one_table.add_table_name(tab);
+ trace_one_table.add("attached", cond);
+ }
}
DBUG_RETURN(0);
}
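The net effect of the hunks above is one "attaching_conditions_to_tables"
object per join. A sketch (condition strings are illustrative; "table" again
assumes the member name written by add_table_name()):

    "attaching_conditions_to_tables": {
      "original_condition": "t1.a = t2.a and t1.b < 5",
      "attached_conditions_computation": [ ... ],
      "attached_conditions_summary": [
        { "table": "t1", "attached": "t1.b < 5" },
        { "table": "t2", "attached": "t2.a = t1.a" }
      ]
    }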
@@ -15983,12 +16231,24 @@ optimize_cond(JOIN *join, COND *conds,
that occurs in a function set a pointer to the multiple equality
predicate. Substitute a constant instead of this field if the
multiple equality contains a constant.
- */
+ */
+
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_cond(thd, "condition_processing");
+ trace_cond.add("condition", join->conds == conds ? "WHERE" : "HAVING")
+ .add("original_condition", conds);
+
+ Json_writer_array trace_steps(thd, "steps");
DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
conds= build_equal_items(join, conds, NULL, join_list,
ignore_on_conds, cond_equal,
MY_TEST(flags & OPT_LINK_EQUAL_FIELDS));
DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
+ {
+ Json_writer_object equal_prop_wrapper(thd);
+ equal_prop_wrapper.add("transformation", "equality_propagation")
+ .add("resulting_condition", conds);
+ }
/* change field = field to field = const for each found field = const */
propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
@@ -15997,10 +16257,21 @@ optimize_cond(JOIN *join, COND *conds,
Remove all and-levels where CONST item != CONST item
*/
DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
+ {
+ Json_writer_object const_prop_wrapper(thd);
+ const_prop_wrapper.add("transformation", "constant_propagation")
+ .add("resulting_condition", conds);
+ }
conds= conds->remove_eq_conds(thd, cond_value, true);
if (conds && conds->type() == Item::COND_ITEM &&
((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
*cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
+
+ {
+ Json_writer_object cond_removal_wrapper(thd);
+ cond_removal_wrapper.add("transformation", "trivial_condition_removal")
+ .add("resulting_condition", conds);
+ }
DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
}
DBUG_RETURN(conds);
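optimize_cond() now records each rewrite of the WHERE/HAVING condition as a
separate transformation step. A sketch of the resulting fragment (condition
strings are illustrative; member names come from the add() calls above):

    "condition_processing": {
      "condition": "WHERE",
      "original_condition": "t1.a = t1.b and t1.b = 5",
      "steps": [
        { "transformation": "equality_propagation",
          "resulting_condition": "multiple equal(5, t1.a, t1.b)" },
        { "transformation": "constant_propagation",
          "resulting_condition": "multiple equal(5, t1.a, t1.b)" },
        { "transformation": "trivial_condition_removal",
          "resulting_condition": "multiple equal(5, t1.a, t1.b)" }
      ]
    }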
@@ -26122,7 +26393,8 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
// A view
if (!(belong_to_view &&
- belong_to_view->compact_view_format))
+ belong_to_view->compact_view_format) &&
+ !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES))
{
append_identifier(thd, str, &view_db);
str->append('.');
@@ -26151,7 +26423,8 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
// A normal table
if (!(belong_to_view &&
- belong_to_view->compact_view_format))
+ belong_to_view->compact_view_format) &&
+ !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES))
{
append_identifier(thd, str, &db);
str->append('.');
@@ -26886,6 +27159,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
bool group= join && join->group && order == join->group_list;
ha_rows refkey_rows_estimate= table->quick_condition_rows;
const bool has_limit= (select_limit_arg != HA_POS_ERROR);
+ THD* thd= join ? join->thd : table->in_use;
+
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_cheaper_ordering(
+ thd, "reconsidering_access_paths_for_index_ordering");
+ trace_cheaper_ordering.add("clause", group ? "GROUP BY" : "ORDER BY");
/*
If not used with LIMIT, only use keys if the whole query can be
@@ -26924,11 +27203,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
else
read_time= table->file->scan_time();
+ trace_cheaper_ordering.add("fanout", fanout);
/*
TODO: add cost of sorting here.
*/
read_time += COST_EPS;
-
+ trace_cheaper_ordering.add("read_time", read_time);
/*
Calculate the selectivity of the ref_key for REF_ACCESS. For
RANGE_ACCESS we use table->quick_condition_rows.
@@ -26945,11 +27225,20 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
set_if_bigger(refkey_rows_estimate, 1);
}
+ if (tab)
+ trace_cheaper_ordering.add_table_name(tab);
+ else
+ trace_cheaper_ordering.add_table_name(table);
+ trace_cheaper_ordering.add("rows_estimation", refkey_rows_estimate);
+
+ Json_writer_array possible_keys(thd,"possible_keys");
for (nr=0; nr < table->s->keys ; nr++)
{
int direction;
ha_rows select_limit= select_limit_arg;
uint used_key_parts= 0;
+ Json_writer_object possible_key(thd);
+ possible_key.add("index", table->key_info[nr].name);
if (keys.is_set(nr) &&
(direction= test_if_order_by_key(join, order, table, nr,
@@ -26962,6 +27251,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*/
DBUG_ASSERT (ref_key != (int) nr);
+ possible_key.add("can_resolve_order", true);
bool is_covering= (table->covering_keys.is_set(nr) ||
(table->file->index_flags(nr, 0, 1) &
HA_CLUSTERED_INDEX));
@@ -27092,6 +27382,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
select_limit= (ha_rows) (select_limit *
(double) table_records /
refkey_rows_estimate);
+ possible_key.add("updated_limit", select_limit);
rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
set_if_bigger(rec_per_key, 1);
/*
@@ -27111,9 +27402,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (get_range_limit_read_cost(tab, table, table_records, nr,
select_limit, &range_scan_time))
{
+ possible_key.add("range_scan_time", range_scan_time);
if (range_scan_time < index_scan_time)
index_scan_time= range_scan_time;
}
+ possible_key.add("index_scan_time", index_scan_time);
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
@@ -27124,17 +27417,29 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
table->covering_keys.is_set(ref_key)) ?
refkey_rows_estimate :
HA_POS_ERROR;
- if ((is_best_covering && !is_covering) ||
- (is_covering && refkey_select_limit < select_limit))
+ if (is_best_covering && !is_covering)
+ {
+ possible_key.add("chosen", false);
+ possible_key.add("cause", "covering index already found");
+ continue;
+ }
+
+ if (is_covering && refkey_select_limit < select_limit)
+ {
+ possible_key.add("chosen", false);
+ possible_key.add("cause", "ref estimates better");
continue;
+ }
if (table->quick_keys.is_set(nr))
quick_records= table->quick_rows[nr];
+ possible_key.add("records", quick_records);
if (best_key < 0 ||
(select_limit <= MY_MIN(quick_records,best_records) ?
keyinfo->user_defined_key_parts < best_key_parts :
quick_records < best_records) ||
(!is_best_covering && is_covering))
{
+ possible_key.add("chosen", true);
best_key= nr;
best_key_parts= keyinfo->user_defined_key_parts;
if (saved_best_key_parts)
@@ -27144,8 +27449,47 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
best_key_direction= direction;
best_select_limit= select_limit;
}
+ else
+ {
+ char const *cause;
+ possible_key.add("chosen", false);
+ if (is_covering)
+ cause= "covering index already found";
+ else
+ {
+ if (select_limit <= MY_MIN(quick_records,best_records))
+ cause= "keyparts greater than the current best keyparts";
+ else
+ cause= "rows estimation greater";
+ }
+ possible_key.add("cause", cause);
+ }
+ }
+ else
+ {
+ possible_key.add("usable", false);
+ possible_key.add("cause", "cost");
}
- }
+ }
+ else
+ {
+ possible_key.add("usable", false);
+ if (!group && select_limit == HA_POS_ERROR)
+ possible_key.add("cause", "order by without limit");
+ }
+ }
+ else
+ {
+ if (keys.is_set(nr))
+ {
+ possible_key.add("can_resolve_order", false);
+ possible_key.add("cause", "order can not be resolved by key");
+ }
+ else
+ {
+ possible_key.add("can_resolve_order", false);
+ possible_key.add("cause", "not usable index for the query");
+ }
}
}
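test_if_cheaper_ordering() now explains, per index, why it was or was not
picked to resolve a GROUP BY/ORDER BY. A sketch with one rejected and one
chosen index (names and numbers illustrative):

    "reconsidering_access_paths_for_index_ordering": {
      "clause": "ORDER BY",
      "fanout": 1,
      "read_time": 4.32,
      "table": "t1",
      "rows_estimation": 100,
      "possible_keys": [
        { "index": "k1",
          "can_resolve_order": false,
          "cause": "order can not be resolved by key" },
        { "index": "k2",
          "can_resolve_order": true,
          "updated_limit": 10,
          "index_scan_time": 2.5,
          "records": 10,
          "chosen": true }
      ]
    }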
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 3d2723081a3..80884d66590 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -63,6 +63,7 @@
#include "ha_partition.h"
#endif
#include "transaction.h"
+#include "opt_trace.h"
enum enum_i_s_events_fields
{
@@ -9764,6 +9765,10 @@ ST_FIELD_INFO check_constraints_fields_info[]=
OPEN_FULL_TABLE},
{0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
};
+
+/** For creating fields of information_schema.OPTIMIZER_TRACE */
+extern ST_FIELD_INFO optimizer_trace_info[];
+
/*
Description of ST_FIELD_INFO in table.h
@@ -9816,6 +9821,8 @@ ST_SCHEMA_TABLE schema_tables[]=
OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
{"OPEN_TABLES", open_tables_fields_info, 0,
fill_open_tables, make_old_format, 0, -1, -1, 1, 0},
+ {"OPTIMIZER_TRACE", optimizer_trace_info, 0,
+ fill_optimizer_trace_info, NULL, NULL, -1, -1, false, 0},
{"PARAMETERS", parameters_fields_info, 0,
fill_schema_proc, 0, 0, -1, -1, 0, 0},
{"PARTITIONS", partitions_fields_info, 0,
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 3d43c35177d..f247fb10f89 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -24,6 +24,7 @@
#include "sql_show.h" // calc_sum_of_all_status
#include "sql_select.h"
#include "keycaches.h"
+#include "my_json_writer.h"
#include <hash.h>
#include <thr_alarm.h>
#if defined(HAVE_MALLINFO) && defined(HAVE_MALLOC_H)
@@ -36,6 +37,8 @@
#include "events.h"
#endif
+#define FT_KEYPART (MAX_FIELDS+10)
+
static const char *lock_descriptions[] =
{
/* TL_UNLOCK */ "No lock",
@@ -225,8 +228,6 @@ TEST_join(JOIN *join)
}
-#define FT_KEYPART (MAX_FIELDS+10)
-
static void print_keyuse(KEYUSE *keyuse)
{
char buff[256];
@@ -263,7 +264,6 @@ void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array)
DBUG_UNLOCK_FILE;
}
-
/*
Print the current state during query optimization.
@@ -655,3 +655,24 @@ Memory allocated by threads: %s\n",
puts("");
fflush(stdout);
}
+
+void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array)
+{
+ Json_writer_object wrapper(thd);
+ Json_writer_array trace_key_uses(thd, "ref_optimizer_key_uses");
+ for(uint i=0; i < keyuse_array->elements; i++)
+ {
+ KEYUSE *keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i);
+ Json_writer_object keyuse_elem(thd);
+ keyuse_elem.add_table_name(keyuse->table->reginfo.join_tab);
+ keyuse_elem.add("field", (keyuse->keypart == FT_KEYPART) ? "<fulltext>"
+ : (keyuse->is_for_hash_join()
+ ? keyuse->table->field[keyuse->keypart]
+ ->field_name.str
+ : keyuse->table->key_info[keyuse->key]
+ .key_part[keyuse->keypart]
+ .field->field_name.str));
+ keyuse_elem.add("equals",keyuse->val);
+ keyuse_elem.add("null_rejecting",keyuse->null_rejecting);
+ }
+}
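A sketch of one element of the "ref_optimizer_key_uses" array written above
(table and field names illustrative; member names come from the add() calls):

    "ref_optimizer_key_uses": [
      {
        "table": "t2",
        "field": "a",
        "equals": "t1.a",
        "null_rejecting": true
      }
    ]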
diff --git a/sql/sql_test.h b/sql/sql_test.h
index 867582a9569..cbef581b784 100644
--- a/sql/sql_test.h
+++ b/sql/sql_test.h
@@ -17,6 +17,7 @@
#define SQL_TEST_INCLUDED
#include "mysqld.h"
+#include "opt_trace_context.h"
class JOIN;
struct TABLE_LIST;
@@ -34,6 +35,7 @@ void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array);
void print_sjm(SJ_MATERIALIZATION_INFO *sjm);
void dump_TABLE_LIST_graph(SELECT_LEX *select_lex, TABLE_LIST* tl);
#endif
+void print_keyuse_array_for_trace(THD *thd, DYNAMIC_ARRAY *keyuse_array);
void mysql_print_status();
#endif /* SQL_TEST_INCLUDED */
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index e475a3d3719..31032c5cd5e 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -36,6 +36,7 @@
#include "datadict.h" // dd_frm_is_view()
#include "sql_derived.h"
#include "sql_cte.h" // check_dependencies_in_with_clauses()
+#include "opt_trace.h"
#define MD5_BUFF_LENGTH 33
@@ -1420,6 +1421,15 @@ bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table,
goto err;
/*
+ Check rights to run commands which show underlying tables.
+ In the optimizer trace we would not like to show trace for
+ cases when the current user does not have rights for the
+ underlying tables.
+ */
+ if (!table->prelocking_placeholder)
+ opt_trace_disable_if_no_view_access(thd, table, view_tables);
+
+ /*
Check rights to run commands (ANALYZE SELECT, EXPLAIN SELECT &
SHOW CREATE) which show underlying tables.
Skip this step if we are opening view for prelocking only.
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 7241685fb61..a8b557c90c8 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -53,6 +53,7 @@
#include <myisam.h>
#include "debug_sync.h" // DEBUG_SYNC
#include "sql_show.h"
+#include "opt_trace_context.h"
#include "log_event.h"
#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
@@ -2542,6 +2543,23 @@ static Sys_var_flagset Sys_optimizer_switch(
NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
ON_UPDATE(fix_optimizer_switch));
+static Sys_var_flagset Sys_optimizer_trace(
+ "optimizer_trace",
+ "Controls tracing of the Optimizer:"
+ " optimizer_trace=option=val[,option=val...], where option is one of"
+ " {enabled}"
+ " and val is one of {on, off, default}",
+ SESSION_VAR(optimizer_trace), CMD_LINE(REQUIRED_ARG),
+ Opt_trace_context::flag_names, DEFAULT(Opt_trace_context::FLAG_DEFAULT));
+ // @see set_var::is_var_optimizer_trace()
+export sys_var *Sys_optimizer_trace_ptr = &Sys_optimizer_trace;
+
+static Sys_var_ulong Sys_optimizer_trace_max_mem_size(
+ "optimizer_trace_max_mem_size",
+ "Maximum allowed size of an optimizer trace",
+ SESSION_VAR(optimizer_trace_max_mem_size), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, ULONG_MAX), DEFAULT(1024 * 1024), BLOCK_SIZE(1));
+
static Sys_var_charptr Sys_pid_file(
"pid_file", "Pid file used by safe_mysqld",
READ_ONLY GLOBAL_VAR(pidfile_name_ptr), CMD_LINE(REQUIRED_ARG),
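A usage sketch for the new session variable (the truncation behaviour noted
in the comment is an assumption: a trace growing past the cap is expected to
be cut off rather than to fail the statement):

    -- Cap this session's optimizer trace buffer at 64KB.
    SET SESSION optimizer_trace_max_mem_size = 64 * 1024;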
diff --git a/sql/table.cc b/sql/table.cc
index 375f7a3f65f..d5c88b226c7 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -44,6 +44,7 @@
#include "sql_cte.h"
#include "ha_sequence.h"
#include "sql_show.h"
+#include "opt_trace.h"
/* For MySQL 5.7 virtual fields */
#define MYSQL57_GENERATED_FIELD 128
@@ -5714,6 +5715,7 @@ bool TABLE_LIST::prepare_security(THD *thd)
if (prepare_view_security_context(thd))
DBUG_RETURN(TRUE);
thd->security_ctx= find_view_security_context(thd);
+ opt_trace_disable_if_no_security_context_access(thd);
while ((tbl= tb++))
{
DBUG_ASSERT(tbl->referencing_view);