author     Sergei Petrunia <psergey@askmonty.org>    2015-04-12 04:48:42 +0300
committer  Sergei Petrunia <psergey@askmonty.org>    2015-04-12 04:48:42 +0300
commit     4938b822634b173c0d7ef882f721b553b223fadd (patch)
tree       f7fc465829170e1afb3816e940e4b8304502ff28
parent     66ff1632f53ef2378c2f2546e0716547ee5d3217 (diff)
download   mariadb-git-4938b822634b173c0d7ef882f721b553b223fadd.tar.gz
MDEV-7836: ANALYZE FORMAT=JSON should provide info about GROUP/ORDER BY
Provide basic info about sorting/grouping done by the queries.
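The new mtr tests added below show what this looks like. For example, an UPDATE with ORDER BY ... LIMIT now reports the sort as its own "filesort" node with counters such as r_used_priority_queue and r_output_rows (timings vary per run, so the test masks them as "REPLACED"); the table node shown elided here appears in full in the test result file:

    analyze format=json
    update t2 set b=b+1 order by b limit 5;
    ANALYZE
    {
      "query_block": {
        "select_id": 1,
        "r_total_time_ms": "REPLACED",
        "filesort": {
          "r_loops": 1,
          "r_limit": 5,
          "r_used_priority_queue": true,
          "r_output_rows": 6,
          "table": { ... }
        }
      }
    }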
-rw-r--r--  libmysqld/CMakeLists.txt                     1
-rw-r--r--  mysql-test/r/analyze_format_json.result      2
-rw-r--r--  mysql-test/r/analyze_stmt_orderby.result   289
-rw-r--r--  mysql-test/r/explain_json.result            39
-rw-r--r--  mysql-test/t/analyze_stmt_orderby.test      73
-rw-r--r--  sql/CMakeLists.txt                           2
-rw-r--r--  sql/filesort.cc                              9
-rw-r--r--  sql/filesort.h                               4
-rw-r--r--  sql/handler.h                                6
-rw-r--r--  sql/my_json_writer.cc                       21
-rw-r--r--  sql/my_json_writer.h                         1
-rw-r--r--  sql/sql_analyze_stmt.cc                     62
-rw-r--r--  sql/sql_analyze_stmt.h                     304
-rw-r--r--  sql/sql_delete.cc                           25
-rw-r--r--  sql/sql_explain.cc                         278
-rw-r--r--  sql/sql_explain.h                           89
-rw-r--r--  sql/sql_lex.cc                              11
-rw-r--r--  sql/sql_lex.h                                3
-rw-r--r--  sql/sql_select.cc                           48
-rw-r--r--  sql/sql_select.h                             7
-rw-r--r--  sql/sql_table.cc                             4
-rw-r--r--  sql/sql_union.cc                             2
-rw-r--r--  sql/sql_update.cc                           10
23 files changed, 1110 insertions, 180 deletions
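Plain EXPLAIN FORMAT=JSON changes too: instead of the old using_filesort/using_io_buffer flags, UPDATE/DELETE plans now wrap the table node in a "filesort" or "buffer" node. Again from the new test's expected output (remaining table members elided here):

    explain format=json
    update t2 set a=a+1 where a<10;
    EXPLAIN
    {
      "query_block": {
        "select_id": 1,
        "buffer": {
          "table": {
            "update": 1,
            "table_name": "t2",
            "access_type": "range",
            ...
          }
        }
      }
    }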
diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt
index 7168c67e57e..3472ee85f2f 100644
--- a/libmysqld/CMakeLists.txt
+++ b/libmysqld/CMakeLists.txt
@@ -102,6 +102,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
                ../sql/my_json_writer.cc ../sql/my_json_writer.h
                ../sql/rpl_gtid.cc ../sql/sql_explain.cc ../sql/sql_explain.h
+               ../sql/sql_analyze_stmt.cc ../sql/sql_analyze_stmt.h
                ../sql/compat56.cc ../sql/table_cache.cc ../sql/item_inetfunc.cc
diff --git a/mysql-test/r/analyze_format_json.result b/mysql-test/r/analyze_format_json.result
index 2c339042f43..1c24531715c 100644
--- a/mysql-test/r/analyze_format_json.result
+++ b/mysql-test/r/analyze_format_json.result
@@ -281,6 +281,7 @@ ANALYZE
 {
   "query_block": {
     "select_id": 1,
+    "r_total_time_ms": "REPLACED",
     "table": {
       "update": 1,
       "table_name": "t1",
@@ -324,6 +325,7 @@ ANALYZE
 {
   "query_block": {
     "select_id": 1,
+    "r_total_time_ms": "REPLACED",
     "table": {
       "delete": 1,
       "table_name": "t1",
diff --git a/mysql-test/r/analyze_stmt_orderby.result b/mysql-test/r/analyze_stmt_orderby.result
new file mode 100644
index 00000000000..d26d32180c1
--- /dev/null
+++ b/mysql-test/r/analyze_stmt_orderby.result
@@ -0,0 +1,289 @@
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+a int,
+b int,
+key (a)
+);
+insert into t2 select A.a*1000 + B.a, A.a*1000 + B.a from t0 A, t1 B;
+#
+# Try an UPDATE that uses filesort:
+#
+explain
+update t2 set b=b+1 order by b limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using filesort
+explain format=json
+update t2 set b=b+1 order by b limit 5;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "filesort": {
+      "table": {
+        "update": 1,
+        "table_name": "t2",
+        "access_type": "ALL",
+        "rows": 10000
+      }
+    }
+  }
+}
+analyze format=json
+update t2 set b=b+1 order by b limit 5;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_total_time_ms": "REPLACED",
+    "filesort": {
+      "r_loops": 1,
+      "r_limit": 5,
+      "r_used_priority_queue": true,
+      "r_output_rows": 6,
+      "table": {
+        "update": 1,
+        "table_name": "t2",
+        "access_type": "ALL",
+        "rows": 10000,
+        "r_rows": 10000,
+        "r_filtered": 100,
+        "r_total_time_ms": "REPLACED"
+      }
+    }
+  }
+}
+#
+# Try an UPDATE that uses buffering:
+#
+explain
+update t2 set a=a+1 where a<10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 5 NULL 8 Using where; Using buffer
+explain format=json
+update t2 set a=a+1 where a<10;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "buffer": {
+      "table": {
+        "update": 1,
+        "table_name": "t2",
+        "access_type": "range",
+        "possible_keys": ["a"],
+        "key": "a",
+        "key_length": "5",
+        "used_key_parts": ["a"],
+        "rows": 8,
+        "attached_condition": "(t2.a < 10)"
+      }
+    }
+  }
+}
+analyze format=json
+update t2 set a=a+1 where a<10;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_total_time_ms": "REPLACED",
+    "buffer": {
+      "table": {
+        "update": 1,
+        "table_name": "t2",
+        "access_type": "range",
+        "possible_keys": ["a"],
+        "key": "a",
+        "key_length": "5",
+        "used_key_parts": ["a"],
+        "rows": 8,
+        "r_rows": 10,
+        "r_filtered": 100,
+        "r_total_time_ms": "REPLACED",
+        "attached_condition": "(t2.a < 10)"
+      }
+    }
+  }
+}
+#
+# Try a DELETE that uses filesort:
+#
+explain
+delete from t2 order by b limit 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using filesort
+explain format=json
+delete from t2 order by b limit 5;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "filesort": {
+      "table": {
+        "delete": 1,
+        "table_name": "t2",
+        "access_type": "ALL",
+        "rows": 10000
+      }
+    }
+  }
+}
+analyze format=json
+delete from t2 order by b limit 5;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_total_time_ms": "REPLACED",
+    "filesort": {
+      "r_loops": 1,
+      "r_used_priority_queue": false,
+      "r_output_rows": 10000,
+      "r_buffer_size": "195Kb",
+      "table": {
+        "delete": 1,
+        "table_name": "t2",
+        "access_type": "ALL",
+        "rows": 10000,
+        "r_rows": 10000,
+        "r_filtered": 100,
+        "r_total_time_ms": "REPLACED"
+      }
+    }
+  }
+}
+#
+# Try a SELECT with QEP in form: filesort { tmp_table { join } }
+#
+explain
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where; Using temporary; Using filesort
+1 SIMPLE t2 ref a a 5 test.t0.a 1
+explain format=json
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "temporary_table": {
+      "function": "buffer",
+      "table": {
+        "table_name": "t0",
+        "access_type": "ALL",
+        "rows": 10,
+        "filtered": 100,
+        "attached_condition": "(t0.a is not null)"
+      },
+      "table": {
+        "table_name": "t2",
+        "access_type": "ref",
+        "possible_keys": ["a"],
+        "key": "a",
+        "key_length": "5",
+        "used_key_parts": ["a"],
+        "ref": ["test.t0.a"],
+        "rows": 1,
+        "filtered": 100
+      }
+    }
+  }
+}
+analyze format=json
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": "REPLACED",
+    "filesort": {
+      "temporary_table": {
+        "table": {
+          "table_name": "t0",
+          "access_type": "ALL",
+          "r_loops": 1,
+          "rows": 10,
+          "r_rows": 10,
+          "r_total_time_ms": "REPLACED",
+          "filtered": 100,
+          "r_filtered": 100,
+          "attached_condition": "(t0.a is not null)"
+        },
+        "table": {
+          "table_name": "t2",
+          "access_type": "ref",
+          "possible_keys": ["a"],
+          "key": "a",
+          "key_length": "5",
+          "used_key_parts": ["a"],
+          "ref": ["test.t0.a"],
+          "r_loops": 10,
+          "rows": 1,
+          "r_rows": 0.4,
+          "r_total_time_ms": "REPLACED",
+          "filtered": 100,
+          "r_filtered": 100
+        }
+      }
+    }
+  }
+}
+#
+# Try a SELECT with QEP in form: join { filesort { table0 }, table2 }
+#
+explain
+select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where; Using filesort
+1 SIMPLE t2 ref a a 5 test.t0.a 1
+analyze format=json
+select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": "REPLACED",
+    "read_sorted_file": {
+      "r_rows": 10,
+      "filesort": {
+        "r_loops": 1,
+        "r_used_priority_queue": false,
+        "r_output_rows": 10,
+        "r_buffer_size": "360",
+        "table": {
+          "table_name": "t0",
+          "access_type": "ALL",
+          "r_loops": 1,
+          "rows": 10,
+          "r_rows": 10,
+          "r_total_time_ms": "REPLACED",
+          "filtered": 100,
+          "r_filtered": 1,
+          "attached_condition": "(t0.a is not null)"
+        }
+      }
+    },
+    "table": {
+      "table_name": "t2",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t0.a"],
+      "r_loops": 10,
+      "rows": 1,
+      "r_rows": 0.4,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100
+    }
+  }
+}
+drop table t2;
+drop table t0, t1;
diff --git a/mysql-test/r/explain_json.result b/mysql-test/r/explain_json.result
index 93220cb5fd7..c9203d50d80 100644
--- a/mysql-test/r/explain_json.result
+++ b/mysql-test/r/explain_json.result
@@ -478,11 +478,14 @@ EXPLAIN
     "materialized": {
       "query_block": {
         "select_id": 2,
-        "table": {
-          "table_name": "t1",
-          "access_type": "ALL",
-          "rows": 10,
-          "filtered": 100
+        "temporary_table": {
+          "function": "buffer",
+          "table": {
+            "table_name": "t1",
+            "access_type": "ALL",
+            "rows": 10,
+            "filtered": 100
+          }
         }
       }
     }
@@ -517,11 +520,14 @@ EXPLAIN
     "materialized": {
       "query_block": {
         "select_id": 2,
-        "table": {
-          "table_name": "t1",
-          "access_type": "ALL",
-          "rows": 10,
-          "filtered": 100
+        "temporary_table": {
+          "function": "buffer",
+          "table": {
+            "table_name": "t1",
+            "access_type": "ALL",
+            "rows": 10,
+            "filtered": 100
+          }
         }
       }
     }
@@ -558,11 +564,14 @@ EXPLAIN
       "unique": 1,
       "query_block": {
         "select_id": 2,
-        "table": {
-          "table_name": "t1",
-          "access_type": "ALL",
-          "rows": 10,
-          "filtered": 100
+        "temporary_table": {
+          "function": "buffer",
+          "table": {
+            "table_name": "t1",
+            "access_type": "ALL",
+            "rows": 10,
+            "filtered": 100
+          }
         }
       }
     }
diff --git a/mysql-test/t/analyze_stmt_orderby.test b/mysql-test/t/analyze_stmt_orderby.test
new file mode 100644
index 00000000000..096bebbf147
--- /dev/null
+++ b/mysql-test/t/analyze_stmt_orderby.test
@@ -0,0 +1,73 @@
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t2 (
+  a int,
+  b int,
+  key (a)
+);
+
+insert into t2 select A.a*1000 + B.a, A.a*1000 + B.a from t0 A, t1 B;
+
+--echo #
+--echo # Try an UPDATE that uses filesort:
+--echo #
+explain
+update t2 set b=b+1 order by b limit 5;
+explain format=json
+update t2 set b=b+1 order by b limit 5;
+--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
+analyze format=json
+update t2 set b=b+1 order by b limit 5;
+
+--echo #
+--echo # Try an UPDATE that uses buffering:
+--echo #
+explain
+update t2 set a=a+1 where a<10;
+explain format=json
+update t2 set a=a+1 where a<10;
+--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
+analyze format=json
+update t2 set a=a+1 where a<10;
+
+--echo #
+--echo # Try a DELETE that uses filesort:
+--echo #
+explain
+delete from t2 order by b limit 5;
+explain format=json
+delete from t2 order by b limit 5;
+--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
+analyze format=json
+delete from t2 order by b limit 5;
+
+--echo #
+--echo # Try a SELECT with QEP in form: filesort { tmp_table { join } }
+--echo #
+explain
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+explain format=json
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
+analyze format=json
+select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
+
+
+--echo #
+--echo # Try a SELECT with QEP in form: join { filesort { table0 }, table2 }
+--echo #
+explain
+select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
+## explain format=json
+## select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
+--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
+analyze format=json
+select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
+
+drop table t2;
+drop table t0, t1;
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 8c426d010f8..cab320bb8c9 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -105,7 +105,7 @@ SET (SQL_SOURCE
                # added in MariaDB:
                sql_explain.h sql_explain.cc
-               sql_analyze_stmt.h
+               sql_analyze_stmt.h sql_analyze_stmt.cc
                sql_lifo_buffer.h sql_join_cache.h sql_join_cache.cc
                create_options.cc multi_range_read.cc
                opt_index_cond_pushdown.cc opt_subselect.cc
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 76f744df720..66087d3271e 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -145,7 +145,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
                  SQL_SELECT *select, ha_rows max_rows,
                  bool sort_positions,
                  ha_rows *examined_rows,
-                 ha_rows *found_rows)
+                 ha_rows *found_rows,
+                 Filesort_tracker* tracker)
 {
   int error;
   size_t memory_available= thd->variables.sortbuff_size;
@@ -211,6 +212,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   else
     thd->inc_status_sort_scan();
   thd->query_plan_flags|= QPLAN_FILESORT;
+  tracker->report_use(max_rows);

   // If number of rows is not known, use as much of sort buffer as possible.
   num_rows= table->file->estimate_rows_upper_bound();
@@ -226,6 +228,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     DBUG_PRINT("info", ("filesort PQ is applicable"));
     thd->query_plan_flags|= QPLAN_FILESORT_PRIORITY_QUEUE;
     status_var_increment(thd->status_var.filesort_pq_sorts_);
+    tracker->incr_pq_used();
     const size_t compare_length= param.sort_length;
     if (pq.init(param.max_rows,
                 true,                           // max_at_top
@@ -282,6 +285,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
       my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
       goto err;
     }
+    tracker->report_sort_buffer_size(table_sort.sort_buffer_size());
   }

   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
@@ -300,6 +304,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     goto err;
   maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));

+  tracker->report_merge_passes_at_start(thd->query_plan_fsort_passes);
+  tracker->report_row_numbers(param.examined_rows, *found_rows, num_rows);
   if (maxbuffer == 0)                   // The whole set is in memory
   {
@@ -365,6 +371,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,

 err:
   my_free(param.tmp_buffer);
+  tracker->report_merge_passes_at_end(thd->query_plan_fsort_passes);
   if (!subselect || !subselect->is_uncacheable())
   {
     table_sort.free_sort_buffer();
diff --git a/sql/filesort.h b/sql/filesort.h
index 8960fa6cb66..4c95f1202b2 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -25,11 +25,13 @@ class SQL_SELECT;
 class THD;
 struct TABLE;
 typedef struct st_sort_field SORT_FIELD;
+class Filesort_tracker;

 ha_rows filesort(THD *thd, TABLE *table, st_sort_field *sortorder,
                  uint s_length, SQL_SELECT *select,
                  ha_rows max_rows, bool sort_positions,
-                 ha_rows *examined_rows, ha_rows *found_rows);
+                 ha_rows *examined_rows, ha_rows *found_rows,
+                 Filesort_tracker* tracker);
 void filesort_free_buffers(TABLE *table, bool full);
 void change_double_for_sort(double nr,uchar *to);
diff --git a/sql/handler.h b/sql/handler.h
index 0ecfa756ab9..d0478824158 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -2596,9 +2596,13 @@ public:
   ulonglong rows_changed;
   /* One bigger than needed to avoid to test if key == MAX_KEY */
   ulonglong index_rows_read[MAX_KEY+1];
-
+
+private:
   /* ANALYZE time tracker, if present */
   Exec_time_tracker *tracker;
+public:
+  void set_time_tracker(Exec_time_tracker *tracker_arg) { tracker=tracker_arg;}
+
+  Item *pushed_idx_cond;
+  uint pushed_idx_cond_keyno;  /* The index which the above condition is for */
diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc
index 7a3dc776093..e97db210da7 100644
--- a/sql/my_json_writer.cc
+++ b/sql/my_json_writer.cc
@@ -130,6 +130,27 @@ void Json_writer::add_ll(longlong val)
 }

+/* Add a memory size, printing in Kb, Kb, Gb if necessary */
+void Json_writer::add_size(longlong val)
+{
+  char buf[64];
+  if (val < 1024)
+    my_snprintf(buf, sizeof(buf), "%ld", val);
+  else if (val < 1024*1024*16)
+  {
+    /* Values less than 16MB are specified in KB for precision */
+    size_t len= my_snprintf(buf, sizeof(buf), "%ld", val/1024);
+    strcpy(buf + len, "Kb");
+  }
+  else
+  {
+    size_t len= my_snprintf(buf, sizeof(buf), "%ld", val/(1024*1024));
+    strcpy(buf + len, "Mb");
+  }
+  add_str(buf);
+}
+
+
 void Json_writer::add_double(double val)
 {
   char buf[64];
diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h
index 3a7defc3566..8ab20b10d73 100644
--- a/sql/my_json_writer.h
+++ b/sql/my_json_writer.h
@@ -108,6 +108,7 @@ public:
   void add_str(const String &str);

   void add_ll(longlong val);
+  void add_size(longlong val);
   void add_double(double val);
   void add_bool(bool val);
   void add_null();
diff --git a/sql/sql_analyze_stmt.cc b/sql/sql_analyze_stmt.cc
new file mode 100644
index 00000000000..4874c33a544
--- /dev/null
+++ b/sql/sql_analyze_stmt.cc
@@ -0,0 +1,62 @@
+/*
+   Copyright (c) 2015 MariaDB Corporation Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation  // gcc: Class implementation
+#endif
+
+#include <my_global.h>
+#include "sql_priv.h"
+#include "sql_select.h"
+#include "my_json_writer.h"
+
+void Filesort_tracker::print_json(Json_writer *writer)
+{
+  const char *varied_str= "(varied across executions)";
+  writer->add_member("r_loops").add_ll(r_loops);
+
+  if (r_limit != HA_POS_ERROR)
+  {
+    writer->add_member("r_limit");
+    if (r_limit == 0)
+      writer->add_str(varied_str);
+    else
+      writer->add_ll(rint(r_limit/r_loops));
+  }
+
+  writer->add_member("r_used_priority_queue");
+  if (r_used_pq == r_loops)
+    writer->add_bool(true);
+  else if (r_used_pq == 0)
+    writer->add_bool(false);
+  else
+    writer->add_str(varied_str);
+
+  writer->add_member("r_output_rows").add_ll(rint(r_output_rows / r_loops));
+
+  if (sort_passes)
+    writer->add_member("r_sort_passes").add_ll(rint(sort_passes / r_loops));
+
+  if (sort_buffer_size != 0)
+  {
+    writer->add_member("r_buffer_size");
+    if (sort_buffer_size == ulonglong(-1))
+      writer->add_str(varied_str);
+    else
+      writer->add_size(sort_buffer_size);
+  }
+}
+
diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h
index d7a7f1337e0..abd4fc1ea4b 100644
--- a/sql/sql_analyze_stmt.h
+++ b/sql/sql_analyze_stmt.h
@@ -14,6 +14,29 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */

+/*
+
+== ANALYZE-stmt classes ==
+
+This file contains classes for supporting "ANALYZE statement" feature. These are
+a set of data structures that can be used to store the data about how the
+statement executed.
+
+There are two kinds of data collection:
+
+1. Various counters. We assume that incrementing counters has very low
+overhead. Because of that, execution code increments counters unconditionally
+(even when not running "ANALYZE $statement" commands. You run regular SELECT/
+UPDATE/DELETE/etc and the counters are incremented).
+
+As a free bonus, this lets us print detailed information into the slow query
+log, should the query be slow.
+
+2. Timing data. Measuring the time it took to run parts of query has noticeable
+overhead. Because of that, we measure the time only when running "ANALYZE
+$stmt").
+
+*/

 /*
   A class for tracking time it takes to do a certain action
@@ -85,4 +108,285 @@ public:
   if (unlikely((tracker)->timed)) \
   { (tracker)->stop_tracking(); }

+/*
+  A class for collecting read statistics.
+
+  The idea is that we run several scans. Each scans gets rows, and then filters
+  some of them out.  We count scans, rows, and rows left after filtering.
+
+  (note: at the moment, the class is not actually tied to a physical table.
+   It can be used to track reading from files, buffers, etc).
+*/
+
+class Table_access_tracker
+{
+public:
+  Table_access_tracker() :
+    r_scans(0), r_rows(0), /*r_rows_after_table_cond(0),*/
+    r_rows_after_where(0)
+  {}
+
+  ha_rows r_scans; /* How many scans were ran on this join_tab */
+  ha_rows r_rows; /* How many rows we've got after that */
+  ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
+
+  bool has_scans() { return (r_scans != 0); }
+  ha_rows get_loops() { return r_scans; }
+  double get_avg_rows()
+  {
+    return r_scans ? ((double)r_rows / r_scans): 0;
+  }
+
+  double get_filtered_after_where()
+  {
+    double r_filtered;
+    if (r_rows > 0)
+      r_filtered= (double)r_rows_after_where / r_rows;
+    else
+      r_filtered= 1.0;
+
+    return r_filtered;
+  }
+
+  inline void on_scan_init() { r_scans++; }
+  inline void on_record_read() { r_rows++; }
+  inline void on_record_after_where() { r_rows_after_where++; }
+};
+
+
+class Json_writer;
+
+/*
+  This stores the data about how filesort executed.
+
+  A few things from here (e.g. r_used_pq, r_limit) belong to the query plan,
+  however, these parameters are calculated right during the execution so we
+  can't easily put them into the query plan.
+
+  The class is designed to handle multiple invocations of filesort().
+*/
+
+class Filesort_tracker : public Sql_alloc
+{
+public:
+  Filesort_tracker() :
+    r_loops(0), r_limit(0), r_used_pq(0),
+    r_examined_rows(0), r_sorted_rows(0), r_output_rows(0),
+    sort_passes(0),
+    sort_buffer_size(0)
+  {}
+
+  /* Functions that filesort uses to report various things about its execution */
+
+  inline void report_use(ha_rows r_limit_arg)
+  {
+    if (!r_loops++)
+      r_limit= r_limit_arg;
+    else
+      r_limit= (r_limit != r_limit_arg)? 0: r_limit_arg;
+  }
+  inline void incr_pq_used() { r_used_pq++; }
+
+  inline void report_row_numbers(ha_rows examined_rows,
+                                 ha_rows sorted_rows,
+                                 ha_rows returned_rows)
+  {
+    r_examined_rows += examined_rows;
+    r_sorted_rows   += sorted_rows;
+    r_output_rows   += returned_rows;
+  }
+
+  inline void report_merge_passes_at_start(ulong passes)
+  {
+    sort_passes -= passes;
+  }
+  inline void report_merge_passes_at_end(ulong passes)
+  {
+    sort_passes += passes;
+  }
+
+  inline void report_sort_buffer_size(size_t bufsize)
+  {
+    if (sort_buffer_size)
+      sort_buffer_size= ulonglong(-1); // multiple buffers of different sizes
+    else
+      sort_buffer_size= bufsize;
+  }
+
+  /* Functions to get the statistics */
+  void print_json(Json_writer *writer);
+
+  ulonglong get_r_loops() { return r_loops; }
+  double get_avg_examined_rows()
+  {
+    return ((double)r_examined_rows) / r_loops;
+  }
+  double get_avg_returned_rows()
+  {
+    return ((double)r_output_rows) / r_loops;
+  }
+  double get_r_filtered()
+  {
+    if (r_examined_rows > 0)
+      return ((double)r_sorted_rows / r_examined_rows);
+    else
+      return 1.0;
+  }
+private:
+  ulonglong r_loops; /* How many times filesort was invoked */
+  /*
+    LIMIT is typically a constant. There is never "LIMIT 0".
+      HA_POS_ERROR means we never had a limit
+      0            means different values of LIMIT were used in
+                   different filesort invocations
+      other value  means the same LIMIT value was used every time.
+  */
+  ulonglong r_limit;
+  ulonglong r_used_pq; /* How many times PQ was used */
+
+  /* How many rows were examined (before checking the select->cond) */
+  ulonglong r_examined_rows;
+
+  /*
+    How many rows were put into sorting (this is examined_rows minus rows that
+    didn't pass the WHERE condition)
+  */
+  ulonglong r_sorted_rows;
+
+  /*
+    How many rows were returned. This is equal to r_sorted_rows, unless there
+    was a LIMIT N clause in which case filesort would not have returned more
+    than N rows.
+  */
+  ulonglong r_output_rows;
+
+  /* How many sorts in total (divide by r_count to get the average) */
+  ulonglong sort_passes;
+
+  /*
+    0              - means not used (or not known
+    (ulonglong)-1  - multiple
+    other          - value
+  */
+  ulonglong sort_buffer_size;
+};
+
+
+typedef enum
+{
+  EXPL_NO_TMP_TABLE=0,
+  EXPL_TMP_TABLE_BUFFER,
+  EXPL_TMP_TABLE_GROUP,
+  EXPL_TMP_TABLE_DISTINCT
+} enum_tmp_table_use;
+
+
+typedef enum
+{
+  EXPL_ACTION_FILESORT,
+  EXPL_ACTION_TEMPTABLE,
+  EXPL_ACTION_REMOVE_DUPS,
+} enum_qep_action;
+
+
+/*
+  This is to track how a JOIN object has resolved ORDER/GROUP BY/DISTINCT
+
+  We are not tied to the query plan at all, because query plan does not have
+  sufficient information. *A lot* of decisions about ordering/grouping are
+  made at very late stages (in JOIN::exec, JOIN::init_execution, in
+  create_sort_index and even in create_tmp_table).
+
+  The idea is that operations that happen during select execution will report
+  themselves. We have these operations:
+  - Sorting with filesort()
+  - Duplicate row removal (the one done by remove_duplicates()).
+  - Use of temporary table to buffer the result.
+
+  There is also "Selection" operation, done by do_select(). It reads rows,
+  there are several distinct cases:
+   1. doing the join operation on the base tables
+   2. reading the temporary table
+   3. reading the filesort output
+  it would be nice to build execution graph, e.g.
+
+    Select(JOIN op) -> temp.table -> filesort -> Select(filesort result)
+
+  the problem is that there is no way to tell what a do_select() call will do.
+
+  Our solution is not to have explicit selection operations. We make these
+  assumptions about the query plan:
+  - Select(JOIN op) is the first operation in the query plan
+  - Unless the first recorded operation is filesort(). filesort() is unable
+    read result of a select, so when we find it first, the query plan is:
+
+      filesort(first join table) -> Select(JOIN op) -> ...
+
+  the other popular query plan is:
+
+    Select (JOIN op) -> temp.table -> filesort() -> ...
+
+///TODO: handle repeated execution with subselects!
+*/
+
+class Sort_and_group_tracker : public Sql_alloc
+{
+  enum { MAX_QEP_ACTIONS = 5 };
+
+  /* Query actions in the order they were made */
+  enum_qep_action qep_actions[MAX_QEP_ACTIONS];
+  uint n_actions;
+
+  /*
+    Trackers for filesort operation. JOIN::exec() may need at most two sorting
+    operations.
+  */
+  Filesort_tracker filesort_tracker[2];
+  int cur_tracker;
+
+  /* Information about temporary tables */
+  enum_tmp_table_use tmp_table_kind[2];
+  int cur_tmp_table;
+
+  friend class Explain_select;
+
+public:
+  Sort_and_group_tracker() :
+    n_actions(0),
+    cur_tracker(0),
+    cur_tmp_table(0)
+  {}
+
+  /*************** Reporting interface ***************/
+  /* Report that join execution is started */
+  void report_join_start()
+  {
+    n_actions= 0;
+    cur_tracker= 0;
+    cur_tmp_table= 0;
+  }
+
+  /* Report that a temporary table is created. */
+  void report_tmp_table(TABLE *tbl)
+  {
+    DBUG_ASSERT(n_actions < MAX_QEP_ACTIONS);
+    qep_actions[n_actions++]= EXPL_ACTION_TEMPTABLE;
+
+    DBUG_ASSERT(cur_tmp_table < 2);
+    cur_tmp_table++;
+  }
+
+  /* Report that we are doing a filesort. */
+  Filesort_tracker *report_sorting()
+  {
+    DBUG_ASSERT(n_actions < MAX_QEP_ACTIONS);
+    qep_actions[n_actions++]= EXPL_ACTION_FILESORT;
+
+    DBUG_ASSERT(cur_tracker < 2);
+    return &filesort_tracker[cur_tracker++];
+  }
+
+  /*************** Statistics retrieval interface ***************/
+  //enum_tmp_table_use get_tmp_table_type() { return join_result_tmp_table; }
+};
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index fda74481f11..b149747649e 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -67,7 +67,8 @@ Explain_delete* Delete_plan::save_explain_delete_data(MEM_ROOT *mem_root, THD *t
   else
   {
     explain->deleting_all_rows= false;
-    Update_plan::save_explain_data_intern(mem_root, query, explain);
+    Update_plan::save_explain_data_intern(mem_root, explain,
+                                          thd->lex->analyze_stmt);
   }

   query->add_upd_del_plan(explain);
@@ -81,15 +82,15 @@ Update_plan::save_explain_update_data(MEM_ROOT *mem_root, THD *thd)
   Explain_query *query= thd->lex->explain;
   Explain_update* explain=
     new (mem_root) Explain_update(mem_root, thd->lex->analyze_stmt);
-  save_explain_data_intern(mem_root, query, explain);
+  save_explain_data_intern(mem_root, explain, thd->lex->analyze_stmt);
   query->add_upd_del_plan(explain);
   return explain;
 }

 void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
-                                           Explain_query *query,
-                                           Explain_update *explain)
+                                           Explain_update *explain,
+                                           bool is_analyze)
 {
   explain->select_type= "SIMPLE";
   explain->table_name.append(table->pos_in_table_list->alias);
@@ -109,6 +110,9 @@ void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
     return;
   }

+  if (is_analyze)
+    table->file->set_time_tracker(&explain->table_tracker);
+
   select_lex->set_explain_type(TRUE);
   explain->select_type= select_lex->type;
   /* Partitions */
@@ -151,7 +155,9 @@ void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
   explain->using_where= MY_TEST(select && select->cond);
   explain->where_cond= select? select->cond: NULL;
-  explain->using_filesort= using_filesort;
+
+  if (using_filesort)
+    explain->filesort_tracker= new (mem_root) Filesort_tracker;
   explain->using_io_buffer= using_io_buffer;

   append_possible_keys(mem_root, explain->possible_keys, table,
@@ -486,13 +492,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
       table->sort.io_cache= (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
                                                    MYF(MY_FAE | MY_ZEROFILL |
                                                        MY_THREAD_SPECIFIC));
-
+      Filesort_tracker *fs_tracker=
+        thd->lex->explain->get_upd_del_plan()->filesort_tracker;
+
       if (!(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
           (table->sort.found_records= filesort(thd, table, sortorder, length,
                                                select, HA_POS_ERROR,
                                                true,
-                                               &examined_rows, &found_rows))
-          == HA_POS_ERROR)
+                                               &examined_rows, &found_rows,
+                                               fs_tracker))
+               == HA_POS_ERROR)
       {
         delete select;
         free_underlaid_joins(thd, &thd->lex->select_lex);
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index 15f6753130c..3c1ebc288ab 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -743,11 +743,6 @@ void Explain_select::print_explain_json(Explain_query *query,
   }
   else
   {
-    /*
-      TODO: how does this approach allow to print ORDER BY members?
-      Explain_basic_join does not have ORDER/GROUP.
-      A: factor out join tab printing loop into a common func.
-    */
     writer->add_member("query_block").start_object();
     writer->add_member("select_id").add_ll(select_id);
@@ -761,8 +756,67 @@ void Explain_select::print_explain_json(Explain_query *query,
       writer->add_member("const_condition");
       write_item(writer, exec_const_cond);
     }
+
+    Filesort_tracker *first_table_sort= false;
+    int started_objects= 0;
+
+    if (is_analyze)
+    {
+      /* ANALYZE has collected this part of query plan independently */
+      for (int i= ops_tracker.n_actions-1; i >= 0; i--)
+      {
+        if (ops_tracker.qep_actions[i] == EXPL_ACTION_FILESORT)
+        {
+          if (i == 0)
+          {
+            /* filesort operation was the first in the pipeline */
+            first_table_sort= &ops_tracker.filesort_tracker[0];
+            break;
+          }
+          writer->add_member("filesort").start_object();
+          started_objects++;
+        }
+        else if (ops_tracker.qep_actions[i] == EXPL_ACTION_TEMPTABLE)
+        {
+          writer->add_member("temporary_table").start_object();
+          started_objects++;
+          /*
+          if (tmp == EXPL_TMP_TABLE_BUFFER)
+            func= "buffer";
+          else if (tmp == EXPL_TMP_TABLE_GROUP)
+            func= "group-by";
+          else
+            func= "distinct";
+          writer->add_member("function").add_str(func);
+          */
+        }
+        else if (ops_tracker.qep_actions[i] == EXPL_ACTION_REMOVE_DUPS)
+        {
+          writer->add_member("duplicate_removal").start_object();
+          started_objects++;
+        }
+        else
+          DBUG_ASSERT(0);
+      }
+
+    }
+    else
+    {
+      /* This is just EXPLAIN. Try to produce something meaningful */
+      if (using_temporary)
+      {
+        started_objects= 1;
+        writer->add_member("temporary_table").start_object();
+        writer->add_member("function").add_str("buffer");
+      }
+    }
+
+    Explain_basic_join::print_explain_json_interns(query, writer, is_analyze,
+                                                   first_table_sort);
+
+    for (;started_objects; started_objects--)
+      writer->end_object();

-    Explain_basic_join::print_explain_json_interns(query, writer, is_analyze);
     writer->end_object();
   }
@@ -776,24 +830,27 @@ void Explain_basic_join::print_explain_json(Explain_query *query,
   writer->add_member("query_block").start_object();
   writer->add_member("select_id").add_ll(select_id);

-  print_explain_json_interns(query, writer, is_analyze);
+  print_explain_json_interns(query, writer, is_analyze, NULL);

   writer->end_object();
 }

-void Explain_basic_join::print_explain_json_interns(Explain_query *query,
-                                                    Json_writer *writer,
-                                                    bool is_analyze)
+void Explain_basic_join::
+print_explain_json_interns(Explain_query *query,
+                           Json_writer *writer,
+                           bool is_analyze,
+                           Filesort_tracker *first_table_sort)
 {
   Json_writer_nesting_guard guard(writer);
   for (uint i=0; i< n_join_tabs; i++)
   {
     if (join_tabs[i]->start_dups_weedout)
       writer->add_member("duplicates_removal").start_object();
-
-    join_tabs[i]->print_explain_json(query, writer, is_analyze);
-
+
+    join_tabs[i]->print_explain_json(query, writer, is_analyze,
+                                     (i==0)? first_table_sort : NULL);
+
     if (join_tabs[i]->end_dups_weedout)
       writer->end_object();
   }
@@ -1230,11 +1287,47 @@ void add_json_keyset(Json_writer *writer, const char *elem_name,
 }

+/*
+  @param fs_tracker  Normally NULL. When not NULL, it means that the join tab
+                     used filesort.
+*/
+
 void Explain_table_access::print_explain_json(Explain_query *query,
                                               Json_writer *writer,
-                                              bool is_analyze)
+                                              bool is_analyze,
+                                              Filesort_tracker *fs_tracker)
 {
   Json_writer_nesting_guard guard(writer);
+
+  if (fs_tracker)
+  {
+    /* filesort was invoked on this join tab before doing the join with the rest */
+    writer->add_member("read_sorted_file").start_object();
+    if (is_analyze)
+    {
+      writer->add_member("r_rows");
+      /*
+        r_rows when reading filesort result. This can be less than the number
+        of rows produced by filesort due to NL-join having LIMIT.
+      */
+      if (tracker.has_scans())
+        writer->add_double(tracker.get_avg_rows());
+      else
+        writer->add_null();
+
+      /*
+        r_filtered when reading filesort result. We should have checked the
+        WHERE while doing filesort but lets check just in case.
+      */
+      if (tracker.has_scans() && tracker.get_filtered_after_where() < 1.0)
+      {
+        writer->add_member("r_filtered");
+        writer->add_double(tracker.get_filtered_after_where()*100.0);
+      }
+    }
+    writer->add_member("filesort").start_object();
+    fs_tracker->print_json(writer);
+  }

   if (bka_type.is_using_jbuf())
   {
@@ -1322,13 +1415,21 @@ void Explain_table_access::print_explain_json(Explain_query *query,
   if (is_analyze)
   {
     writer->add_member("r_rows");
-    if (tracker.has_scans())
+    if (fs_tracker)
     {
-      double avg_rows= tracker.get_avg_rows();
-      writer->add_double(avg_rows);
+      /* Get r_rows value from filesort */
+      if (fs_tracker->get_r_loops())
+        writer->add_double(fs_tracker->get_avg_examined_rows());
+      else
+        writer->add_null();
     }
     else
-      writer->add_null();
+    {
+      if (tracker.has_scans())
+        writer->add_double(tracker.get_avg_rows());
+      else
+        writer->add_null();
+    }

     if (op_tracker.get_loops())
     {
@@ -1345,10 +1446,22 @@ void Explain_table_access::print_explain_json(Explain_query *query,
   if (is_analyze)
   {
     writer->add_member("r_filtered");
-    if (tracker.has_scans())
-      writer->add_double(tracker.get_filtered_after_where()*100.0);
+    if (fs_tracker)
+    {
+      /* Get r_filtered value from filesort */
+      if (fs_tracker->get_r_loops())
+        writer->add_double(fs_tracker->get_r_filtered());
+      else
+        writer->add_null();
+    }
     else
-      writer->add_null();
+    {
+      /* Get r_filtered from the NL-join runtime */
+      if (tracker.has_scans())
+        writer->add_double(tracker.get_filtered_after_where()*100.0);
+      else
+        writer->add_null();
+    }
   }

   for (int i=0; i < (int)extra_tags.elements(); i++)
@@ -1414,6 +1527,12 @@ void Explain_table_access::print_explain_json(Explain_query *query,
     writer->end_object();
   }

+  if (fs_tracker)
+  {
+    writer->end_object(); // filesort
+    writer->end_object(); // read_sorted_file
+  }
+
   writer->end_object();
 }

@@ -1777,7 +1896,7 @@ int Explain_update::print_explain(Explain_query *query,
     extra_str.append(mrr_type);
   }

-  if (using_filesort)
+  if (is_using_filesort())
   {
     if (extra_str.length() !=0)
       extra_str.append(STRING_WITH_LEN("; "));
@@ -1825,7 +1944,14 @@ void Explain_update::print_explain_json(Explain_query *query,
   writer->add_member("query_block").start_object();
   writer->add_member("select_id").add_ll(1);
-
+
+  /* This is the total time it took to do the UPDATE/DELETE */
+  if (is_analyze && command_tracker.get_loops())
+  {
+    writer->add_member("r_total_time_ms").
+            add_double(command_tracker.get_time_ms());
+  }
+
   if (impossible_where || no_partitions)
   {
     const char *msg= impossible_where ? STR_IMPOSSIBLE_WHERE :
@@ -1837,6 +1963,25 @@ void Explain_update::print_explain_json(Explain_query *query,
     return;
   }

+  DBUG_ASSERT(!(is_using_filesort() && using_io_buffer));
+
+  bool doing_buffering= false;
+
+  if (is_using_filesort())
+  {
+    writer->add_member("filesort").start_object();
+    if (is_analyze)
+      filesort_tracker->print_json(writer);
+    doing_buffering= true;
+  }
+
+  if (using_io_buffer)
+  {
+    writer->add_member("buffer").start_object();
+    doing_buffering= true;
+  }
+
+  /* Produce elements that are common for buffered and un-buffered cases */
   writer->add_member("table").start_object();

   if (get_type() == EXPLAIN_UPDATE)
@@ -1898,50 +2043,58 @@ void Explain_update::print_explain_json(Explain_query *query,
     writer->end_object();
   }

-#if 0
-  /* `ref` */
-  if (!ref_list.is_empty())
-  {
-    List_iterator_fast<char> it(ref_list);
-    const char *str;
-    writer->add_member("ref").start_array();
-    while ((str= it++))
-      writer->add_str(str);
-    writer->end_array();
-  }
-#endif
-
   /* `rows` */
   writer->add_member("rows").add_ll(rows);

-  /* `r_rows` */
-  if (is_analyze && tracker.has_scans())
-  {
-    double avg_rows= tracker.get_avg_rows();
-    writer->add_member("r_rows").add_double(avg_rows);
-  }
-
-  /* UPDATE/DELETE do not produce `filtered` estimate */
-  /* `r_filtered` */
+  if (mrr_type.length() != 0)
+    writer->add_member("mrr_type").add_str(mrr_type.ptr());
+
   if (is_analyze)
   {
-    double r_filtered= tracker.get_filtered_after_where() * 100.0;
-    writer->add_member("r_filtered").add_double(r_filtered);
-  }
+    if (doing_buffering)
+    {
+      ha_rows r_rows;
+      double r_filtered;

-  if (mrr_type.length() != 0)
-    writer->add_member("mrr_type").add_str(mrr_type.ptr());
-
-  if (using_filesort)
-    writer->add_member("using_filesort").add_ll(1);
+      if (is_using_filesort())
+      {
+        if (filesort_tracker->get_r_loops())
+          r_rows= filesort_tracker->get_avg_examined_rows();
+        else
+          r_rows= 0;
+        r_filtered= filesort_tracker->get_r_filtered() * 100.0;
+      }
+      else
+      {
+        if (buf_tracker.has_scans())
+          r_rows= (ha_rows) buf_tracker.get_avg_rows();
+        else
+          r_rows= 0;
+        r_filtered= buf_tracker.get_filtered_after_where() * 100.0;
+      }
+      writer->add_member("r_rows").add_ll(r_rows);
+      writer->add_member("r_filtered").add_double(r_filtered);
+    }
+    else /* Not doing buffering */
+    {
+      writer->add_member("r_rows");
+      if (tracker.has_scans())
+        writer->add_double(tracker.get_avg_rows());
+      else
+        writer->add_null();

-  if (using_io_buffer)
-    writer->add_member("using_io_buffer").add_ll(1);
+      /* There is no 'filtered' estimate in UPDATE/DELETE atm */
+      double r_filtered= tracker.get_filtered_after_where() * 100.0;
+      writer->add_member("r_filtered").add_double(r_filtered);
+    }

-  if (is_analyze && command_tracker.get_loops())
-    writer->
-      add_member("r_total_time_ms").add_double(command_tracker.get_time_ms());
+    if (table_tracker.get_loops())
+    {
+      writer->add_member("r_total_time_ms").
+              add_double(table_tracker.get_time_ms());
+    }
+  }

   if (where_cond)
   {
@@ -1949,7 +2102,15 @@ void Explain_update::print_explain_json(Explain_query *query,
     write_item(writer, where_cond);
   }

+  /*** The part of plan that is before the buffering/sorting ends here ***/
+  if (is_using_filesort())
+    writer->end_object();
+
+  if (using_io_buffer)
+    writer->end_object();
+
   writer->end_object(); // table
+
   print_explain_json_for_children(query, writer, is_analyze);
   writer->end_object(); // query_block
 }
@@ -2105,3 +2266,4 @@ void Explain_range_checked_fer::print_json(Json_writer *writer,
     writer->end_object();
   }
 }
+
diff --git a/sql/sql_explain.h b/sql/sql_explain.h
index 9c6171aa6d0..2513e03cf1c 100644
--- a/sql/sql_explain.h
+++ b/sql/sql_explain.h
@@ -59,51 +59,6 @@ public:
 class Json_writer;

-/*
-  A class for collecting read statistics.
-
-  The idea is that we run several scans. Each scans gets rows, and then filters
-  some of them out.  We count scans, rows, and rows left after filtering.
-
-  (note: at the moment, the class is not actually tied to a physical table.
-   It can be used to track reading from files, buffers, etc).
-*/
-
-class Table_access_tracker
-{
-public:
-  Table_access_tracker() :
-    r_scans(0), r_rows(0), /*r_rows_after_table_cond(0),*/
-    r_rows_after_where(0)
-  {}
-
-  ha_rows r_scans; /* How many scans were ran on this join_tab */
-  ha_rows r_rows; /* How many rows we've got after that */
-  ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
-
-  bool has_scans() { return (r_scans != 0); }
-  ha_rows get_loops() { return r_scans; }
-  double get_avg_rows()
-  {
-    return r_scans ? ((double)r_rows / r_scans): 0;
-  }
-
-  double get_filtered_after_where()
-  {
-    double r_filtered;
-    if (r_rows > 0)
-      r_filtered= (double)r_rows_after_where / r_rows;
-    else
-      r_filtered= 1.0;
-
-    return r_filtered;
-  }
-
-  inline void on_scan_init() { r_scans++; }
-  inline void on_record_read() { r_rows++; }
-  inline void on_record_after_where() { r_rows_after_where++; }
-};
-
 /**************************************************************************************

   Data structures for producing EXPLAIN outputs.
@@ -212,7 +167,8 @@ public:
                           bool is_analyze);
   void print_explain_json_interns(Explain_query *query, Json_writer *writer,
-                                  bool is_analyze);
+                                  bool is_analyze,
+                                  Filesort_tracker *first_table_sort);

   /* A flat array of Explain structs for tables. */
   Explain_table_access** join_tabs;
@@ -271,6 +227,8 @@ public:

   /* ANALYZE members */
   Time_and_counter_tracker time_tracker;
+
+  Sort_and_group_tracker ops_tracker;

   int print_explain(Explain_query *query, select_result_sink *output,
                     uint8 explain_flags, bool is_analyze);
@@ -295,9 +253,9 @@ private:
 class Explain_union : public Explain_node
 {
 public:
-  Explain_union(MEM_ROOT *root) :
-    Explain_node(root),
-    time_tracker(false)
+  Explain_union(MEM_ROOT *root, bool is_analyze) :
+    Explain_node(root),
+    fake_select_lex_explain(root, is_analyze)
   {}

   enum explain_node_type get_type() { return EXPLAIN_UNION; }
@@ -332,8 +290,13 @@ public:
   const char *fake_select_type;
   bool using_filesort;
   bool using_tmp;
-  /* TODO: the below is not printed yet:*/
-  Time_and_counter_tracker time_tracker;
+
+  /*
+    Explain data structure for "fake_select_lex" (i.e. for the degenerate
+    SELECT that reads UNION result).
+    It doesn't have a query plan, but we still need execution tracker, etc.
+  */
+  Explain_select fake_select_lex_explain;

   Table_access_tracker *get_fake_select_lex_tracker()
   {
@@ -729,13 +692,13 @@ public:
   Table_access_tracker tracker;
   Exec_time_tracker op_tracker;
   Table_access_tracker jbuf_tracker;
-
+
   int print_explain(select_result_sink *output, uint8 explain_flags,
                     bool is_analyze,
                     uint select_id, const char *select_type,
                     bool using_temporary, bool using_filesort);
   void print_explain_json(Explain_query *query, Json_writer *writer,
-                          bool is_analyze);
+                          bool is_analyze, Filesort_tracker *fs_tracker);

 private:
   void append_tag_name(String *str, enum explain_extra_tag tag);
@@ -759,6 +722,7 @@ public:

   Explain_update(MEM_ROOT *root, bool is_analyze) :
     Explain_node(root),
+    filesort_tracker(NULL),
     command_tracker(is_analyze)
   {}

@@ -793,15 +757,30 @@ public:

   ha_rows rows;

-  bool using_filesort;
   bool using_io_buffer;

+  /* Tracker for doing reads when filling the buffer */
+  Table_access_tracker buf_tracker;
+
+  bool is_using_filesort() { return filesort_tracker? true: false; }
+  /*
+    Non-null value of filesort_tracker means "using filesort"
+
+    if we are using filesort, then table_tracker is for the io done inside
+    filesort.
+
+    'tracker' is for tracking post-filesort reads.
+  */
+  Filesort_tracker *filesort_tracker;
+
   /* ANALYZE members and methods */
   Table_access_tracker tracker;

   /* This tracks execution of the whole command */
   Time_and_counter_tracker command_tracker;
-  //psergey-todo: io-tracker here.
+
+  /* TODO: This tracks time to read rows from the table */
+  Exec_time_tracker table_tracker;

   virtual int print_explain(Explain_query *query, select_result_sink *output,
                             uint8 explain_flags, bool is_analyze);
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index c30fcd95d73..e5d261e02d4 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -4331,7 +4331,9 @@ void LEX::restore_set_statement_var()
 int st_select_lex_unit::save_union_explain(Explain_query *output)
 {
   SELECT_LEX *first= first_select();
-  Explain_union *eu= new (output->mem_root) Explain_union(output->mem_root);
+  Explain_union *eu=
+    new (output->mem_root) Explain_union(output->mem_root,
+                                         thd->lex->analyze_stmt);

   if (derived)
     eu->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
@@ -4374,12 +4376,7 @@ int st_select_lex_unit::save_union_explain_part2(Explain_query *output)
         eu->add_child(unit->first_select()->select_number);
       }
     }
-
-    /*
-      Having a time tracker for reading UNION result is not very interesting
-      but is easier, as JOIN::exec now relies on having a tracker.
-    */
-    fake_select_lex->join->tracker= &eu->time_tracker;
+    fake_select_lex->join->explain= &eu->fake_select_lex_explain;
   }
   return 0;
 }
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 6ad0760c45c..64b07b1927d 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -2335,8 +2335,7 @@ public:
   Explain_update* save_explain_update_data(MEM_ROOT *mem_root, THD *thd);

 protected:
-  void save_explain_data_intern(MEM_ROOT *mem_root, Explain_query *query,
-                                Explain_update *eu);
+  void save_explain_data_intern(MEM_ROOT *mem_root, Explain_update *eu, bool is_analyze);

 public:
   virtual ~Update_plan() {}
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 207bee922e8..e7c2e91a15c 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1988,7 +1988,7 @@ int JOIN::init_execution()
                                           group_list && simple_group,
                                           select_options, tmp_rows_limit, "")))
       DBUG_RETURN(1);
-
+    explain->ops_tracker.report_tmp_table(exec_tmp_table1);
     /*
       We don't have to store rows in temp table that doesn't match HAVING if:
       - we are sorting the table and writing complete group rows to the
@@ -2378,8 +2378,8 @@ void JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
       */
       uint nr= select_lex->master_unit()->first_select()->select_number;
       Explain_union *eu= output->get_union(nr);
+      explain= &eu->fake_select_lex_explain;
       join_tab[0].tracker= eu->get_fake_select_lex_tracker();
-      tracker= &eu->time_tracker;
     }
   }

@@ -2392,9 +2392,10 @@ void JOIN::exec()
                                       select_lex->select_number))
                     dbug_serve_apcs(thd, 1);
                  );
-  ANALYZE_START_TRACKING(tracker);
+  ANALYZE_START_TRACKING(&explain->time_tracker);
+  explain->ops_tracker.report_join_start();
   exec_inner();
-  ANALYZE_STOP_TRACKING(tracker);
+  ANALYZE_STOP_TRACKING(&explain->time_tracker);

   DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
                   if (dbug_user_var_equals_int(thd,
@@ -2768,6 +2769,7 @@ void JOIN::exec_inner()
                                           HA_POS_ERROR, "")))
 	  DBUG_VOID_RETURN;
 	curr_join->exec_tmp_table2= exec_tmp_table2;
+        explain->ops_tracker.report_tmp_table(exec_tmp_table2);
       }
       if (curr_join->group_list)
       {
@@ -2869,6 +2871,7 @@ void JOIN::exec_inner()
 	curr_join->select_distinct=0;
       }
       curr_tmp_table->reginfo.lock_type= TL_UNLOCK;
+      // psergey-todo: here is one place where we switch to
       if (curr_join->make_simple_join(this, curr_tmp_table))
 	DBUG_VOID_RETURN;
       calc_group_buffer(curr_join, curr_join->group_list);
@@ -3057,7 +3060,6 @@ void JOIN::exec_inner()
                           curr_join->table_count,
                           (int) curr_join->select_limit,
                           (int) unit->select_limit_cnt));
-
       if (create_sort_index(thd,
                             curr_join,
                             order_arg,
@@ -20931,7 +20933,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     table->file->info(HA_STATUS_VARIABLE);	// Get record count
   filesort_retval= filesort(thd, table, join->sortorder, length,
                             select, filesort_limit, 0,
-                            &examined_rows, &found_rows);
+                            &examined_rows, &found_rows,
+                            join->explain->ops_tracker.report_sorting());
   table->sort.found_records= filesort_retval;
   tab->records= found_rows;                     // For SQL_CALC_ROWS
@@ -23442,10 +23445,10 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab

   tab->tracker= &eta->tracker;
   tab->jbuf_tracker= &eta->jbuf_tracker;
-
+
   /* Enable the table access time tracker only for "ANALYZE stmt" */
   if (thd->lex->analyze_stmt)
-    tab->table->file->tracker= &eta->op_tracker;
+    tab->table->file->set_time_tracker(&eta->op_tracker);

   /* No need to save id and select_type here, they are kept in Explain_select */

@@ -23849,7 +23852,6 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
                                    bool need_order, bool distinct,
                                    const char *message)
 {
-  Explain_node *explain_node= 0;
   JOIN *join= this; /* Legacy: this code used to be a non-member function */
   int error= 0;
   DBUG_ENTER("JOIN::save_explain_data_intern");
@@ -23864,33 +23866,32 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
   DBUG_ASSERT(!join->select_lex->master_unit()->derived ||
               join->select_lex->master_unit()->derived->is_materialized_derived());

+  explain= NULL;
+
   /* Don't log this into the slow query log */

   if (message)
   {
-    Explain_select *xpl_sel;
-    explain_node= xpl_sel=
-      new (output->mem_root) Explain_select(output->mem_root,
-                                            thd->lex->analyze_stmt);
+    explain= new (output->mem_root) Explain_select(output->mem_root,
+                                                   thd->lex->analyze_stmt);
     join->select_lex->set_explain_type(true);

-    xpl_sel->select_id= join->select_lex->select_number;
-    xpl_sel->select_type= join->select_lex->type;
-    xpl_sel->message= message;
-    tracker= &xpl_sel->time_tracker;
+    explain->select_id= join->select_lex->select_number;
+    explain->select_type= join->select_lex->type;
+    /* Setting explain->message means that all other members are invalid */
+    explain->message= message;
+
     if (select_lex->master_unit()->derived)
-      xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
-    /* Setting xpl_sel->message means that all other members are invalid */
-    output->add_node(xpl_sel);
+      explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
+    output->add_node(explain);
   }
   else
   {
     Explain_select *xpl_sel;
-    explain_node= xpl_sel=
+    explain= xpl_sel=
       new (output->mem_root) Explain_select(output->mem_root,
                                             thd->lex->analyze_stmt);
     table_map used_tables=0;
-    tracker= &xpl_sel->time_tracker;

     join->select_lex->set_explain_type(true);
     xpl_sel->select_id= join->select_lex->select_number;
@@ -23986,13 +23987,12 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
     if (!(unit->item && unit->item->eliminated) &&                     // (1)
         (!unit->derived || unit->derived->is_materialized_derived()))  // (2)
     {
-      explain_node->add_child(unit->first_select()->select_number);
+      explain->add_child(unit->first_select()->select_number);
     }
   }

   if (!error && select_lex->is_top_level_node())
     output->query_plan_ready();
-
   DBUG_RETURN(error);
 }
diff --git a/sql/sql_select.h b/sql/sql_select.h
index c1db919ae2c..33468dcfdc7 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1225,7 +1225,8 @@ public:
   /**
     Is set if we have a GROUP BY and we have ORDER BY on a constant.
   */
   bool skip_sort_order;

-  bool need_tmp, hidden_group_fields;
+  bool need_tmp;
+  bool hidden_group_fields;
   /* TRUE if there was full cleunap of the JOIN */
   bool cleaned;
   DYNAMIC_ARRAY keyuse;
@@ -1282,7 +1283,7 @@ public:
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
   bool initialized; ///< flag to avoid double init_execution calls

-  Time_and_counter_tracker *tracker;
+  Explain_select *explain;

   enum { QEP_NOT_PRESENT_YET, QEP_AVAILABLE, QEP_DELETED} have_query_plan;
@@ -1377,7 +1378,7 @@ public:
     no_rows_in_result_called= 0;
     positions= best_positions= 0;

-    tracker= NULL;
+    explain= NULL;

     all_fields= fields_arg;
     if (&fields_list != &fields_arg) /* Avoid valgrind-warning */
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 4550b10c40c..bc25951bafa 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -9340,6 +9340,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
       tables.db= from->s->db.str;

       THD_STAGE_INFO(thd, stage_sorting);
+      Filesort_tracker dummy_tracker;
       if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
           setup_order(thd, thd->lex->select_lex.ref_pointer_array,
                       &tables, fields, all_fields, order) ||
           !(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
           (from->sort.found_records= filesort(thd, from, sortorder, length,
                                               NULL, HA_POS_ERROR,
                                               true,
-                                              &examined_rows, &found_rows)) ==
+                                              &examined_rows, &found_rows,
+                                              &dummy_tracker)) ==
           HA_POS_ERROR)
         goto err;
     }
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index be2a73dabb7..2ac83773c34 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -969,7 +969,7 @@ bool st_select_lex_unit::exec()
         1st execution sets certain members (e.g. select_result) to perform
         subquery execution rather than EXPLAIN line production. In order to
         reset them back, we re-do all of the actions (yes it is ugly):
-      */
+      */ // psergey-todo: is the above really necessary anymore??
       join->init(thd, item_list, fake_select_lex->options, result);
       saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array,
                                 &result_table_list,
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 20a9a1fa915..cf6a0f0f140 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -554,11 +554,15 @@ int mysql_update(THD *thd,
       table->sort.io_cache = (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
                                                     MYF(MY_FAE | MY_ZEROFILL |
                                                         MY_THREAD_SPECIFIC));
+      Filesort_tracker *fs_tracker=
+        thd->lex->explain->get_upd_del_plan()->filesort_tracker;
+
       if (!(sortorder=make_unireg_sortorder(order, &length, NULL)) ||
           (table->sort.found_records= filesort(thd, table, sortorder, length,
                                                select, limit,
                                                true,
-                                               &examined_rows, &found_rows))
+                                               &examined_rows, &found_rows,
+                                               fs_tracker))
           == HA_POS_ERROR)
       {
 	goto err;
@@ -578,7 +582,7 @@ int mysql_update(THD *thd,
         we go trough the matching rows, save a pointer to them and
         update these in a separate loop based on the pointer.
       */
-
+      explain->buf_tracker.on_scan_init();
       IO_CACHE tempfile;
       if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX,
 			   DISK_BUFFER_SIZE, MYF(MY_WME)))
@@ -619,6 +623,7 @@ int mysql_update(THD *thd,

       while (!(error=info.read_record(&info)) && !thd->killed)
       {
+        explain->buf_tracker.on_record_read();
         if (table->vfield)
           update_virtual_fields(thd, table,
                                 table->triggers ? VCOL_UPDATE_ALL :
@@ -629,6 +634,7 @@ int mysql_update(THD *thd,
 	  if (table->file->was_semi_consistent_read())
 	    continue;  /* repeat the read of the same row if it still exists */

+          explain->buf_tracker.on_record_after_where();
 	  table->file->position(table->record[0]);
 	  if (my_b_write(&tempfile,table->file->ref,
 			 table->file->ref_length))