-rw-r--r--  mysql-test/include/unique_packed_eits.inc |  63
-rw-r--r--  mysql-test/main/unique_packed.result       | 599
-rw-r--r--  mysql-test/main/unique_packed.test         | 251
-rw-r--r--  sql/field.h                                |  27
-rw-r--r--  sql/filesort.cc                            | 195
-rw-r--r--  sql/item_jsonfunc.cc                       |  82
-rw-r--r--  sql/item_jsonfunc.h                        |   1
-rw-r--r--  sql/item_sum.cc                            | 373
-rw-r--r--  sql/item_sum.h                             |  15
-rw-r--r--  sql/opt_range.cc                           |   4
-rw-r--r--  sql/sql_class.h                            |   1
-rw-r--r--  sql/sql_delete.cc                          |   6
-rw-r--r--  sql/sql_sort.h                             |  17
-rw-r--r--  sql/sql_statistics.cc                      | 114
-rw-r--r--  sql/uniques.cc                             | 179
-rw-r--r--  sql/uniques.h                              | 107
16 files changed, 1918 insertions, 116 deletions
diff --git a/mysql-test/include/unique_packed_eits.inc b/mysql-test/include/unique_packed_eits.inc
new file mode 100644
index 00000000000..ebcf3b304a1
--- /dev/null
+++ b/mysql-test/include/unique_packed_eits.inc
@@ -0,0 +1,63 @@
+--echo #
+--echo # Tests below show the avg_frequency calculated during EITS collection and
+--echo # we can compare that value by calculating distinct values for the same columns
+--echo #
+
+
+--echo #
+--echo # Running ANALYZE for customer table
+--echo #
+
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+
+--echo #
+--echo # Running ANALYZE for lineitem table
+--echo #
+
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+
+--echo #
+--echo # Running ANALYZE for nation table
+--echo #
+
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+
+--echo #
+--echo # Running ANALYZE for part table
+--echo #
+
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+
+--echo #
+--echo # Running ANALYZE for supplier table
+--echo #
+
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
diff --git a/mysql-test/main/unique_packed.result b/mysql-test/main/unique_packed.result
new file mode 100644
index 00000000000..b7b2b182b1c
--- /dev/null
+++ b/mysql-test/main/unique_packed.result
@@ -0,0 +1,599 @@
+create database dbt3;
+use dbt3;
+SET @save_tmp_table_size= @@tmp_table_size;
+CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part;
+#
+# The two queries should return the same result
+#
+SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr;
+GROUP_CONCAT(p_brand order by p_brand)
+Brand#11,Brand#12,Brand#13,Brand#14,Brand#15
+Brand#21,Brand#22,Brand#23,Brand#24,Brand#25
+Brand#31,Brand#32,Brand#33,Brand#34,Brand#35
+Brand#41,Brand#42,Brand#43,Brand#44,Brand#45
+Brand#51,Brand#52,Brand#53,Brand#54,Brand#55
+SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+GROUP_CONCAT(DISTINCT p_brand)
+Brand#11,Brand#12,Brand#13,Brand#14,Brand#15
+Brand#21,Brand#22,Brand#23,Brand#24,Brand#25
+Brand#31,Brand#32,Brand#33,Brand#34,Brand#35
+Brand#41,Brand#42,Brand#43,Brand#44,Brand#45
+Brand#51,Brand#52,Brand#53,Brand#54,Brand#55
+#
+# The two queries should return the same result
+#
+SELECT COUNT(*) FROM v1 GROUP BY p_mfgr;
+COUNT(*)
+5
+5
+5
+5
+5
+SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+COUNT(DISTINCT p_brand)
+5
+5
+5
+5
+5
+DROP VIEW v1;
+CREATE VIEW v1 AS
+SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey;
+SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr;
+p_mfgr COUNT(*)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+#
+# The two queries should return the same result as the above one
+#
+SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+p_mfgr COUNT(DISTINCT p_name)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+SET tmp_table_size=1024;
+#
+# merge_walk() gets called, i.e. merging of the sorted chunks of the file happens
+#
+SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+p_mfgr COUNT(DISTINCT p_name)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+SET tmp_table_size = @save_tmp_table_size;
+DROP VIEW v1;
+# The result should be 200
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+COUNT(DISTINCT p_name)
+200
+#
+# merge_many_buffers() gets called, followed by merge_walk()
+#
+SET tmp_table_size=1024;
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+COUNT(DISTINCT p_name)
+200
+SET tmp_table_size = @save_tmp_table_size;
+CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey;
+#
+# The two queries should return the same result
+#
+SELECT n_name, GROUP_CONCAT(c_name), COUNT(c_name) FROM v1 GROUP BY n_name;
+n_name GROUP_CONCAT(c_name) COUNT(c_name)
+ALGERIA Customer#000000086,Customer#000000073,Customer#000000080,Customer#000000076,Customer#000000029 5
+ARGENTINA Customer#000000059,Customer#000000014,Customer#000000106 3
+BRAZIL Customer#000000101,Customer#000000047,Customer#000000092,Customer#000000017 4
+CANADA Customer#000000005,Customer#000000040,Customer#000000122,Customer#000000013,Customer#000000022,Customer#000000064,Customer#000000146,Customer#000000023 8
+CHINA Customer#000000019,Customer#000000082,Customer#000000007,Customer#000000124,Customer#000000118 5
+EGYPT Customer#000000128,Customer#000000074,Customer#000000140,Customer#000000004 4
+ETHIOPIA Customer#000000085,Customer#000000010 2
+FRANCE Customer#000000046,Customer#000000050 2
+GERMANY Customer#000000136,Customer#000000062,Customer#000000071,Customer#000000119 4
+INDIA Customer#000000115,Customer#000000028,Customer#000000091,Customer#000000037 4
+INDONESIA Customer#000000094,Customer#000000142,Customer#000000067,Customer#000000130,Customer#000000103,Customer#000000139 6
+IRAN Customer#000000055,Customer#000000041,Customer#000000110,Customer#000000049,Customer#000000056,Customer#000000104,Customer#000000016 7
+IRAQ Customer#000000134,Customer#000000131,Customer#000000052,Customer#000000148 4
+JAPAN Customer#000000038,Customer#000000025,Customer#000000068,Customer#000000098,Customer#000000113 5
+JORDAN Customer#000000145,Customer#000000002,Customer#000000058 3
+KENYA Customer#000000089 1
+MOROCCO Customer#000000107,Customer#000000032,Customer#000000053,Customer#000000095,Customer#000000034,Customer#000000001,Customer#000000079 7
+MOZAMBIQUE Customer#000000088,Customer#000000109,Customer#000000143,Customer#000000116,Customer#000000137,Customer#000000044 6
+PERU Customer#000000121,Customer#000000061,Customer#000000097,Customer#000000035,Customer#000000008,Customer#000000077,Customer#000000133 7
+ROMANIA Customer#000000149,Customer#000000043,Customer#000000125,Customer#000000112 4
+RUSSIA Customer#000000026,Customer#000000020,Customer#000000083,Customer#000000070 4
+SAUDI ARABIA Customer#000000100 1
+UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3
+VIETNAM Customer#000000127 1
+SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name)
+FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name;
+n_name GROUP_CONCAT(DISTINCT c_name) COUNT(DISTINCT c_name)
+ALGERIA Customer#000000029,Customer#000000073,Customer#000000076,Customer#000000080,Customer#000000086 5
+ARGENTINA Customer#000000014,Customer#000000059,Customer#000000106 3
+BRAZIL Customer#000000017,Customer#000000047,Customer#000000092,Customer#000000101 4
+CANADA Customer#000000005,Customer#000000013,Customer#000000022,Customer#000000023,Customer#000000040,Customer#000000064,Customer#000000122,Customer#000000146 8
+CHINA Customer#000000007,Customer#000000019,Customer#000000082,Customer#000000118,Customer#000000124 5
+EGYPT Customer#000000004,Customer#000000074,Customer#000000128,Customer#000000140 4
+ETHIOPIA Customer#000000010,Customer#000000085 2
+FRANCE Customer#000000046,Customer#000000050 2
+GERMANY Customer#000000062,Customer#000000071,Customer#000000119,Customer#000000136 4
+INDIA Customer#000000028,Customer#000000037,Customer#000000091,Customer#000000115 4
+INDONESIA Customer#000000067,Customer#000000094,Customer#000000103,Customer#000000130,Customer#000000139,Customer#000000142 6
+IRAN Customer#000000016,Customer#000000041,Customer#000000049,Customer#000000055,Customer#000000056,Customer#000000104,Customer#000000110 7
+IRAQ Customer#000000052,Customer#000000131,Customer#000000134,Customer#000000148 4
+JAPAN Customer#000000025,Customer#000000038,Customer#000000068,Customer#000000098,Customer#000000113 5
+JORDAN Customer#000000002,Customer#000000058,Customer#000000145 3
+KENYA Customer#000000089 1
+MOROCCO Customer#000000001,Customer#000000032,Customer#000000034,Customer#000000053,Customer#000000079,Customer#000000095,Customer#000000107 7
+MOZAMBIQUE Customer#000000044,Customer#000000088,Customer#000000109,Customer#000000116,Customer#000000137,Customer#000000143 6
+PERU Customer#000000008,Customer#000000035,Customer#000000061,Customer#000000077,Customer#000000097,Customer#000000121,Customer#000000133 7
+ROMANIA Customer#000000043,Customer#000000112,Customer#000000125,Customer#000000149 4
+RUSSIA Customer#000000020,Customer#000000026,Customer#000000070,Customer#000000083 4
+SAUDI ARABIA Customer#000000100 1
+UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3
+VIETNAM Customer#000000127 1
+DROP VIEW v1;
+#
+# Tests for packed unique during EITS collection
+#
+SET @save_histogram_size= @@histogram_size;
+SET histogram_size=0;
+#
+# Testing when histograms are not created; this tests the function count_distinct_single_occurence_walk
+# passed as a callback function to walk()
+#
+#
+# Tests below show the avg_frequency calculated during EITS collection and
+# we can compare that value by calculating distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+#
+# Testing when histograms are created; this tests the function histogram_build_walk
+# passed as a callback function to walk()
+#
+SET histogram_size=255;
+#
+# Tests below show the avg_frequency calculated during EITS collection and
+# we can compare that value by calculating distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+#
+# Testing with very small memory for Unique, which forces merging
+#
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+#
+# Tests below show the avg_frequency calculated during EITS collection and
+# we can compare that value by calculating distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+SET max_heap_table_size= @save_max_heap_table_size;
+SET histogram_size= @save_histogram_size;
+#
+# Test with CHAR types
+#
+CREATE VIEW v1 AS
+SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT COUNT(*) FROM v1;
+COUNT(*)
+100
+SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+COUNT(DISTINCT c_phone)
+100
+SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1;
+GROUP_CONCAT(c_phone ORDER BY c_phone)
+10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267
+SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_phone)
+10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267
+SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_phone)
+["10-267-172-7101","10-349-718-3044","10-473-439-3214","10-677-951-2353","10-773-203-7342","11-355-584-3112","11-751-989-4627","11-845-129-3851","12-427-271-9466","12-446-416-8471","12-514-298-3699","12-970-682-3487","13-312-472-8245","13-558-731-7204","13-652-915-8939","13-702-694-4520","13-750-942-6364","13-761-547-5974","13-806-545-9701","13-835-723-3223","14-128-190-5944","14-199-862-7209","14-273-885-6505","14-280-874-8044","15-741-346-9870","15-745-585-8219","16-357-681-2007","16-658-112-3221","17-361-978-7059","17-501-210-4726","17-697-919-8406","17-710-812-5403","18-239-400-3677","18-385-235-7162","18-774-241-1462","18-971-699-1843","19-140-352-1403","19-190-993-9281","19-216-107-2107","19-403-114-4356","19-407-425-2584","19-953-499-8833","20-180-440-8525","20-781-609-3107","20-893-536-2069","20-895-685-6920","20-908-631-4424","20-917-711-4011","20-966-284-8065","21-186-284-5998","21-200-159-5932","21-562-498-6636","21-840-210-3572","22-302-930-4756","22-306-880-7212","22-603-468-3533","22-885-845-6889","22-918-832-2411","23-244-493-2508","23-562-444-8454","23-768-687-3665","24-394-451-5404","25-147-850-4166","25-168-852-5363","25-336-529-9919","25-344-968-5422","25-430-914-2194","25-923-255-2929","25-989-741-2988","26-190-260-5375","26-314-406-7725","26-516-273-2566","26-632-309-5792","26-777-409-5654","26-992-422-8153","27-147-574-9335","27-269-357-4674","27-408-997-8430","27-411-990-2959","27-566-888-7431","27-588-919-5638","27-626-559-8599","28-159-442-5305","28-183-750-7809","28-190-982-9759","28-396-526-5053","28-639-943-7051","29-233-262-8382","29-261-996-3120","29-316-665-2897","29-797-439-6760","30-749-445-4907","31-101-672-2951","32-363-455-4837","32-817-154-4122","32-828-107-2832","32-957-234-8742","33-197-837-7094","33-464-151-3439","33-733-623-5267"]
+DROP VIEW v1;
+CREATE VIEW v1 AS
+SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey;
+#
+# Test with VARCHAR types
+#
+SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1;
+GROUP_CONCAT(c_name ORDER BY c_name)
+Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149
+SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_name)
+Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149
+SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_name)
+["Customer#000000001","Customer#000000002","Customer#000000004","Customer#000000005","Customer#000000007","Customer#000000008","Customer#000000010","Customer#000000011","Customer#000000013","Customer#000000014","Customer#000000016","Customer#000000017","Customer#000000019","Customer#000000020","Customer#000000022","Customer#000000023","Customer#000000025","Customer#000000026","Customer#000000028","Customer#000000029","Customer#000000031","Customer#000000032","Customer#000000034","Customer#000000035","Customer#000000037","Customer#000000038","Customer#000000040","Customer#000000041","Customer#000000043","Customer#000000044","Customer#000000046","Customer#000000047","Customer#000000049","Customer#000000050","Customer#000000052","Customer#000000053","Customer#000000055","Customer#000000056","Customer#000000058","Customer#000000059","Customer#000000061","Customer#000000062","Customer#000000064","Customer#000000065","Customer#000000067","Customer#000000068","Customer#000000070","Customer#000000071","Customer#000000073","Customer#000000074","Customer#000000076","Customer#000000077","Customer#000000079","Customer#000000080","Customer#000000082","Customer#000000083","Customer#000000085","Customer#000000086","Customer#000000088","Customer#000000089","Customer#000000091","Customer#000000092","Customer#000000094","Customer#000000095","Customer#000000097","Customer#000000098","Customer#000000100","Customer#000000101","Customer#000000103","Customer#000000104","Customer#000000106","Customer#000000107","Customer#000000109","Customer#000000110","Customer#000000112","Customer#000000113","Customer#000000115","Customer#000000116","Customer#000000118","Customer#000000119","Customer#000000121","Customer#000000122","Customer#000000124","Customer#000000125","Customer#000000127","Customer#000000128","Customer#000000130","Customer#000000131","Customer#000000133","Customer#000000134","Customer#000000136","Customer#000000137","Customer#000000139","Customer#000000140","Customer#000000142","Customer#000000143","Customer#000000145","Customer#000000146","Customer#000000148","Customer#000000149"]
+DROP VIEW v1;
+#
+# Test with FIXED TYPES
+#
+CREATE VIEW v1 AS
+SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1;
+GROUP_CONCAT(c_nationkey ORDER BY c_nationkey)
+0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_nationkey)
+0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_nationkey)
+[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]
+DROP VIEW v1;
+DROP DATABASE dbt3;
+USE test;
+#
+# Testing for merge_buffers when Unique is packed
+#
+CREATE TABLE t1 (a VARCHAR(1500));
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a';
+column_name avg_frequency
+a 2.0000
+SELECT COUNT(*), COUNT(DISTINCT a) FROM t1;
+COUNT(*) COUNT(DISTINCT a)
+2400 1200
+SET max_heap_table_size= @save_max_heap_table_size;
+DROP TABLE t1;
+#
+# Simple test for DISTINCT with VARCHAR, CHAR and BLOBS
+#
+#
+# Test with VARCHAR types
+#
+CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
+CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+["a","b","c"]
+DROP TABLE t1;
+#
+# Test with CHAR types
+#
+CREATE TABLE t1 (a CHAR(255), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
+CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+["a","b","c"]
+#
+# Test with INT types (fixed size datatypes)
+#
+SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
+GROUP_CONCAT(DISTINCT b)
+1,2,3,4
+SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
+JSON_ARRAYAGG(DISTINCT b)
+[1,2,3,4]
+DROP TABLE t1;
+#
+# Test with BLOB types
+#
+CREATE TABLE t1 (a BLOB, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
diff --git a/mysql-test/main/unique_packed.test b/mysql-test/main/unique_packed.test
new file mode 100644
index 00000000000..984aac0a4cd
--- /dev/null
+++ b/mysql-test/main/unique_packed.test
@@ -0,0 +1,251 @@
+--source include/have_sequence.inc
+create database dbt3;
+use dbt3;
+
+--disable_result_log
+--disable_query_log
+--source include/dbt3_s001.inc
+--enable_result_log
+--enable_query_log
+
+SET @save_tmp_table_size= @@tmp_table_size;
+
+CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr;
+SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT COUNT(*) FROM v1 GROUP BY p_mfgr;
+SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+
+DROP VIEW v1;
+
+CREATE VIEW v1 AS
+SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey;
+
+SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr;
+
+--echo #
+--echo # The two queries should return the same result as the above one
+--echo #
+
+LET $query= SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+
+eval $query;
+
+SET tmp_table_size=1024;
+--echo #
+--echo # merge_walk() gets called, i.e. merging of the sorted chunks of the file happens
+--echo #
+eval $query;
+SET tmp_table_size = @save_tmp_table_size;
+DROP VIEW v1;
+
+--echo # The result should be 200
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+--echo #
+--echo # merge_many_buffers() gets called, followed by merge_walk()
+--echo #
+SET tmp_table_size=1024;
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+SET tmp_table_size = @save_tmp_table_size;
+
+CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT n_name, GROUP_CONCAT(c_name), COUNT(c_name) FROM v1 GROUP BY n_name;
+
+SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name)
+FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name;
+
+DROP VIEW v1;
+
+--echo #
+--echo # Tests for packed unique during EITS collection
+--echo #
+
+SET @save_histogram_size= @@histogram_size;
+SET histogram_size=0;
+--echo #
+--echo # Testing when histograms are not created; this tests the function count_distinct_single_occurence_walk
+--echo # passed as a callback function to walk()
+--echo #
+
+--source include/unique_packed_eits.inc
+
+--echo #
+--echo # Testing when histograms are created; this tests the function histogram_build_walk
+--echo # passed as a callback function to walk()
+--echo #
+
+SET histogram_size=255;
+--source include/unique_packed_eits.inc
+
+
+--echo #
+--echo # Testing with very small memory for Unique, which forces merging
+--echo #
+
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+--source include/unique_packed_eits.inc
+SET max_heap_table_size= @save_max_heap_table_size;
+
+SET histogram_size= @save_histogram_size;
+
+--echo #
+--echo # Test with CHAR types
+--echo #
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT COUNT(*) FROM v1;
+SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+DROP VIEW v1;
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey;
+
+--echo #
+--echo # Test with VARCHAR types
+--echo #
+
+SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+DROP VIEW v1;
+
+--echo #
+--echo # Test with FIXED TYPES
+--echo #
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+
+DROP VIEW v1;
+
+DROP DATABASE dbt3;
+
+USE test;
+
+--echo #
+--echo # Testing for merge_buffers when Unique is packed
+--echo #
+
+CREATE TABLE t1 (a VARCHAR(1500));
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES();
+
+SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a';
+SELECT COUNT(*), COUNT(DISTINCT a) FROM t1;
+SET max_heap_table_size= @save_max_heap_table_size;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Simple test for DISTINCT with VARCHAR, CHAR and BLOBS
+--echo #
+
+--echo #
+--echo # Test with VARCHAR types
+--echo #
+
+CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Test with CHAR types
+--echo #
+
+CREATE TABLE t1 (a CHAR(255), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+
+
+--echo #
+--echo # Test with INT types (fixed size datatypes)
+--echo #
+
+SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test with BLOB types
+--echo #
+
+CREATE TABLE t1 (a BLOB, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
diff --git a/sql/field.h b/sql/field.h
index e4c5ffcc0de..693d0b63ded 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1109,6 +1109,11 @@ public:
*/
virtual uint32 data_length() { return pack_length(); }
virtual uint32 sort_length() const { return pack_length(); }
+ /*
+ Returns the sort_length for a field, excluding the suffix length
+ bytes stored for fields with a binary charset.
+ */
+ virtual uint32 sort_length_without_suffix() const { return pack_length(); }
/*
sort_suffix_length() return the length bytes needed to store the length
@@ -1316,9 +1321,21 @@ public:
}
return update_fl;
}
+
+ /*
+ @brief
+ Make a packed value for a field
+ @param to buffer to store the packed value
+ @retval length of the packed value in bytes
+ */
+ virtual uint make_packed_record_field(uchar *to)
+ {
+ uchar* end= pack(to, ptr);
+ return static_cast<uint>(end - to);
+ }
virtual void store_field_value(uchar *val, uint len)
{
- memcpy(ptr, val, len);
+ memcpy(ptr, val, len);
}
virtual uint decimals() const { return 0; }
virtual Information_schema_numeric_attributes
@@ -4122,6 +4139,10 @@ public:
{
return (uint32) field_length + sort_suffix_length();
}
+ uint32 sort_length_without_suffix() const override
+ {
+ return (uint32) field_length;
+ }
virtual uint32 sort_suffix_length() const override
{
return (field_charset() == &my_charset_bin ? length_bytes : 0);
@@ -4463,6 +4484,10 @@ public:
{ return (uint32) (packlength); }
uint row_pack_length() const override { return pack_length_no_ptr(); }
uint32 sort_length() const override;
+ uint32 sort_length_without_suffix() const override
+ {
+ return field_length;
+ }
uint32 sort_suffix_length() const override;
uint32 value_length() override { return get_length(); }
virtual uint32 max_data_length() const override
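The two hooks added above, sort_length_without_suffix() and make_packed_record_field(), are what lets Unique store variable-length values in packed form instead of padding each value to its full sort length. Below is a minimal standalone sketch of the resulting layout (plain C++, not MariaDB code; every name in it is hypothetical): a small length prefix followed by only the actual value bytes.

// Hypothetical sketch of the packed layout that make_packed_record_field()
// conceptually produces for a VARCHAR-like field: [length prefix][bytes].
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Pack a value as [2-byte length][data] and return the bytes written,
// mirroring the "uchar* end= pack(to, ptr); return end - to;" idiom above.
static size_t pack_varstring(uint8_t *to, const std::string &val)
{
  uint16_t len= static_cast<uint16_t>(val.size());
  std::memcpy(to, &len, sizeof(len));
  std::memcpy(to + sizeof(len), val.data(), len);
  return sizeof(len) + len;
}

int main()
{
  const size_t fixed_len= 100;               // e.g. a VARCHAR(100) sort image
  std::vector<uint8_t> buf(sizeof(uint16_t) + fixed_len);
  for (const std::string &v : {std::string("ab"), std::string(fixed_len, 'x')})
  {
    size_t packed= pack_varstring(buf.data(), v);
    std::cout << "value of length " << v.size() << " packs into " << packed
              << " bytes; the fixed format would always use " << fixed_len
              << "\n";
  }
  return 0;
}

For short values this saves most of the buffer, which is why the tests above insert VARCHAR(1500) rows of widely varying lengths to exercise the merge path.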
diff --git a/sql/filesort.cc b/sql/filesort.cc
index ac43c96b0e0..e0389bb0343 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -35,6 +35,7 @@
#include "filesort_utils.h"
#include "sql_select.h"
#include "debug_sync.h"
+#include "uniques.h"
/* functions defined in this file */
@@ -1694,6 +1695,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
uint size_of_sort_length= param->using_packed_sortkeys() ?
Sort_keys::size_of_length_field : 0;
+ uint size_of_dupl_count= param->min_dupl_count ?
+ sizeof(element_count) : 0;
for (; ix < count; ++ix)
{
@@ -1709,14 +1712,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
buffpek->buffer_end())
break; // Incomplete record.
- uchar *plen= record + sort_length;
+ uchar *plen= record + sort_length + size_of_dupl_count;
+
uint res_length= param->get_result_length(plen);
if (plen + res_length > buffpek->buffer_end())
break; // Incomplete record.
- DBUG_ASSERT(res_length > 0);
+ DBUG_ASSERT(!param->sort_keys || res_length > 0);
DBUG_ASSERT(sort_length + res_length <= param->rec_length);
record+= sort_length;
record+= res_length;
+ record+= size_of_dupl_count;
}
DBUG_ASSERT(ix > 0);
count= ix;
@@ -1812,12 +1817,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
rec_length= param->rec_length;
res_length= param->res_length;
sort_length= param->sort_length;
- uint dupl_count_ofs= rec_length-sizeof(element_count);
uint min_dupl_count= param->min_dupl_count;
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
+
bool check_dupl_count= flag && min_dupl_count;
offset= (rec_length-
(flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
- uint wr_len= flag ? res_length : rec_length;
uint wr_offset= flag ? offset : 0;
const bool using_packed_sortkeys= param->using_packed_sortkeys();
@@ -1867,10 +1872,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
Store it also in 'to_file'.
*/
buffpek= (Merge_chunk*) queue_top(&queue);
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+
+ DBUG_ASSERT(rec_length <= param->sort_length);
+
memcpy(unique_buff, buffpek->current_key(), rec_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+
buffpek->advance_current_key(rec_length);
buffpek->decrement_mem_count();
if (buffpek->mem_count() == 0)
@@ -1900,28 +1911,31 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
src= buffpek->current_key();
if (cmp) // Remove duplicates
{
- uchar *current_key= buffpek->current_key();
- if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key))
+ if (!(*cmp)(first_cmp_arg, &unique_buff, &src))
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
}
goto skip_duplicate;
}
+
if (min_dupl_count)
{
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
}
+ res_length= rec_length - size_of_dupl_count;
src= unique_buff;
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
{
- param->get_rec_and_res_len(buffpek->current_key(),
- &rec_length, &res_length);
const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
/*
@@ -1943,10 +1957,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (cmp)
{
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+ DBUG_ASSERT(rec_length <= param->sort_length);
memcpy(unique_buff, buffpek->current_key(), rec_length);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ {
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+ }
}
if (!--max_rows)
{
@@ -1989,6 +2008,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
@@ -1998,13 +2018,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (min_dupl_count)
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ {
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
+ }
if (!check_dupl_count || dupl_count >= min_dupl_count)
{
src= unique_buff;
- if (my_b_write(to_file, src+wr_offset, wr_len))
+ res_length = rec_length - size_of_dupl_count;
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+ if (my_b_write(to_file,
+ src + (offset_for_packing ?
+ rec_length - res_length : // sort length
+ wr_offset),
+ bytes_to_write))
goto err; /* purecov: inspected */
if (!--max_rows)
goto end;
@@ -2022,17 +2051,28 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
for (uint ix= 0; ix < buffpek->mem_count(); ++ix)
{
uchar *src= buffpek->current_key();
- param->get_rec_and_res_len(src,
- &rec_length, &res_length);
- const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
- if (check_dupl_count)
+ if (cmp)
{
- memcpy((uchar *) &dupl_count,
- buffpek->current_key() + offset + dupl_count_ofs,
- sizeof(dupl_count));
- if (dupl_count < min_dupl_count)
- continue;
+ rec_length= param->get_record_length_for_unique(src,
+ size_of_dupl_count);
+ res_length= rec_length - size_of_dupl_count;
+ if (check_dupl_count)
+ {
+ /*
+ TODO varun: this looks incorrect to me
+ */
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count));
+
+ if (dupl_count < min_dupl_count)
+ continue;
+ }
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
+
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+
if(my_b_write(to_file,
src + (offset_for_packing ?
rec_length - res_length : // sort length
@@ -2532,6 +2572,23 @@ void Sort_param::try_to_pack_sortkeys()
rec_length= sort_length + addon_length;
}
+/*
+ @brief
+ Return the length of the record in the Unique tree
+
+ @param
+ to pointer to the key value
+ size_of_dupl_count number of bytes appended to the record to store the
+ duplicate counter (non-zero only when min_dupl_count > 0)
+*/
+uint32 Sort_param::get_record_length_for_unique(uchar *to,
+ uint size_of_dupl_count)
+{
+ return using_packed_sortkeys() ?
+ Unique::read_packed_length(to) + size_of_dupl_count :
+ rec_length;
+}
+
uint
Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item,
@@ -2742,6 +2799,45 @@ bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const
/*
+ @brief
+ Setup the SORT_FIELD structure
+
+ @param
+ fld field structure
+ exclude_nulls TRUE if nulls are not to be considered
+ with_suffix TRUE if the suffix length bytes used for fields with a
+ binary charset are to be included
+
+ @note
+ Currently used only by the Unique object
+ TODO varun: we can refactor the filesort code to use this function.
+
+*/
+void SORT_FIELD::setup(Field *fld, bool exclude_nulls, bool with_suffix)
+{
+ field= fld;
+ item= NULL;
+ /*
+ For Unique, reverse always needs to be FALSE; alternatively it could
+ be passed as an argument to this function
+ */
+ reverse= false;
+ original_length= length= (with_suffix ?
+ field->sort_length() :
+ field->sort_length_without_suffix());
+
+ cs= field->sort_charset();
+ suffix_length= with_suffix ? field->sort_suffix_length() : 0;
+ type= field->is_packable() ?
+ SORT_FIELD_ATTR::VARIABLE_SIZE :
+ SORT_FIELD_ATTR::FIXED_SIZE;
+ maybe_null= exclude_nulls ? false : field->maybe_null();
+ length_bytes= is_variable_sized() ?
+ number_storage_requirement(length) : 0;
+}
+
+
+/*
Compare function used for packing sort keys
*/
@@ -2871,16 +2967,45 @@ int compare_packed_sort_keys(void *sort_param,
unsigned char **a_ptr, unsigned char **b_ptr)
{
int retval= 0;
- size_t a_len, b_len;
Sort_param *param= (Sort_param*)sort_param;
Sort_keys *sort_keys= param->sort_keys;
uchar *a= *a_ptr;
uchar *b= *b_ptr;
- a+= Sort_keys::size_of_length_field;
- b+= Sort_keys::size_of_length_field;
- for (SORT_FIELD *sort_field= sort_keys->begin();
- sort_field != sort_keys->end(); sort_field++)
+ if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field,
+ b + Sort_keys::size_of_length_field)))
+ return retval;
+
+ /*
+    This comparison is done for the case when the sort key is appended with
+    the ROW_ID pointer. In such cases we don't have addon fields,
+    so we can do a memcmp over both sort keys
+ */
+ if (!param->using_addon_fields())
+ {
+ a+= Sort_keys::read_sortkey_length(a);
+ b+= Sort_keys::read_sortkey_length(b);
+ retval= memcmp(a, b, param->res_length);
+ }
+ return retval;
+}
+
+
+/*
+ @brief
+ Compare two sort keys
+
+ @retval
+ >0 key a greater than b
+ =0 key a equal to b
+ <0 key a less than b
+*/
+
+int Sort_keys::compare_keys(uchar *a, uchar *b)
+{
+ int retval= 0;
+ size_t a_len, b_len;
+ for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++)
{
retval= sort_field->is_variable_sized() ?
sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) :
@@ -2891,15 +3016,7 @@ int compare_packed_sort_keys(void *sort_param,
a+= a_len;
b+= b_len;
-
}
- /*
- this comparison is done for the case when the sort keys is appended with
- the ROW_ID pointer. For such cases we don't have addon fields
- so we can make a memcmp check over both the sort keys
- */
- if (!param->using_addon_fields())
- retval= memcmp(a, b, param->res_length);
return retval;
}
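
Note: the merge path above relies on every packed record carrying its own length. A minimal standalone sketch of that layout (not the server code; int4store()/uint4korr() are replaced with memcpy, which matches only on a little-endian build):

    #include <cstdint>
    #include <cstring>

    // Unique::size_of_length_field: a 4-byte prefix storing the payload size
    // (the prefix itself excluded); an optional duplicate counter is appended
    // at the very end of the record.
    static const unsigned size_of_length_field= 4;

    static void store_packed_length(unsigned char *p, unsigned sz)
    {
      uint32_t payload= sz - size_of_length_field;
      memcpy(p, &payload, 4);                  // stands in for int4store()
    }

    static unsigned read_packed_length(const unsigned char *p)
    {
      uint32_t payload;
      memcpy(&payload, p, 4);                  // stands in for uint4korr()
      return size_of_length_field + payload;   // full record length
    }

With this layout, Sort_param::get_record_length_for_unique() only has to add size_of_dupl_count when a trailing counter is present.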
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc
index b9a84775311..48075106206 100644
--- a/sql/item_jsonfunc.cc
+++ b/sql/item_jsonfunc.cc
@@ -1452,7 +1452,7 @@ append_null:
}
-static int append_json_value_from_field(String *str,
+static bool append_json_value_from_field(String *str,
Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val)
{
if (i->type_handler()->is_bool_type())
@@ -1498,6 +1498,73 @@ append_null:
}
+/*
+ @brief
+ Append the value of a field in JSON format
+
+ @param
+ str buffer to write the value
+ item JSON_ARRAYAGG item
+ field field whose value needs to be appended
+ tmp_val temp buffer
+
+ @note
+ Use this function when the field has the value in its ptr.
+    This is currently used when packing is done for the JSON_ARRAYAGG
+    function: in that case the value has to be unpacked into Field::ptr first.
+
+ @retval
+ FALSE value appended in JSON format
+ TRUE error
+*/
+
+static bool append_json_value_from_field(String *str, Item *item, Field *field,
+ String *tmp_val)
+{
+ if (item->type_handler()->is_bool_type())
+ {
+ if (field->is_null())
+ goto append_null;
+
+ longlong v_int= field->val_int();
+ const char *t_f;
+ int t_f_len;
+
+ if (v_int)
+ {
+ t_f= "true";
+ t_f_len= 4;
+ }
+ else
+ {
+ t_f= "false";
+ t_f_len= 5;
+ }
+
+ return str->append(t_f, t_f_len);
+ }
+ {
+ if (field->is_null())
+ goto append_null;
+ String *sv= field->val_str(tmp_val);
+
+ if (item->is_json_type())
+ return str->append(sv->ptr(), sv->length());
+
+ if (item->result_type() == STRING_RESULT)
+ {
+ return str->append("\"", 1) ||
+ st_append_escaped(str, sv) ||
+ str->append("\"", 1);
+ }
+ return st_append_escaped(str, sv);
+ }
+
+append_null:
+ return str->append("null", 4);
+}
+
+
static int append_json_keyname(String *str, Item *item, String *tmp_val)
{
String *sv= item->val_str(tmp_val);
@@ -3689,6 +3756,19 @@ String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
}
+String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
+ String *tmp)
+{
+ m_tmp_json.length(0);
+
+ if (append_json_value_from_field(&m_tmp_json, i, f, tmp))
+ return NULL;
+
+ return &m_tmp_json;
+}
+
+
void Item_func_json_arrayagg::cut_max_length(String *result,
uint old_length, uint max_length) const
{
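
Note: the boolean handling in the new append_json_value_from_field() overload above reduces to the following mapping (a standalone restatement, not the server code):

    // NULL -> "null", non-zero integer -> "true", zero -> "false"
    static const char *json_bool_image(bool is_null, long long v)
    {
      return is_null ? "null" : (v ? "true" : "false");
    }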
diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h
index 0408b3ad72f..16f700724bb 100644
--- a/sql/item_jsonfunc.h
+++ b/sql/item_jsonfunc.h
@@ -547,6 +547,7 @@ protected:
String *get_str_from_item(Item *i, String *tmp);
String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset);
+ String *get_str_from_field(Item *i, Field *f, String *tmp);
void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index e79344507b2..96b17e74019 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -701,6 +701,32 @@ int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2)
}
+/**
+ Correctly compare composite keys.
+
+ Used by the Unique class to compare packed keys. Will do correct comparisons
+ for composite keys with various field types.
+
+ @param arg Pointer to the relevant Aggregator_distinct instance
+ @param key1 left key image
+ @param key2 right key image
+
+ @return comparison result
+ @retval <0 if key1 < key2
+ @retval =0 if key1 = key2
+ @retval >0 if key1 > key2
+*/
+
+int
+Aggregator_distinct::composite_packed_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Aggregator_distinct *aggr= (Aggregator_distinct *) arg;
+ DBUG_ASSERT(aggr->tree);
+ return aggr->tree->compare_packed_keys(key1, key2);
+}
+
+
/***************************************************************************/
C_MODE_START
@@ -766,13 +792,19 @@ bool Aggregator_distinct::setup(THD *thd)
return TRUE;
/* Create a table with an unique key over all parameters */
+ uint non_const_items= 0;
for (uint i=0; i < item_sum->get_arg_count() ; i++)
{
Item *item=item_sum->get_arg(i);
if (list.push_back(item, thd->mem_root))
return TRUE; // End of memory
- if (item->const_item() && item->is_null())
- always_null= true;
+ if (item->const_item())
+ {
+ if (item->is_null())
+ always_null= true;
+ }
+ else
+ non_const_items++;
}
if (always_null)
return FALSE;
@@ -861,16 +893,27 @@ bool Aggregator_distinct::setup(THD *thd)
}
}
}
+
+ bool allow_packing= item_sum->is_packing_allowed(table, &tree_key_length);
+
+ if (allow_packing)
+ {
+ compare_key= (qsort_cmp2) composite_packed_key_cmp;
+ cmp_arg= (void*)this;
+ }
+
DBUG_ASSERT(tree == 0);
tree= new Unique(compare_key, cmp_arg, tree_key_length,
- item_sum->ram_limitation(thd));
+ item_sum->ram_limitation(thd), 0, allow_packing);
/*
The only time tree_key_length could be 0 is if someone does
count(distinct) on a char(0) field - stupid thing to do,
but this has to be handled - otherwise someone can crash
the server with a DoS attack
*/
- if (! tree)
+ if (!tree || tree->setup(thd, item_sum,
+ non_const_items, item_sum->get_arg_count(),
+ TRUE))
return TRUE;
}
return FALSE;
@@ -987,9 +1030,23 @@ bool Aggregator_distinct::add()
if (copy_funcs(tmp_table_param->items_to_copy, table->in_use))
return TRUE;
- for (Field **field=table->field ; *field ; field++)
- if ((*field)->is_real_null(0))
- return 0; // Don't count NULL
+ uchar *to= NULL, *orig_to= table->record[0] + table->s->null_bytes;
+ uint packed_length= 0;
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= tree->get_packed_rec_ptr();
+ if ((to= make_packed_record(to)) == NULL)
+ return false;
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
+ else
+ {
+ for (Field **field=table->field ; *field ; field++)
+ if ((*field)->is_real_null(0))
+ return 0; // Don't count NULL
+ }
if (tree)
{
@@ -999,7 +1056,8 @@ bool Aggregator_distinct::add()
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/
- return tree->unique_add(table->record[0] + table->s->null_bytes);
+ return tree->unique_add(orig_to, tree->is_packed() ?
+ packed_length : tree->get_size());
}
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) &&
table->file->is_fatal_error(error, HA_CHECK_DUP))
@@ -1017,7 +1075,7 @@ bool Aggregator_distinct::add()
'0' values are also stored in the tree. This doesn't matter
for SUM(DISTINCT), but is important for AVG(DISTINCT)
*/
- return tree->unique_add(table->field[0]->ptr);
+ return tree->unique_add(table->field[0]->ptr, tree->get_size());
}
}
@@ -1778,6 +1836,48 @@ bool Aggregator_distinct::unique_walk_function_for_count(void *element)
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+    TRUE   tree stores packed values
+ FALSE otherwise
+*/
+
+bool Aggregator_distinct::is_distinct_packed()
+{
+ return tree && tree->is_packed();
+}
+
+
+/*
+ @brief
+ Make a record with packed values
+
+ @param
+    to   buffer to store the packed value
+
+  @retval
+    pointer right past the end of the packed record, or
+    NULL if some field value is NULL (rows with NULLs are not counted)
+*/
+uchar *Aggregator_distinct::make_packed_record(uchar *to)
+{
+ to+= Unique::size_of_length_field;
+
+ for (Field **field=table->field ; *field ; field++)
+ {
+ Field *fld= (*field);
+ if (fld->is_real_null(0))
+ return NULL; // Don't count NULL
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ to+= fld->make_packed_record_field(to);
+ }
+ return to;
+}
+
+
Aggregator_distinct::~Aggregator_distinct()
{
if (tree)
@@ -3612,6 +3712,33 @@ int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
/**
+  Compares the packed values for the fields in the expr list of GROUP_CONCAT.
+ @note
+
+ GROUP_CONCAT([DISTINCT] expr [,expr ...]
+ [ORDER BY {unsigned_integer | col_name | expr}
+ [ASC | DESC] [,col_name ...]]
+ [SEPARATOR str_val])
+
+ @return
+ @retval -1 : key1 < key2
+ @retval 0 : key1 = key2
+ @retval 1 : key1 > key2
+*/
+int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *a_ptr,
+ const void *b_ptr)
+{
+ Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
+
+ DBUG_ASSERT(item_func->unique_filter);
+ uchar *a= (uchar*)a_ptr;
+ uchar *b= (uchar*)b_ptr;
+ return item_func->unique_filter->compare_packed_keys(a, b);
+}
+
+
+/**
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
*/
@@ -3778,10 +3905,21 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
String tmp((char *)table->record[1], table->s->reclength,
default_charset_info);
String tmp2;
- uchar *key= (uchar *) key_arg;
+ const uchar *key= (const uchar *) key_arg;
+ const uchar *key_end= NULL;
String *result= &item->result;
Item **arg= item->args, **arg_end= item->args + item->arg_count_field;
uint old_length= result->length();
+ SORT_FIELD *pos;
+
+ const bool packed= item->is_distinct_packed();
+
+ if (packed)
+ {
+ pos= item->unique_filter->get_sortorder();
+ key_end= key + item->unique_filter->get_full_size();
+ key+= Unique::size_of_length_field;
+ }
ulonglong *offset_limit= &item->copy_offset_limit;
ulonglong *row_limit = &item->copy_row_limit;
@@ -3822,11 +3960,43 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
Field *field= (*arg)->get_tmp_table_field();
if (field)
{
- uint offset= (field->offset(field->table->record[0]) -
- table->s->null_bytes);
- DBUG_ASSERT(offset < table->s->reclength);
- res= item->get_str_from_field(*arg, field, &tmp, key,
- offset + item->get_null_bytes());
+ if (packed)
+ {
+ DBUG_ASSERT(pos->field == field);
+ if (!item->skip_nulls() && field->maybe_null())
+ {
+ if (*key == 0)
+ {
+ // Case with NULL value
+ field->set_null();
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key++;
+ }
+ else
+ {
+ // Case with NOT NULL value
+ field->set_notnull();
+ const uchar *end= field->unpack(field->ptr, key+1, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ }
+ else
+ {
+ const uchar *end= field->unpack(field->ptr, key, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ pos++;
+ }
+ else
+ {
+ uint offset= (field->offset(field->table->record[0]) -
+ table->s->null_bytes);
+ DBUG_ASSERT(offset < table->s->reclength);
+ res= item->get_str_from_field(*arg, field, &tmp, key,
+ offset + item->get_null_bytes());
+ }
}
else
res= item->get_str_from_item(*arg, &tmp);
@@ -4137,6 +4307,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
size_t row_str_len= 0;
StringBuffer<MAX_FIELD_WIDTH> buf;
String *res;
+ uchar *to= NULL, *orig_to= get_record_pointer();
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= unique_filter->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ }
+
for (uint i= 0; i < arg_count_field; i++)
{
Item *show_item= args[i];
@@ -4151,6 +4329,24 @@ bool Item_func_group_concat::add(bool exclude_nulls)
return 0; // Skip row if it contains null
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ if (is_distinct_packed())
+ {
+ if (!exclude_nulls && field->maybe_null())
+ {
+ if (field->is_null_in_record((const uchar*) table->record[0]))
+ {
+ *to++=0;
+ continue;
+ }
+ *to++=1;
+ }
+ uchar* end= field->pack(to, field->ptr);
+ to+= static_cast<uint>(end - to);
+ }
}
else
{
@@ -4167,9 +4363,17 @@ bool Item_func_group_concat::add(bool exclude_nulls)
if (distinct)
{
+ DBUG_ASSERT(unique_filter);
+ uint packed_length= unique_filter->get_size();
+
+ if (unique_filter->is_packed())
+ {
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
/* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree();
- unique_filter->unique_add(get_record_pointer());
+ unique_filter->unique_add(get_record_pointer(), packed_length);
if (count == unique_filter->elements_in_tree())
row_eligible= FALSE;
}
@@ -4285,16 +4489,22 @@ bool Item_func_group_concat::setup(THD *thd)
/* Push all not constant fields to the list and create a temp table */
always_null= 0;
+ uint non_const_items= 0;
for (uint i= 0; i < arg_count_field; i++)
{
Item *item= args[i];
if (list.push_back(item, thd->mem_root))
DBUG_RETURN(TRUE);
- if (item->const_item() && item->is_null() && skip_nulls())
+ if (item->const_item())
{
- always_null= 1;
- DBUG_RETURN(FALSE);
+ if (item->is_null() && skip_nulls())
+ {
+ always_null= 1;
+ DBUG_RETURN(FALSE);
+ }
}
+ else
+ non_const_items++;
}
List<Item> all_fields(list);
@@ -4377,6 +4587,7 @@ bool Item_func_group_concat::setup(THD *thd)
the row is not added to the result.
*/
uint tree_key_length= table->s->reclength - table->s->null_bytes;
+ bool allow_packing= is_packing_allowed(&tree_key_length);
if (arg_count_order)
{
@@ -4395,10 +4606,16 @@ bool Item_func_group_concat::setup(THD *thd)
}
if (distinct)
- unique_filter= new Unique(get_comparator_function_for_distinct(),
+ {
+ unique_filter= new Unique(get_comparator_function_for_distinct(allow_packing),
(void*)this,
tree_key_length + get_null_bytes(),
- ram_limitation(thd));
+ ram_limitation(thd), 0, allow_packing);
+
+ if (!unique_filter || unique_filter->setup(thd, this, non_const_items,
+ arg_count_field, skip_nulls()))
+ DBUG_RETURN(TRUE);
+ }
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
{
@@ -4456,11 +4673,13 @@ String* Item_func_group_concat::val_str(String* str)
  Get the comparator function for the DISTINCT clause
*/
-qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
+qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct(bool packed)
{
- return skip_nulls() ?
- group_concat_key_cmp_with_distinct :
- group_concat_key_cmp_with_distinct_with_nulls;
+ return packed ?
+ group_concat_packed_key_cmp_with_distinct :
+ (skip_nulls() ?
+ group_concat_key_cmp_with_distinct :
+ group_concat_key_cmp_with_distinct_with_nulls);
}
@@ -4488,9 +4707,11 @@ qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by()
uchar* Item_func_group_concat::get_record_pointer()
{
- return skip_nulls() ?
- table->record[0] + table->s->null_bytes :
- table->record[0];
+ return is_distinct_packed() ?
+ unique_filter->get_packed_rec_ptr() :
+ (skip_nulls() ?
+ table->record[0] + table->s->null_bytes :
+ table->record[0]);
}
@@ -4511,6 +4732,102 @@ uint Item_func_group_concat::get_null_bytes()
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+    TRUE   tree stores packed values
+ FALSE otherwise
+*/
+
+bool Item_func_group_concat::is_distinct_packed()
+{
+ return unique_filter && unique_filter->is_packed();
+}
+
+
+/*
+ @brief
+ Checks if one can store packed values in the Unique tree
+
+ @param
+ total_length [OUT] length of the key in the tree
+
+ @retval
+ TRUE packing allowed
+ FALSE packing not allowed
+*/
+
+bool Item_func_group_concat::is_packing_allowed(uint* total_length)
+{
+ /*
+ TODO varun:
+ Currently Unique is not packed if ORDER BY clause is used
+ This needs to be implemented when MDEV-22089 is fixed
+ */
+ if (!distinct || arg_count_order)
+ return false;
+
+ return Item_sum::is_packing_allowed(table, total_length);
+}
+
+
+/*
+ @brief
+ Check if storing packed values inside the Unique tree is allowed
+
+ @param table Table structure
+  @param total_length  [OUT] max length of the packed key (takes the
+                       length bytes into account as well)
+*/
+
+
+bool Item_sum::is_packing_allowed(TABLE *table, uint* total_length)
+{
+ uint size_of_packable_fields= 0;
+ uint tot_length= 0;
+ for (uint i= 0; i < get_arg_count(); i++)
+ {
+ Item *item= args[i];
+ if (item->const_item())
+ continue;
+ Field *field= item->get_tmp_table_field();
+ if (field)
+ {
+ /*
+ For blob columns packing is not allowed
+ */
+ if (field->flags & BLOB_FLAG)
+ return false;
+
+ tot_length+= field->sort_length_without_suffix();
+ if (field->is_packable())
+ {
+ size_of_packable_fields+=
+ number_storage_requirement(field->sort_length_without_suffix());
+ }
+ }
+ }
+
+ // All fields are of fixed size
+ if (size_of_packable_fields == 0)
+ return false;
+
+ /*
+ TODO varun: we can introduce a heuristic here, no need to do packing if we
+ have small VARCHAR or CHAR
+ */
+ *total_length= tot_length;
+ /*
+    Unique::size_of_length_field is the number of length bytes used to store
+    the packed length of each record inserted into the Unique tree
+ */
+ (*total_length)+= Unique::size_of_length_field + size_of_packable_fields;
+ return true;
+}
+
+
void Item_func_group_concat::print(String *str, enum_query_type query_type)
{
str->append(func_name());
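
Note: a worked example of the total_length computed by Item_sum::is_packing_allowed() above, for a hypothetical table (the lengths assume a single-byte character set, so sort_length_without_suffix() of the VARCHAR equals its declared length):

    // CREATE TABLE t1 (v VARCHAR(32) CHARACTER SET latin1, i INT);
    // SELECT COUNT(DISTINCT v, i) FROM t1;
    //
    //   tot_length              = 32 (v) + 4 (i)                    = 36
    //   size_of_packable_fields = 1   (one length byte, as 32 < 256)
    //   *total_length           = 36 + 4 (size_of_length_field) + 1 = 41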
diff --git a/sql/item_sum.h b/sql/item_sum.h
index dc520ce2578..39d5b79c1c4 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -590,6 +590,7 @@ public:
bool with_sum_func() const { return true; }
virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
+ bool is_packing_allowed(TABLE* table, uint* total_length);
};
@@ -696,7 +697,10 @@ public:
bool unique_walk_function(void *element);
bool unique_walk_function_for_count(void *element);
+ bool is_distinct_packed();
static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int composite_packed_key_cmp(void* arg, uchar* key1, uchar* key2);
+ uchar* make_packed_record(uchar *to);
};
@@ -1920,6 +1924,9 @@ protected:
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1,
const void* key2);
+ friend int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *key1,
+ const void *key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order_with_nulls(void *arg,
@@ -1941,6 +1948,9 @@ protected:
virtual String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset)
{ return f->val_str(tmp, key + offset); }
+ virtual String *get_str_from_field(Item *i, Field *f, String *tmp)
+ { return f->val_str(tmp); }
+
virtual void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
@@ -2015,11 +2025,12 @@ public:
{ context= (Name_resolution_context *)cntx; return FALSE; }
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_group_concat>(thd, this); }
- qsort_cmp2 get_comparator_function_for_distinct();
+ qsort_cmp2 get_comparator_function_for_distinct(bool packed);
qsort_cmp2 get_comparator_function_for_order_by();
uchar* get_record_pointer();
uint get_null_bytes();
-
+ bool is_distinct_packed();
+ bool is_packing_allowed(uint* total_length);
};
#endif /* ITEM_SUM_INCLUDED */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 80d67d884bf..7d65c85129b 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -11772,7 +11772,7 @@ int read_keys_and_merge_scans(THD *thd,
}
DBUG_ASSERT(file->ref_length == unique->get_size());
- DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
+ DBUG_ASSERT(thd->variables.sortbuff_size <= unique->get_max_in_memory_size());
for (;;)
{
@@ -11815,7 +11815,7 @@ int read_keys_and_merge_scans(THD *thd,
continue;
cur_quick->file->position(cur_quick->record);
- if (unique->unique_add((char*)cur_quick->file->ref))
+ if (unique->unique_add((char*)cur_quick->file->ref, unique->get_size()))
goto err;
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 58a786ed33c..c158d7c5a14 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -6454,6 +6454,7 @@ struct SORT_FIELD: public SORT_FIELD_ATTR
Field *field; /* Field to sort */
Item *item; /* Item if not sorting fields */
bool reverse; /* if descending sort */
+ void setup(Field *fld, bool exclude_nulls, bool with_suffix);
};
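
Note: the setup() declaration above is driven from Unique::setup() later in this diff, roughly one call per non-constant, field-backed argument of the aggregate:

    // with_suffix is always FALSE for Unique; nulls are excluded when the
    // aggregate skips them (e.g. COUNT(DISTINCT))
    pos->setup(field, exclude_nulls, false);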
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 5942efcd601..db5b5b777a8 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -714,7 +714,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
{
table->file->position(table->record[0]);
if (unlikely((error=
- deltempfile->unique_add((char*) table->file->ref))))
+ deltempfile->unique_add((char*) table->file->ref,
+ deltempfile->get_size()))))
{
error= 1;
goto terminate_delete;
@@ -1350,7 +1351,8 @@ int multi_delete::send_data(List<Item> &values)
}
else
{
- error=tempfiles[secure_counter]->unique_add((char*) table->file->ref);
+ uint size= tempfiles[secure_counter]->get_size();
+ error=tempfiles[secure_counter]->unique_add((char*) table->file->ref, size);
if (unlikely(error))
{
error= 1; // Fatal error
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 40f0c5ede5f..d512a9727b6 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -310,6 +310,8 @@ public:
sort_length+= len;
}
+ int compare_keys(uchar *a, uchar *b);
+
static const uint size_of_length_field= 4;
private:
@@ -567,8 +569,8 @@ public:
bool using_packed_sortkeys() const
{
- DBUG_ASSERT(m_using_packed_sortkeys ==
- (sort_keys != NULL && sort_keys->using_packed_sortkeys()));
+ DBUG_ASSERT(sort_keys == NULL ||
+ (m_using_packed_sortkeys == sort_keys->using_packed_sortkeys()));
return m_using_packed_sortkeys;
}
@@ -578,6 +580,11 @@ public:
return addon_fields != NULL;
}
+ void set_using_packed_keys(bool val)
+ {
+ m_using_packed_sortkeys= val;
+ }
+
uint32 get_result_length(uchar *plen)
{
if (!m_using_packed_addons)
@@ -659,6 +666,12 @@ public:
{
return m_packed_format;
}
+ void set_packed_format(bool val)
+ {
+ m_packed_format= val;
+ }
+
+ uint32 get_record_length_for_unique(uchar *to, uint size_of_dupl_count);
private:
uint m_packable_length;
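
Note: restated outside the class (a sketch under the same assumptions as the layout sketch earlier in this diff), the length selection added to Sort_param above is:

    static uint32_t record_length_for_unique(bool packed_keys,
                                             const unsigned char *rec,
                                             unsigned size_of_dupl_count,
                                             uint32_t fixed_rec_length)
    {
      // packed keys describe their own length; fixed-size keys do not
      return packed_keys ? read_packed_length(rec) + size_of_dupl_count
                         : fixed_rec_length;
    }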
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 879955af723..bf6cebbf7e8 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1570,7 +1570,16 @@ public:
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
- column->store_field_value((uchar *) elem, col_length);
+ uchar *to= (uchar* )elem;
+ if (column->is_packable())
+ {
+ column->unpack(column->ptr,
+ to + Unique::size_of_length_field,
+ to + Unique::read_packed_length(to), 0);
+ }
+ else
+ column->store_field_value(to, col_length);
+
histogram->set_value(curr_bucket,
column->pos_in_interval(min_value, max_value));
curr_bucket++;
@@ -1646,13 +1655,11 @@ public:
of the parameters to be passed to the constructor of the Unique object.
*/
- Count_distinct_field(Field *field, size_t max_heap_table_size)
+ Count_distinct_field(Field *field)
{
table_field= field;
- tree_key_length= field->pack_length();
-
- tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
- tree_key_length, max_heap_table_size, 1);
+ tree_key_length= 0;
+ tree= NULL;
}
virtual ~Count_distinct_field()
@@ -1670,6 +1677,48 @@ public:
return (tree != NULL);
}
+
+ /*
+ @brief
+      Create and set up the Unique object for the column
+
+ @param
+ thd Thread structure
+ max_heap_table_size max allowed size of the unique tree
+ */
+ bool setup(THD *thd, size_t max_heap_table_size)
+ {
+ if (table_field->is_packable())
+ {
+ tree_key_length= table_field->max_packed_col_length(table_field->pack_length());
+
+ tree_key_length+= Unique::size_of_length_field;
+ tree= new Unique((qsort_cmp2) simple_packed_str_key_cmp, (void*) this,
+ tree_key_length, max_heap_table_size, 1, TRUE);
+ }
+ else
+ {
+ if (table_field->type() == MYSQL_TYPE_BIT)
+ {
+ tree_key_length= sizeof(ulonglong);
+ tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
+ (void*) &tree_key_length,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ else
+ {
+ tree_key_length= table_field->pack_length();
+ tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) table_field,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ }
+ if (!tree)
+ return true; // OOM
+
+ return tree->setup(thd, table_field);
+ }
+
+
/*
@brief
Add the value of 'field' to the container of the Unique object 'tree'
@@ -1677,7 +1726,20 @@ public:
virtual bool add()
{
table_field->mark_unused_memory_as_defined();
- return tree->unique_add(table_field->ptr);
+ uchar *orig_to= table_field->ptr;
+ DBUG_ASSERT(tree);
+
+ uint length= tree->get_size();
+ if (tree->is_packed())
+ {
+ uchar *to;
+ orig_to= to= tree->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ to+= table_field->make_packed_record_field(to);
+ length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, length);
+ }
+ return tree->unique_add(orig_to, length);
}
/*
@@ -1733,16 +1795,31 @@ public:
return table_field->collected_stats->histogram.get_values();
}
+ static int simple_packed_str_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2);
+
};
-static
-int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2)
+int Count_distinct_field::simple_ulonglong_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
{
ulonglong *val1= (ulonglong *) key1;
ulonglong *val2= (ulonglong *) key2;
return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1;
}
+
+
+/*
+ @brief
+ Compare function for packed keys
+*/
+int Count_distinct_field::simple_packed_str_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Count_distinct_field *compare_arg= (Count_distinct_field*)arg;
+ return compare_arg->tree->compare_packed_keys(key1, key2);
+}
/*
@@ -1755,20 +1832,12 @@ class Count_distinct_field_bit: public Count_distinct_field
{
public:
- Count_distinct_field_bit(Field *field, size_t max_heap_table_size)
- {
- table_field= field;
- tree_key_length= sizeof(ulonglong);
-
- tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
- (void*) &tree_key_length,
- tree_key_length, max_heap_table_size, 1);
- }
+ Count_distinct_field_bit(Field *field): Count_distinct_field(field){}
bool add()
{
longlong val= table_field->val_int();
- return tree->unique_add(&val);
+ return tree->unique_add(&val, tree->get_size());
}
};
@@ -2344,8 +2413,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field)
{
count_distinct=
table_field->type() == MYSQL_TYPE_BIT ?
- new Count_distinct_field_bit(table_field, max_heap_table_size) :
- new Count_distinct_field(table_field, max_heap_table_size);
+ new Count_distinct_field_bit(table_field) :
+ new Count_distinct_field(table_field);
+
+ if (count_distinct && count_distinct->setup(thd, max_heap_table_size))
+ count_distinct= NULL;
}
if (count_distinct && !count_distinct->exists())
count_distinct= NULL;
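
Note: what Count_distinct_field::add() hands to tree->unique_add() for a packable column is, in effect, the following (a sketch reusing the helpers from the layout sketch above; it assumes Field::pack() for a short VARCHAR emits one length byte followed by the value bytes):

    static unsigned pack_varchar_record(unsigned char *to,
                                        const unsigned char *val, unsigned len)
    {
      unsigned char *p= to + size_of_length_field; // skip the 4-byte prefix
      *p++= (unsigned char) len;                   // Field::pack(): length byte
      memcpy(p, val, len);                         // Field::pack(): value bytes
      p+= len;
      unsigned total= (unsigned) (p - to);
      store_packed_length(to, total);              // fill in the prefix
      return total;                                // passed to unique_add()
    }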
diff --git a/sql/uniques.cc b/sql/uniques.cc
index a0cebe3e4dd..d8772f366fb 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -48,12 +48,14 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
when tree implementation chooses to store pointer to key in TREE_ELEMENT
(instead of storing the element itself there)
*/
- return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
+ return (unique->is_packed() ?
+ my_b_write(&unique->file, key, Unique::read_packed_length(key)) :
+ my_b_write(&unique->file, key, unique->size)) ? 1 : 0;
}
int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
{
- return my_b_write(&unique->file, key, unique->size) ||
+ return unique_write_to_file(key, count, unique) ||
my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0;
}
@@ -79,10 +81,14 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg)
+ uint min_dupl_count_arg, bool packed_arg)
:max_in_memory_size(max_in_memory_size_arg),
size(size_arg),
+ packed(packed_arg),
+ memory_used(0), packed_rec_ptr(NULL),
+ sortorder(NULL), sort_keys(NULL),
elements(0)
{
my_b_clear(&file);
min_dupl_count= min_dupl_count_arg;
@@ -90,18 +96,32 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
if (min_dupl_count_arg)
full_size+= sizeof(element_count);
with_counters= MY_TEST(min_dupl_count_arg);
- init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func,
+ init_tree(&tree, (max_in_memory_size / 16), 0, 0, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
/* If the following fail's the next add will also fail */
my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16,
16, MYF(MY_THREAD_SPECIFIC));
+
+ if (packed)
+ {
+ packed_rec_ptr= (uchar *)my_malloc(PSI_INSTRUMENT_ME,
+ size,
+ MYF(MY_WME | MY_THREAD_SPECIFIC));
+ }
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements= (ulong) (max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
if (!max_elements)
+ {
max_elements= 1;
+ /*
+      Need to ensure that we have memory to store at least one record
+ in the Unique tree
+ */
+ max_in_memory_size= sizeof(TREE_ELEMENT) + size;
+ }
(void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
MYF(MY_WME));
@@ -371,6 +391,7 @@ Unique::~Unique()
close_cached_file(&file);
delete_tree(&tree, 0);
delete_dynamic(&file_ptrs);
+ my_free(packed_rec_ptr);
}
@@ -389,6 +410,10 @@ bool Unique::flush()
(void*) this, left_root_right) ||
insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
return 1;
+ /**
+    The tree gets reset, so make sure the memory used is reset too
+ */
+ memory_used= 0;
delete_tree(&tree, 0);
return 0;
}
@@ -495,7 +520,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
uint key_length, Merge_chunk *begin, Merge_chunk *end,
tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg,
- IO_CACHE *file, bool with_counters)
+ IO_CACHE *file, bool with_counters,
+ uint min_dupl_count, bool packed)
{
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
QUEUE queue;
@@ -521,7 +547,12 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
// read_to_buffer() needs only rec_length.
Sort_param sort_param;
sort_param.rec_length= key_length;
+ sort_param.sort_length= key_length;
+ sort_param.min_dupl_count= min_dupl_count;
+ DBUG_ASSERT(sort_param.res_length == 0);
DBUG_ASSERT(!sort_param.using_addon_fields());
+ sort_param.set_using_packed_keys(packed);
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
/*
Invariant: queue must contain top element from each tree, until a tree
@@ -534,7 +565,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
top->set_buffer(merge_buffer + (top - begin) * piece_size,
merge_buffer + (top - begin) * piece_size + piece_size);
top->set_max_keys(max_key_count_per_piece);
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
DBUG_ASSERT(bytes_read);
@@ -555,6 +586,10 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
read next key from the cache or from the file and push it to the
queue; this gives new top.
*/
+ key_length= sort_param.get_record_length_for_unique((uchar*)old_key,
+ size_of_dupl_count);
+
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
top->advance_current_key(key_length);
top->decrement_mem_count();
if (top->mem_count())
@@ -564,7 +599,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
/* save old_key not to overwrite it in read_to_buffer */
memcpy(save_key_buff, old_key, key_length);
old_key= save_key_buff;
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
else if (bytes_read) /* top->key, top->mem_count are reset */
@@ -604,15 +639,18 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
{
do
{
-
+ key_length= sort_param.get_record_length_for_unique(top->current_key(),
+ size_of_dupl_count);
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
cnt= with_counters ?
get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1;
if (walk_action(top->current_key(), cnt, walk_action_arg))
goto end;
+
top->advance_current_key(key_length);
}
while (top->decrement_mem_count());
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
}
@@ -678,7 +716,8 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
(Merge_chunk *) file_ptrs.buffer,
(Merge_chunk *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg,
- tree.compare, tree.custom_arg, &file, with_counters);
+ tree.compare, tree.custom_arg, &file, with_counters,
+ min_dupl_count, is_packed());
}
my_free(merge_buffer);
return res;
@@ -730,9 +769,11 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
sort_param.max_keys_per_buffer=
(uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2);
sort_param.not_killable= 1;
+ sort_param.set_using_packed_keys(is_packed());
+ sort_param.set_packed_format(is_packed());
sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer *
- sort_param.sort_length);
+ sort_param.sort_length);
sort_param.compare= (qsort2_cmp) buffpek_compare;
sort_param.cmp_context.key_compare= tree.compare;
@@ -760,7 +801,8 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
file_ptrs.elements= maxbuffer+1;
return 0;
}
- if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size),
+ if (merge_index(&sort_param,
+ Bounds_checked_array<uchar>(buff, buff_size),
file_ptr, maxbuffer, &file, outfile))
goto err;
error= 0;
@@ -789,6 +831,8 @@ bool Unique::get(TABLE *table)
sort.return_rows= elements+tree.elements_in_tree;
DBUG_ENTER("Unique::get");
+ DBUG_ASSERT(packed == FALSE);
+
if (my_b_tell(&file) == 0)
{
/* Whole tree is in memory; Don't use disk if you don't need to */
@@ -831,3 +875,114 @@ err:
my_free(sort_buffer);
DBUG_RETURN(rc);
}
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param item item of aggregate function
+ @param non_const_args number of non constant arguments
+ @param arg_count total number of arguments
+  @param exclude_nulls   TRUE: NULLs should be excluded
+ FALSE: otherwise
+
+ @note
+ This implementation is used by GROUP_CONCAT as it can have more than
+    one argument in the argument list.
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool
+Unique::setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+  SORT_FIELD *pos;
+  if (sortorder)
+    return false;
+  DBUG_ASSERT(sort_keys == NULL);
+  sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * non_const_args);
+  pos= sortorder;
+  if (!pos)
+    return true;
+  sort_keys= new Sort_keys(sortorder, non_const_args);
+  if (!sort_keys)
+    return true;
+ for (uint i= 0; i < arg_count; i++)
+ {
+ Item *arg= item->get_arg(i);
+ if (arg->const_item())
+ continue;
+ Field *field= arg->get_tmp_table_field();
+ if (!field)
+ continue;
+
+ pos->setup(field, exclude_nulls, false);
+ pos++;
+ }
+ return false;
+}
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param field field structure
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool Unique::setup(THD *thd, Field *field)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+
+  SORT_FIELD *pos;
+  if (sortorder)
+    return false;
+
+  DBUG_ASSERT(sort_keys == NULL);
+  sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD));
+  pos= sortorder;
+  if (!pos)
+    return true;
+  sort_keys= new Sort_keys(sortorder, 1);
+  if (!sort_keys)
+    return true;
+ pos->setup(field, true, false); // Nulls are always excluded
+
+ return false;
+}
+
+
+/*
+ @brief
+ Compare two packed keys inside the Unique tree
+
+ @param a_ptr packed sort key
+ @param b_ptr packed sort key
+
+ @retval
+ >0 key a_ptr greater than b_ptr
+ =0 key a_ptr equal to b_ptr
+ <0 key a_ptr less than b_ptr
+*/
+
+int Unique::compare_packed_keys(uchar *a_ptr, uchar *b_ptr)
+{
+ return sort_keys->compare_keys(a_ptr + Unique::size_of_length_field,
+ b_ptr + Unique::size_of_length_field);
+}
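
Note: the uniques.h hunk below replaces the old element-count flush test with byte-accurate accounting; the arithmetic unique_add()/is_full() rely on is:

    // charged per unique key actually inserted:
    //   rec_size   = size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element
    // remaining room:
    //   space_left = max_in_memory_size - memory_used
    // flush-to-disk condition (never triggered before the first element):
    //   tree.elements_in_tree > 0 && rec_size > space_left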
diff --git a/sql/uniques.h b/sql/uniques.h
index 7e12a391fbd..1ade09408bd 100644
--- a/sql/uniques.h
+++ b/sql/uniques.h
@@ -44,26 +44,86 @@ class Unique :public Sql_alloc
it to be written to record_pointers.
always 0 for unions, > 0 for intersections */
bool with_counters;
+ /*
+ TRUE : Value stored is of variable size
+ FALSE : Value stored is of fixed size
+ */
+ bool packed;
+ /*
+ size in bytes used for storing keys in the Unique tree
+ */
+ size_t memory_used;
+ /*
+ Packed record ptr for a record of the table, the packed value in this
+ record is added to the unique tree
+ */
+ uchar* packed_rec_ptr;
+
+ /*
+ Array of SORT_FIELD structure storing the information about the key parts
+ in the sort key of the Unique tree
+ @see Unique::setup()
+ */
+ SORT_FIELD *sortorder;
+
+ /*
+ Structure storing information about usage of keys
+ */
+ Sort_keys *sort_keys;
bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
bool flush();
+ // return the amount of unused memory in the Unique tree
+ size_t space_left()
+ {
+ DBUG_ASSERT(max_in_memory_size >= memory_used);
+ return max_in_memory_size - memory_used;
+ }
+
+ // Check if the Unique tree is full or not
+ bool is_full(size_t record_size)
+ {
+    if (!tree.elements_in_tree) // Insert at least one element in the tree
+ return false;
+ return record_size > space_left();
+ }
+
public:
ulong elements;
SORT_INFO sort;
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
- uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg= 0);
+ uint size_arg, size_t max_in_memory_size_arg,
+ uint min_dupl_count_arg= 0, bool packed_arg= false);
~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
- inline bool unique_add(void *ptr)
+
+ /*
+ TODO varun:
+ remove the unique_add function above and pass the size as a parameter.
+    So for fixed-size types we would pass the fixed size as the argument.
+ */
+
+ inline bool unique_add(void *ptr, uint size_arg)
{
DBUG_ENTER("unique_add");
DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
- if (!(tree.flag & TREE_ONLY_DUPS) &&
- tree.elements_in_tree >= max_elements && flush())
+ TREE_ELEMENT *res;
+ size_t rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element;
+
+ if (!(tree.flag & TREE_ONLY_DUPS) && is_full(rec_size) && flush())
DBUG_RETURN(1);
- DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
+ uint count= tree.elements_in_tree;
+ res= tree_insert(&tree, ptr, size_arg, tree.custom_arg);
+ if (tree.elements_in_tree != count)
+ {
+ /*
+ increment memory used only when a unique element is inserted
+ in the tree
+ */
+ memory_used+= rec_size;
+ }
+ DBUG_RETURN(!res);
}
bool is_in_memory() { return (my_b_tell(&file) == 0); }
@@ -96,7 +156,42 @@ public:
bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
uint get_size() const { return size; }
+ uint get_full_size() const { return full_size; }
size_t get_max_in_memory_size() const { return max_in_memory_size; }
+ uchar *get_packed_rec_ptr() { return packed_rec_ptr; }
+ Sort_keys *get_keys() { return sort_keys; }
+ SORT_FIELD *get_sortorder() { return sortorder; }
+ bool is_count_stored() { return with_counters; }
+
+ // returns TRUE if the unique tree stores packed values
+ bool is_packed() { return packed; }
+
+ static void store_packed_length(uchar *p, uint sz)
+ {
+ int4store(p, sz - size_of_length_field);
+ }
+
+  // returns the length of the key including its length bytes
+ static uint read_packed_length(uchar *p)
+ {
+ return size_of_length_field + uint4korr(p);
+ }
+
+ bool setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls);
+ bool setup(THD *thd, Field *field);
+ int compare_packed_keys(uchar *a, uchar *b);
+
+ /*
+ TODO varun:
+ Currently the size_of_length_field is set to 4 but we can also use 2 here,
+ as fields are always inserted in the unique tree either from the base table
+ or from the temp table.
+    This does not look possible, as the merge process in merge_buffers()
+    expects the length of dynamic sort keys to be stored in 4 bytes;
+    maybe this needs to be discussed
+ */
+ static const uint size_of_length_field= 4;
friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);