author    Varun Gupta <varun.gupta@mariadb.com>  2020-03-28 12:31:22 +0530
committer Varun Gupta <varun.gupta@mariadb.com>  2020-06-29 14:04:07 +0530
commit    077b71e4e5e5ad4aa2b4a7f6a1b49d457f5ceb55 (patch)
tree      c4a09ebb32e662a1243c23601ab6bd65a321668f
parent    ead98fe5d34912578445d42e620c8ed95df4c433 (diff)
download  mariadb-git-10.5-varun2.tar.gz
MDEV-21829: Use packed sort keys in Unique objects (10.5-varun2)
The task deals with packing the values stored in the Unique tree for each record. The changes brought by this feature are:

1) The Unique tree can have dynamic-length keys.
2) The format of a key is:
     <key_length> <packed_value1> <packed_value2> ....... <packed_valueN>

The Unique class is currently used in:

1) agg_func(DISTINCT col)
   Most aggregate functions, like SUM and AVG, accept only fixed-size arguments, so packing is not beneficial for them. Packing is done for the COUNT and GROUP_CONCAT (or JSON_ARRAYAGG) aggregate functions, where it is meaningful.
2) index-merge
   Index merge stores row-ids, which are of fixed size, so packing is not required.
3) Engine-Independent Table Statistics
   Packing is done here for variable-length data types.

This task is an extension to MDEV-21580.
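To make the key layout concrete, here is a small standalone sketch of how such a key could be assembled (an illustration only, not code from this patch; it assumes a 4-byte <key_length> prefix that counts the whole key, and a 2-byte length prefix per variable-size packed value):

  #include <cstdint>
  #include <cstring>
  #include <string>

  // Sketch of <key_length> <packed_value1> ... <packed_valueN>.
  // Assumed layout (not taken from the patch): 4-byte total-length
  // prefix covering the whole key, 2-byte length prefix per value.
  static std::string make_packed_key(const std::string &v1,
                                     const std::string &v2)
  {
    std::string key(4, '\0');                    // placeholder for <key_length>
    auto append_value= [&key](const std::string &v)
    {
      uint16_t vlen= static_cast<uint16_t>(v.size());
      key.append(reinterpret_cast<const char*>(&vlen), sizeof(vlen));
      key.append(v);                             // <packed_value>
    };
    append_value(v1);
    append_value(v2);
    uint32_t total= static_cast<uint32_t>(key.size());
    std::memcpy(&key[0], &total, sizeof(total)); // fill in <key_length>
    return key;
  }

With this layout, two short values like "abc" and "de" occupy 4 + (2+3) + (2+2) = 13 bytes instead of two fixed-size slots.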
-rw-r--r-- mysql-test/include/unique_packed_eits.inc | 63
-rw-r--r-- mysql-test/main/unique_packed.result | 599
-rw-r--r-- mysql-test/main/unique_packed.test | 251
-rw-r--r-- sql/field.h | 27
-rw-r--r-- sql/filesort.cc | 195
-rw-r--r-- sql/item_jsonfunc.cc | 82
-rw-r--r-- sql/item_jsonfunc.h | 1
-rw-r--r-- sql/item_sum.cc | 373
-rw-r--r-- sql/item_sum.h | 15
-rw-r--r-- sql/opt_range.cc | 4
-rw-r--r-- sql/sql_class.h | 1
-rw-r--r-- sql/sql_delete.cc | 6
-rw-r--r-- sql/sql_sort.h | 17
-rw-r--r-- sql/sql_statistics.cc | 114
-rw-r--r-- sql/uniques.cc | 179
-rw-r--r-- sql/uniques.h | 107
16 files changed, 1918 insertions, 116 deletions
diff --git a/mysql-test/include/unique_packed_eits.inc b/mysql-test/include/unique_packed_eits.inc
new file mode 100644
index 00000000000..ebcf3b304a1
--- /dev/null
+++ b/mysql-test/include/unique_packed_eits.inc
@@ -0,0 +1,63 @@
+--echo #
+--echo # Tests below show the avg_frequency calculated during EITS collection,
+--echo # which we can compare with the count of distinct values for the same columns
+--echo #
+
+
+--echo #
+--echo # Running ANALYZE for customer table
+--echo #
+
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+
+--echo #
+--echo # Running ANALYZE for lineitem table
+--echo #
+
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+
+--echo #
+--echo # Running ANALYZE for nation table
+--echo #
+
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+
+--echo #
+--echo # Running ANALYZE for part table
+--echo #
+
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+
+--echo #
+--echo # Running ANALYZE for supplier table
+--echo #
+
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
diff --git a/mysql-test/main/unique_packed.result b/mysql-test/main/unique_packed.result
new file mode 100644
index 00000000000..b7b2b182b1c
--- /dev/null
+++ b/mysql-test/main/unique_packed.result
@@ -0,0 +1,599 @@
+create database dbt3;
+use dbt3;
+SET @save_tmp_table_size= @@tmp_table_size;
+CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part;
+#
+# The two queries should return the same result
+#
+SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr;
+GROUP_CONCAT(p_brand order by p_brand)
+Brand#11,Brand#12,Brand#13,Brand#14,Brand#15
+Brand#21,Brand#22,Brand#23,Brand#24,Brand#25
+Brand#31,Brand#32,Brand#33,Brand#34,Brand#35
+Brand#41,Brand#42,Brand#43,Brand#44,Brand#45
+Brand#51,Brand#52,Brand#53,Brand#54,Brand#55
+SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+GROUP_CONCAT(DISTINCT p_brand)
+Brand#11,Brand#12,Brand#13,Brand#14,Brand#15
+Brand#21,Brand#22,Brand#23,Brand#24,Brand#25
+Brand#31,Brand#32,Brand#33,Brand#34,Brand#35
+Brand#41,Brand#42,Brand#43,Brand#44,Brand#45
+Brand#51,Brand#52,Brand#53,Brand#54,Brand#55
+#
+# The two queries should return the same result
+#
+SELECT COUNT(*) FROM v1 GROUP BY p_mfgr;
+COUNT(*)
+5
+5
+5
+5
+5
+SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+COUNT(DISTINCT p_brand)
+5
+5
+5
+5
+5
+DROP VIEW v1;
+CREATE VIEW v1 AS
+SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey;
+SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr;
+p_mfgr COUNT(*)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+#
+# The two queries should return the same result as the above one
+#
+SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+p_mfgr COUNT(DISTINCT p_name)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+SET tmp_table_size=1024;
+#
+# merge_walk() gets called, that is, merging of sorted chunks of the file happens
+#
+SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+p_mfgr COUNT(DISTINCT p_name)
+Manufacturer#1 40
+Manufacturer#2 36
+Manufacturer#3 45
+Manufacturer#4 37
+Manufacturer#5 42
+SET tmp_table_size = @save_tmp_table_size;
+DROP VIEW v1;
+# The result should be 200
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+COUNT(DISTINCT p_name)
+200
+#
+# merge_many_buffers() gets called, followed by merge_walk()
+#
+SET tmp_table_size=1024;
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+COUNT(DISTINCT p_name)
+200
+SET tmp_table_size = @save_tmp_table_size;
+CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey;
+#
+# The two queries should return the same result
+#
+SELECT n_name, GROUP_CONCAT(c_name), COUNT(c_name) FROM v1 GROUP BY n_name;
+n_name GROUP_CONCAT(c_name) COUNT(c_name)
+ALGERIA Customer#000000086,Customer#000000073,Customer#000000080,Customer#000000076,Customer#000000029 5
+ARGENTINA Customer#000000059,Customer#000000014,Customer#000000106 3
+BRAZIL Customer#000000101,Customer#000000047,Customer#000000092,Customer#000000017 4
+CANADA Customer#000000005,Customer#000000040,Customer#000000122,Customer#000000013,Customer#000000022,Customer#000000064,Customer#000000146,Customer#000000023 8
+CHINA Customer#000000019,Customer#000000082,Customer#000000007,Customer#000000124,Customer#000000118 5
+EGYPT Customer#000000128,Customer#000000074,Customer#000000140,Customer#000000004 4
+ETHIOPIA Customer#000000085,Customer#000000010 2
+FRANCE Customer#000000046,Customer#000000050 2
+GERMANY Customer#000000136,Customer#000000062,Customer#000000071,Customer#000000119 4
+INDIA Customer#000000115,Customer#000000028,Customer#000000091,Customer#000000037 4
+INDONESIA Customer#000000094,Customer#000000142,Customer#000000067,Customer#000000130,Customer#000000103,Customer#000000139 6
+IRAN Customer#000000055,Customer#000000041,Customer#000000110,Customer#000000049,Customer#000000056,Customer#000000104,Customer#000000016 7
+IRAQ Customer#000000134,Customer#000000131,Customer#000000052,Customer#000000148 4
+JAPAN Customer#000000038,Customer#000000025,Customer#000000068,Customer#000000098,Customer#000000113 5
+JORDAN Customer#000000145,Customer#000000002,Customer#000000058 3
+KENYA Customer#000000089 1
+MOROCCO Customer#000000107,Customer#000000032,Customer#000000053,Customer#000000095,Customer#000000034,Customer#000000001,Customer#000000079 7
+MOZAMBIQUE Customer#000000088,Customer#000000109,Customer#000000143,Customer#000000116,Customer#000000137,Customer#000000044 6
+PERU Customer#000000121,Customer#000000061,Customer#000000097,Customer#000000035,Customer#000000008,Customer#000000077,Customer#000000133 7
+ROMANIA Customer#000000149,Customer#000000043,Customer#000000125,Customer#000000112 4
+RUSSIA Customer#000000026,Customer#000000020,Customer#000000083,Customer#000000070 4
+SAUDI ARABIA Customer#000000100 1
+UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3
+VIETNAM Customer#000000127 1
+SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name)
+FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name;
+n_name GROUP_CONCAT(DISTINCT c_name) COUNT(DISTINCT c_name)
+ALGERIA Customer#000000029,Customer#000000073,Customer#000000076,Customer#000000080,Customer#000000086 5
+ARGENTINA Customer#000000014,Customer#000000059,Customer#000000106 3
+BRAZIL Customer#000000017,Customer#000000047,Customer#000000092,Customer#000000101 4
+CANADA Customer#000000005,Customer#000000013,Customer#000000022,Customer#000000023,Customer#000000040,Customer#000000064,Customer#000000122,Customer#000000146 8
+CHINA Customer#000000007,Customer#000000019,Customer#000000082,Customer#000000118,Customer#000000124 5
+EGYPT Customer#000000004,Customer#000000074,Customer#000000128,Customer#000000140 4
+ETHIOPIA Customer#000000010,Customer#000000085 2
+FRANCE Customer#000000046,Customer#000000050 2
+GERMANY Customer#000000062,Customer#000000071,Customer#000000119,Customer#000000136 4
+INDIA Customer#000000028,Customer#000000037,Customer#000000091,Customer#000000115 4
+INDONESIA Customer#000000067,Customer#000000094,Customer#000000103,Customer#000000130,Customer#000000139,Customer#000000142 6
+IRAN Customer#000000016,Customer#000000041,Customer#000000049,Customer#000000055,Customer#000000056,Customer#000000104,Customer#000000110 7
+IRAQ Customer#000000052,Customer#000000131,Customer#000000134,Customer#000000148 4
+JAPAN Customer#000000025,Customer#000000038,Customer#000000068,Customer#000000098,Customer#000000113 5
+JORDAN Customer#000000002,Customer#000000058,Customer#000000145 3
+KENYA Customer#000000089 1
+MOROCCO Customer#000000001,Customer#000000032,Customer#000000034,Customer#000000053,Customer#000000079,Customer#000000095,Customer#000000107 7
+MOZAMBIQUE Customer#000000044,Customer#000000088,Customer#000000109,Customer#000000116,Customer#000000137,Customer#000000143 6
+PERU Customer#000000008,Customer#000000035,Customer#000000061,Customer#000000077,Customer#000000097,Customer#000000121,Customer#000000133 7
+ROMANIA Customer#000000043,Customer#000000112,Customer#000000125,Customer#000000149 4
+RUSSIA Customer#000000020,Customer#000000026,Customer#000000070,Customer#000000083 4
+SAUDI ARABIA Customer#000000100 1
+UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3
+VIETNAM Customer#000000127 1
+DROP VIEW v1;
+#
+# Tests for packed unique during EITS collection
+#
+SET @save_histogram_size= @@histogram_size;
+SET histogram_size=0;
+#
+# Testing when histograms are not created; this tests the function
+# count_distinct_single_occurence_walk passed as a callback to Unique::walk
+#
+#
+# Tests below show the avg_frequency calculated during EITS collection,
+# which we can compare with the count of distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status OK
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+#
+# Testing when histograms are created; this tests the function
+# histogram_build_walk passed as a callback to Unique::walk
+#
+SET histogram_size=255;
+#
+# Tests below show the avg_frequency calculated during EITS collection,
+# which we can compare with the count of distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+#
+# Testing with very small memory for Unique, which forces merging
+#
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+#
+# Tests below show the avg_frequency calculated during EITS collection,
+# which we can compare with the count of distinct values for the same columns
+#
+#
+# Running ANALYZE for customer table
+#
+ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.customer analyze status Engine-independent statistics collected
+dbt3.customer analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey');
+column_name avg_frequency
+c_name 1.0000
+c_nationkey 6.0000
+c_phone 1.0000
+SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone)
+FROM customer;
+COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone)
+150 150 25 150
+#
+# Running ANALYZE for lineitem table
+#
+ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.lineitem analyze status Engine-independent statistics collected
+dbt3.lineitem analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct');
+column_name avg_frequency
+l_shipDATE 2.6500
+l_shipinstruct 1501.2500
+SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem;
+COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct)
+6005 2266 4
+#
+# Running ANALYZE for nation table
+#
+ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.nation analyze status Engine-independent statistics collected
+dbt3.nation analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey');
+column_name avg_frequency
+n_name 1.0000
+n_regionkey 5.0000
+SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation;
+COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey)
+25 25 5
+#
+# Running ANALYZE for part table
+#
+ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.part analyze status Engine-independent statistics collected
+dbt3.part analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr');
+column_name avg_frequency
+p_name 1.0000
+p_mfgr 40.0000
+SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part;
+COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr)
+200 200 5
+#
+# Running ANALYZE for supplier table
+#
+ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES ();
+Table Op Msg_type Msg_text
+dbt3.supplier analyze status Engine-independent statistics collected
+dbt3.supplier analyze status Table is already up to date
+SELECT column_name, avg_frequency
+FROM mysql.column_stats
+WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone');
+column_name avg_frequency
+s_name 1.0000
+s_phone 1.0000
+s_acctbal 1.0000
+SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone)
+FROM supplier;
+COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone)
+10 10 10 10
+SET max_heap_table_size= @save_max_heap_table_size;
+SET histogram_size= @save_histogram_size;
+#
+# Test with CHAR types
+#
+CREATE VIEW v1 AS
+SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT COUNT(*) FROM v1;
+COUNT(*)
+100
+SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+COUNT(DISTINCT c_phone)
+100
+SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1;
+GROUP_CONCAT(c_phone ORDER BY c_phone)
+10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267
+SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_phone)
+10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267
+SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_phone)
+["10-267-172-7101","10-349-718-3044","10-473-439-3214","10-677-951-2353","10-773-203-7342","11-355-584-3112","11-751-989-4627","11-845-129-3851","12-427-271-9466","12-446-416-8471","12-514-298-3699","12-970-682-3487","13-312-472-8245","13-558-731-7204","13-652-915-8939","13-702-694-4520","13-750-942-6364","13-761-547-5974","13-806-545-9701","13-835-723-3223","14-128-190-5944","14-199-862-7209","14-273-885-6505","14-280-874-8044","15-741-346-9870","15-745-585-8219","16-357-681-2007","16-658-112-3221","17-361-978-7059","17-501-210-4726","17-697-919-8406","17-710-812-5403","18-239-400-3677","18-385-235-7162","18-774-241-1462","18-971-699-1843","19-140-352-1403","19-190-993-9281","19-216-107-2107","19-403-114-4356","19-407-425-2584","19-953-499-8833","20-180-440-8525","20-781-609-3107","20-893-536-2069","20-895-685-6920","20-908-631-4424","20-917-711-4011","20-966-284-8065","21-186-284-5998","21-200-159-5932","21-562-498-6636","21-840-210-3572","22-302-930-4756","22-306-880-7212","22-603-468-3533","22-885-845-6889","22-918-832-2411","23-244-493-2508","23-562-444-8454","23-768-687-3665","24-394-451-5404","25-147-850-4166","25-168-852-5363","25-336-529-9919","25-344-968-5422","25-430-914-2194","25-923-255-2929","25-989-741-2988","26-190-260-5375","26-314-406-7725","26-516-273-2566","26-632-309-5792","26-777-409-5654","26-992-422-8153","27-147-574-9335","27-269-357-4674","27-408-997-8430","27-411-990-2959","27-566-888-7431","27-588-919-5638","27-626-559-8599","28-159-442-5305","28-183-750-7809","28-190-982-9759","28-396-526-5053","28-639-943-7051","29-233-262-8382","29-261-996-3120","29-316-665-2897","29-797-439-6760","30-749-445-4907","31-101-672-2951","32-363-455-4837","32-817-154-4122","32-828-107-2832","32-957-234-8742","33-197-837-7094","33-464-151-3439","33-733-623-5267"]
+DROP VIEW v1;
+CREATE VIEW v1 AS
+SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey;
+#
+# Test with VARCHAR types
+#
+SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1;
+GROUP_CONCAT(c_name ORDER BY c_name)
+Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149
+SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_name)
+Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149
+SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_name)
+["Customer#000000001","Customer#000000002","Customer#000000004","Customer#000000005","Customer#000000007","Customer#000000008","Customer#000000010","Customer#000000011","Customer#000000013","Customer#000000014","Customer#000000016","Customer#000000017","Customer#000000019","Customer#000000020","Customer#000000022","Customer#000000023","Customer#000000025","Customer#000000026","Customer#000000028","Customer#000000029","Customer#000000031","Customer#000000032","Customer#000000034","Customer#000000035","Customer#000000037","Customer#000000038","Customer#000000040","Customer#000000041","Customer#000000043","Customer#000000044","Customer#000000046","Customer#000000047","Customer#000000049","Customer#000000050","Customer#000000052","Customer#000000053","Customer#000000055","Customer#000000056","Customer#000000058","Customer#000000059","Customer#000000061","Customer#000000062","Customer#000000064","Customer#000000065","Customer#000000067","Customer#000000068","Customer#000000070","Customer#000000071","Customer#000000073","Customer#000000074","Customer#000000076","Customer#000000077","Customer#000000079","Customer#000000080","Customer#000000082","Customer#000000083","Customer#000000085","Customer#000000086","Customer#000000088","Customer#000000089","Customer#000000091","Customer#000000092","Customer#000000094","Customer#000000095","Customer#000000097","Customer#000000098","Customer#000000100","Customer#000000101","Customer#000000103","Customer#000000104","Customer#000000106","Customer#000000107","Customer#000000109","Customer#000000110","Customer#000000112","Customer#000000113","Customer#000000115","Customer#000000116","Customer#000000118","Customer#000000119","Customer#000000121","Customer#000000122","Customer#000000124","Customer#000000125","Customer#000000127","Customer#000000128","Customer#000000130","Customer#000000131","Customer#000000133","Customer#000000134","Customer#000000136","Customer#000000137","Customer#000000139","Customer#000000140","Customer#000000142","Customer#000000143","Customer#000000145","Customer#000000146","Customer#000000148","Customer#000000149"]
+DROP VIEW v1;
+#
+# Test with FIXED TYPES
+#
+CREATE VIEW v1 AS
+SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1;
+GROUP_CONCAT(c_nationkey ORDER BY c_nationkey)
+0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+GROUP_CONCAT(DISTINCT c_nationkey)
+0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+JSON_ARRAYAGG(DISTINCT c_nationkey)
+[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]
+DROP VIEW v1;
+DROP DATABASE dbt3;
+USE test;
+#
+# Testing for merge_buffers when Unique is packed
+#
+CREATE TABLE t1 (a VARCHAR(1500));
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a';
+column_name avg_frequency
+a 2.0000
+SELECT COUNT(*), COUNT(DISTINCT a) FROM t1;
+COUNT(*) COUNT(DISTINCT a)
+2400 1200
+SET max_heap_table_size= @save_max_heap_table_size;
+DROP TABLE t1;
+#
+# Simple test for DISTINCT with VARCHAR, CHAR and BLOBS
+#
+#
+# Test with VARCHAR types
+#
+CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
+CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+["a","b","c"]
+DROP TABLE t1;
+#
+# Test with CHAR types
+#
+CREATE TABLE t1 (a CHAR(255), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
+CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+["a","b","c"]
+#
+# Test with INT types (fixed size datatypes)
+#
+SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
+GROUP_CONCAT(DISTINCT b)
+1,2,3,4
+SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
+JSON_ARRAYAGG(DISTINCT b)
+[1,2,3,4]
+DROP TABLE t1;
+#
+# Test with BLOB types
+#
+CREATE TABLE t1 (a BLOB, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+GROUP_CONCAT(DISTINCT a)
+a,b,c
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+JSON_ARRAYAGG(DISTINCT a)
+[null,"a","b","c"]
+DROP TABLE t1;
diff --git a/mysql-test/main/unique_packed.test b/mysql-test/main/unique_packed.test
new file mode 100644
index 00000000000..984aac0a4cd
--- /dev/null
+++ b/mysql-test/main/unique_packed.test
@@ -0,0 +1,251 @@
+--source include/have_sequence.inc
+create database dbt3;
+use dbt3;
+
+--disable_result_log
+--disable_query_log
+--source include/dbt3_s001.inc
+--enable_result_log
+--enable_query_log
+
+SET @save_tmp_table_size= @@tmp_table_size;
+
+CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr;
+SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT COUNT(*) FROM v1 GROUP BY p_mfgr;
+SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr;
+
+DROP VIEW v1;
+
+CREATE VIEW v1 AS
+SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey;
+
+SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr;
+
+--echo #
+--echo # The two queries should return the same result as the above one
+--echo #
+
+LET $query= SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p
+WHERE l.l_partkey= p.p_partkey
+GROUP BY p.p_mfgr;
+
+eval $query;
+
+SET tmp_table_size=1024;
+--echo #
+--echo # merge_walk() gets called, that is, merging of sorted chunks of the file happens
+--echo #
+eval $query;
+SET tmp_table_size = @save_tmp_table_size;
+DROP VIEW v1;
+
+--echo # The result should be 200
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+--echo #
+--echo # merge_many_buffers() gets called, followed by merge_walk()
+--echo #
+SET tmp_table_size=1024;
+SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p;
+SET tmp_table_size = @save_tmp_table_size;
+
+CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey;
+
+--echo #
+--echo # The two queries should return the same result
+--echo #
+SELECT n_name, GROUP_CONCAT(c_name), COUNT(c_name) FROM v1 GROUP BY n_name;
+
+SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name)
+FROM customer, orders, nation
+WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name;
+
+DROP VIEW v1;
+
+--echo #
+--echo # Tests for packed unique during EITS collection
+--echo #
+
+SET @save_histogram_size= @@histogram_size;
+SET histogram_size=0;
+--echo #
+--echo # Testing when histograms are not created; this tests the function
+--echo # count_distinct_single_occurence_walk passed as a callback to Unique::walk
+--echo #
+
+--source include/unique_packed_eits.inc
+
+--echo #
+--echo # Testing when histograms are created; this tests the function
+--echo # histogram_build_walk passed as a callback to Unique::walk
+--echo #
+
+SET histogram_size=255;
+--source include/unique_packed_eits.inc
+
+
+--echo #
+--echo # Testing with very small memory for Unique, which forces merging
+--echo #
+
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+--source include/unique_packed_eits.inc
+SET max_heap_table_size= @save_max_heap_table_size;
+
+SET histogram_size= @save_histogram_size;
+
+--echo #
+--echo # Test with CHAR types
+--echo #
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT COUNT(*) FROM v1;
+SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey;
+DROP VIEW v1;
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey;
+
+--echo #
+--echo # Test with VARCHAR types
+--echo #
+
+SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey;
+DROP VIEW v1;
+
+--echo #
+--echo # Test with FIXED TYPES
+--echo #
+
+CREATE VIEW v1 AS
+SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey;
+
+SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1;
+SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey;
+
+DROP VIEW v1;
+
+DROP DATABASE dbt3;
+
+USE test;
+
+--echo #
+--echo # Testing for merge_buffers when Unique is packed
+--echo #
+
+CREATE TABLE t1 (a VARCHAR(1500));
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100;
+
+SET @save_max_heap_table_size= @@max_heap_table_size;
+SET max_heap_table_size=16384;
+ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES();
+
+SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a';
+SELECT COUNT(*), COUNT(DISTINCT a) FROM t1;
+SET max_heap_table_size= @save_max_heap_table_size;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Simple test for DISTINCT with VARCHAR, CHAR and BLOBS
+--echo #
+
+--echo #
+--echo # Test with VARCHAR types
+--echo #
+
+CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Test with CHAR types
+--echo #
+
+CREATE TABLE t1 (a CHAR(255), b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+
+
+--echo #
+--echo # Test with INT types (fixed size datatypes)
+--echo #
+
+SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test with BLOB types
+--echo #
+
+CREATE TABLE t1 (a BLOB, b INT NOT NULL);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6);
+SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
+SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
+DROP TABLE t1;
diff --git a/sql/field.h b/sql/field.h
index e4c5ffcc0de..693d0b63ded 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1109,6 +1109,11 @@ public:
*/
virtual uint32 data_length() { return pack_length(); }
virtual uint32 sort_length() const { return pack_length(); }
+ /*
+ Returns the sort_length for a field without the suffix length bytes
+ that are added for fields with a binary charset.
+ */
+ virtual uint32 sort_length_without_suffix() const { return pack_length(); }
/*
sort_suffix_length() return the length bytes needed to store the length
@@ -1316,9 +1321,21 @@ public:
}
return update_fl;
}
+
+ /*
+ @brief
+ Pack the field's current value into a buffer
+ @param to buffer to store the packed value
+ @return length of the packed value
+ */
+ virtual uint make_packed_record_field(uchar *to)
+ {
+ uchar* end= pack(to, ptr);
+ return static_cast<uint>(end - to);
+ }
virtual void store_field_value(uchar *val, uint len)
{
- memcpy(ptr, val, len);
+ memcpy(ptr, val, len);
}
virtual uint decimals() const { return 0; }
virtual Information_schema_numeric_attributes
@@ -4122,6 +4139,10 @@ public:
{
return (uint32) field_length + sort_suffix_length();
}
+ uint32 sort_length_without_suffix() const override
+ {
+ return (uint32) field_length;
+ }
virtual uint32 sort_suffix_length() const override
{
return (field_charset() == &my_charset_bin ? length_bytes : 0);
@@ -4463,6 +4484,10 @@ public:
{ return (uint32) (packlength); }
uint row_pack_length() const override { return pack_length_no_ptr(); }
uint32 sort_length() const override;
+ uint32 sort_length_without_suffix() const override
+ {
+ return field_length;
+ }
uint32 sort_suffix_length() const override;
uint32 value_length() override { return get_length(); }
virtual uint32 max_data_length() const override
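A minimal standalone sketch of the length bookkeeping these new Field hooks express, assuming a binary-charset VARCHAR with two length bytes (illustrative types only; the real Field hierarchy is far richer):

  #include <cassert>
  #include <cstdint>

  // Sketch: sort_length() includes the suffix length bytes that
  // sort_length_without_suffix() leaves out.
  struct VarcharLengths
  {
    uint32_t field_length;   // maximum data length
    uint32_t length_bytes;   // 1 or 2
    bool     binary_charset;

    uint32_t sort_suffix_length() const
    { return binary_charset ? length_bytes : 0; }

    uint32_t sort_length_without_suffix() const
    { return field_length; }

    uint32_t sort_length() const
    { return sort_length_without_suffix() + sort_suffix_length(); }
  };

  int main()
  {
    VarcharLengths v{1500, 2, true};
    assert(v.sort_length() == 1502);  // 1500 data bytes + 2 suffix bytes
    return 0;
  }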
diff --git a/sql/filesort.cc b/sql/filesort.cc
index ac43c96b0e0..e0389bb0343 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -35,6 +35,7 @@
#include "filesort_utils.h"
#include "sql_select.h"
#include "debug_sync.h"
+#include "uniques.h"
/* functions defined in this file */
@@ -1694,6 +1695,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
uint size_of_sort_length= param->using_packed_sortkeys() ?
Sort_keys::size_of_length_field : 0;
+ uint size_of_dupl_count= param->min_dupl_count ?
+ sizeof(element_count) : 0;
for (; ix < count; ++ix)
{
@@ -1709,14 +1712,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
buffpek->buffer_end())
break; // Incomplete record.
- uchar *plen= record + sort_length;
+ uchar *plen= record + sort_length + size_of_dupl_count;
+
uint res_length= param->get_result_length(plen);
if (plen + res_length > buffpek->buffer_end())
break; // Incomplete record.
- DBUG_ASSERT(res_length > 0);
+ DBUG_ASSERT(!param->sort_keys || res_length > 0);
DBUG_ASSERT(sort_length + res_length <= param->rec_length);
record+= sort_length;
record+= res_length;
+ record+= size_of_dupl_count;
}
DBUG_ASSERT(ix > 0);
count= ix;
@@ -1812,12 +1817,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
rec_length= param->rec_length;
res_length= param->res_length;
sort_length= param->sort_length;
- uint dupl_count_ofs= rec_length-sizeof(element_count);
uint min_dupl_count= param->min_dupl_count;
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
+
bool check_dupl_count= flag && min_dupl_count;
offset= (rec_length-
(flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
- uint wr_len= flag ? res_length : rec_length;
uint wr_offset= flag ? offset : 0;
const bool using_packed_sortkeys= param->using_packed_sortkeys();
@@ -1867,10 +1872,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
Store it also in 'to_file'.
*/
buffpek= (Merge_chunk*) queue_top(&queue);
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+
+ DBUG_ASSERT(rec_length <= param->sort_length);
+
memcpy(unique_buff, buffpek->current_key(), rec_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+
buffpek->advance_current_key(rec_length);
buffpek->decrement_mem_count();
if (buffpek->mem_count() == 0)
@@ -1900,28 +1911,31 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
src= buffpek->current_key();
if (cmp) // Remove duplicates
{
- uchar *current_key= buffpek->current_key();
- if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key))
+ if (!(*cmp)(first_cmp_arg, &unique_buff, &src))
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
}
goto skip_duplicate;
}
+
if (min_dupl_count)
{
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
}
+ res_length= rec_length - size_of_dupl_count;
src= unique_buff;
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
{
- param->get_rec_and_res_len(buffpek->current_key(),
- &rec_length, &res_length);
const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
/*
@@ -1943,10 +1957,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (cmp)
{
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+ DBUG_ASSERT(rec_length <= param->sort_length);
memcpy(unique_buff, buffpek->current_key(), rec_length);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ {
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+ }
}
if (!--max_rows)
{
@@ -1989,6 +2008,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
@@ -1998,13 +2018,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (min_dupl_count)
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ {
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
+ }
if (!check_dupl_count || dupl_count >= min_dupl_count)
{
src= unique_buff;
- if (my_b_write(to_file, src+wr_offset, wr_len))
+ res_length= rec_length - size_of_dupl_count;
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+ if (my_b_write(to_file,
+ src + (offset_for_packing ?
+ rec_length - res_length : // sort length
+ wr_offset),
+ bytes_to_write))
goto err; /* purecov: inspected */
if (!--max_rows)
goto end;
@@ -2022,17 +2051,28 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
for (uint ix= 0; ix < buffpek->mem_count(); ++ix)
{
uchar *src= buffpek->current_key();
- param->get_rec_and_res_len(src,
- &rec_length, &res_length);
- const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
- if (check_dupl_count)
+ if (cmp)
{
- memcpy((uchar *) &dupl_count,
- buffpek->current_key() + offset + dupl_count_ofs,
- sizeof(dupl_count));
- if (dupl_count < min_dupl_count)
- continue;
+ rec_length= param->get_record_length_for_unique(src,
+ size_of_dupl_count);
+ res_length= rec_length - size_of_dupl_count;
+ if (check_dupl_count)
+ {
+ /*
+ TODO varun: this looks incorrect to me
+ */
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count));
+
+ if (dupl_count < min_dupl_count)
+ continue;
+ }
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
+
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+
if(my_b_write(to_file,
src + (offset_for_packing ?
rec_length - res_length : // sort length
@@ -2532,6 +2572,23 @@ void Sort_param::try_to_pack_sortkeys()
rec_length= sort_length + addon_length;
}
+/*
+ @brief
+ Return the length of the record in the Unique tree
+
+ @param
+ to pointer to the key value
+ size_of_dupl_count if min_dupl_count > 0, the record additionally
+ stores a duplicate counter of this size at its end
+*/
+uint32 Sort_param::get_record_length_for_unique(uchar *to,
+ uint size_of_dupl_count)
+{
+ return using_packed_sortkeys() ?
+ Unique::read_packed_length(to) + size_of_dupl_count :
+ rec_length;
+}
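+
+/*
+  Illustrative sketch (assumed values, not part of the surrounding code):
+  for a packed Unique the key carries its own length prefix, while a
+  fixed-size Unique always reports Sort_param::rec_length.
+
+    uchar key[32];
+    Unique::store_packed_length(key, 4 + 9);        // 9 payload bytes
+    // with using_packed_sortkeys() and min_dupl_count > 0:
+    //   get_record_length_for_unique(key, sizeof(element_count))
+    //     == 13 + sizeof(element_count)
+*/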
+
uint
Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item,
@@ -2742,6 +2799,45 @@ bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const
/*
+ @brief
+ Setup the SORT_FIELD structure
+
+ @param
+ fld field structure
+ exclude_nulls TRUE if nulls are not to be considered
+    with_suffix     TRUE if length bytes are needed to store the length,
+                    as for binary charsets
+
+ @note
+ Currently used only by Unique object
+    TODO varun: the filesort code could be refactored to use this function.
+
+*/
+void SORT_FIELD::setup(Field *fld, bool exclude_nulls, bool with_suffix)
+{
+ field= fld;
+ item= NULL;
+ /*
+    For Unique this always needs to be FALSE;
+    alternatively, reverse could be passed as an argument to this function
+ */
+ reverse= false;
+ original_length= length= (with_suffix ?
+ field->sort_length() :
+ field->sort_length_without_suffix());
+
+ cs= field->sort_charset();
+ suffix_length= with_suffix ? field->sort_suffix_length() : 0;
+ type= field->is_packable() ?
+ SORT_FIELD_ATTR::VARIABLE_SIZE :
+ SORT_FIELD_ATTR::FIXED_SIZE;
+ maybe_null= exclude_nulls ? false : field->maybe_null();
+ length_bytes= is_variable_sized() ?
+ number_storage_requirement(length) : 0;
+}
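+
+/*
+  Usage sketch for SORT_FIELD::setup() (illustrative; the field name is
+  assumed):
+
+    SORT_FIELD sf;
+    sf.setup(varchar_field, true, false);  // exclude NULLs, no suffix
+    // a packable field yields:
+    //   sf.type         == SORT_FIELD_ATTR::VARIABLE_SIZE
+    //   sf.length_bytes == number_storage_requirement(sf.length)
+    // a fixed-size field yields FIXED_SIZE with length_bytes == 0
+*/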
+
+
+/*
Compare function used for packing sort keys
*/
@@ -2871,16 +2967,45 @@ int compare_packed_sort_keys(void *sort_param,
unsigned char **a_ptr, unsigned char **b_ptr)
{
int retval= 0;
- size_t a_len, b_len;
Sort_param *param= (Sort_param*)sort_param;
Sort_keys *sort_keys= param->sort_keys;
uchar *a= *a_ptr;
uchar *b= *b_ptr;
- a+= Sort_keys::size_of_length_field;
- b+= Sort_keys::size_of_length_field;
- for (SORT_FIELD *sort_field= sort_keys->begin();
- sort_field != sort_keys->end(); sort_field++)
+ if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field,
+ b + Sort_keys::size_of_length_field)))
+ return retval;
+
+ /*
+    This comparison is done for the case when the sort key is appended with
+    the ROW_ID. In such cases there are no addon fields, so a memcmp over
+    the remainder of both keys is sufficient
+ */
+ if (!param->using_addon_fields())
+ {
+ a+= Sort_keys::read_sortkey_length(a);
+ b+= Sort_keys::read_sortkey_length(b);
+ retval= memcmp(a, b, param->res_length);
+ }
+ return retval;
+}
+
+
+/*
+ @brief
+ Compare two sort keys
+
+ @retval
+ >0 key a greater than b
+ =0 key a equal to b
+ <0 key a less than b
+*/
+
+int Sort_keys::compare_keys(uchar *a, uchar *b)
+{
+ int retval= 0;
+ size_t a_len, b_len;
+ for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++)
{
retval= sort_field->is_variable_sized() ?
sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) :
@@ -2891,15 +3016,7 @@ int compare_packed_sort_keys(void *sort_param,
a+= a_len;
b+= b_len;
-
}
- /*
- this comparison is done for the case when the sort keys is appended with
- the ROW_ID pointer. For such cases we don't have addon fields
- so we can make a memcmp check over both the sort keys
- */
- if (!param->using_addon_fields())
- retval= memcmp(a, b, param->res_length);
return retval;
}
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc
index b9a84775311..48075106206 100644
--- a/sql/item_jsonfunc.cc
+++ b/sql/item_jsonfunc.cc
@@ -1452,7 +1452,7 @@ append_null:
}
-static int append_json_value_from_field(String *str,
+static bool append_json_value_from_field(String *str,
Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val)
{
if (i->type_handler()->is_bool_type())
@@ -1498,6 +1498,73 @@ append_null:
}
+/*
+ @brief
+ Append the value of a field in JSON format
+
+ @param
+ str buffer to write the value
+ item JSON_ARRAYAGG item
+ field field whose value needs to be appended
+ tmp_val temp buffer
+
+ @note
+ Use this function when the field has the value in its ptr.
+ This is currently used when packing is done for JSON_ARRAYAGG function.
+ In the case of packing, we have to unpack the value to Field::ptr.
+
+ @retval
+ FALSE value appended in JSON format
+ TRUE error
+*/
+
+static bool append_json_value_from_field(String *str, Item *item, Field *field,
+ String *tmp_val)
+{
+ if (item->type_handler()->is_bool_type())
+ {
+ if (field->is_null())
+ goto append_null;
+
+ longlong v_int= field->val_int();
+ const char *t_f;
+ int t_f_len;
+
+ if (v_int)
+ {
+ t_f= "true";
+ t_f_len= 4;
+ }
+ else
+ {
+ t_f= "false";
+ t_f_len= 5;
+ }
+
+ return str->append(t_f, t_f_len);
+ }
+ {
+ if (field->is_null())
+ goto append_null;
+ String *sv= field->val_str(tmp_val);
+
+ if (item->is_json_type())
+ return str->append(sv->ptr(), sv->length());
+
+ if (item->result_type() == STRING_RESULT)
+ {
+ return str->append("\"", 1) ||
+ st_append_escaped(str, sv) ||
+ str->append("\"", 1);
+ }
+ return st_append_escaped(str, sv);
+ }
+
+append_null:
+ return str->append("null", 4);
+}
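+
+/*
+  Behaviour sketch for the function above (examples assumed): a bool-type
+  item with a non-zero field value appends "true", a NULL field appends
+  "null", and a STRING_RESULT value is escaped and quoted, e.g. a field
+  holding ab"c appends "ab\"c" to str.
+*/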
+
+
static int append_json_keyname(String *str, Item *item, String *tmp_val)
{
String *sv= item->val_str(tmp_val);
@@ -3689,6 +3756,19 @@ String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
}
+String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
+ String *tmp)
+{
+ m_tmp_json.length(0);
+
+ if (append_json_value_from_field(&m_tmp_json, i, f, tmp))
+ return NULL;
+
+ return &m_tmp_json;
+}
+
+
void Item_func_json_arrayagg::cut_max_length(String *result,
uint old_length, uint max_length) const
{
diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h
index 0408b3ad72f..16f700724bb 100644
--- a/sql/item_jsonfunc.h
+++ b/sql/item_jsonfunc.h
@@ -547,6 +547,7 @@ protected:
String *get_str_from_item(Item *i, String *tmp);
String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset);
+ String *get_str_from_field(Item *i, Field *f, String *tmp);
void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index e79344507b2..96b17e74019 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -701,6 +701,32 @@ int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2)
}
+/**
+ Correctly compare composite keys.
+
+ Used by the Unique class to compare packed keys. Will do correct comparisons
+ for composite keys with various field types.
+
+ @param arg Pointer to the relevant Aggregator_distinct instance
+ @param key1 left key image
+ @param key2 right key image
+
+ @return comparison result
+ @retval <0 if key1 < key2
+ @retval =0 if key1 = key2
+ @retval >0 if key1 > key2
+*/
+
+int
+Aggregator_distinct::composite_packed_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Aggregator_distinct *aggr= (Aggregator_distinct *) arg;
+ DBUG_ASSERT(aggr->tree);
+ return aggr->tree->compare_packed_keys(key1, key2);
+}
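+
+/*
+  Wiring sketch: when packing is allowed, Aggregator_distinct::setup()
+  installs this function as the tree comparator, with the aggregator
+  itself as the fixed argument:
+
+    compare_key= (qsort_cmp2) composite_packed_key_cmp;
+    cmp_arg= (void*)this;
+*/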
+
+
/***************************************************************************/
C_MODE_START
@@ -766,13 +792,19 @@ bool Aggregator_distinct::setup(THD *thd)
return TRUE;
/* Create a table with an unique key over all parameters */
+ uint non_const_items= 0;
for (uint i=0; i < item_sum->get_arg_count() ; i++)
{
Item *item=item_sum->get_arg(i);
if (list.push_back(item, thd->mem_root))
return TRUE; // End of memory
- if (item->const_item() && item->is_null())
- always_null= true;
+ if (item->const_item())
+ {
+ if (item->is_null())
+ always_null= true;
+ }
+ else
+ non_const_items++;
}
if (always_null)
return FALSE;
@@ -861,16 +893,27 @@ bool Aggregator_distinct::setup(THD *thd)
}
}
}
+
+ bool allow_packing= item_sum->is_packing_allowed(table, &tree_key_length);
+
+ if (allow_packing)
+ {
+ compare_key= (qsort_cmp2) composite_packed_key_cmp;
+ cmp_arg= (void*)this;
+ }
+
DBUG_ASSERT(tree == 0);
tree= new Unique(compare_key, cmp_arg, tree_key_length,
- item_sum->ram_limitation(thd));
+ item_sum->ram_limitation(thd), 0, allow_packing);
/*
The only time tree_key_length could be 0 is if someone does
count(distinct) on a char(0) field - stupid thing to do,
but this has to be handled - otherwise someone can crash
the server with a DoS attack
*/
- if (! tree)
+ if (!tree || tree->setup(thd, item_sum,
+ non_const_items, item_sum->get_arg_count(),
+ TRUE))
return TRUE;
}
return FALSE;
@@ -987,9 +1030,23 @@ bool Aggregator_distinct::add()
if (copy_funcs(tmp_table_param->items_to_copy, table->in_use))
return TRUE;
- for (Field **field=table->field ; *field ; field++)
- if ((*field)->is_real_null(0))
- return 0; // Don't count NULL
+ uchar *to= NULL, *orig_to= table->record[0] + table->s->null_bytes;
+ uint packed_length= 0;
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= tree->get_packed_rec_ptr();
+ if ((to= make_packed_record(to)) == NULL)
+ return false;
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
+ else
+ {
+ for (Field **field=table->field ; *field ; field++)
+ if ((*field)->is_real_null(0))
+ return 0; // Don't count NULL
+ }
if (tree)
{
@@ -999,7 +1056,8 @@ bool Aggregator_distinct::add()
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/
- return tree->unique_add(table->record[0] + table->s->null_bytes);
+ return tree->unique_add(orig_to, tree->is_packed() ?
+ packed_length : tree->get_size());
}
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) &&
table->file->is_fatal_error(error, HA_CHECK_DUP))
@@ -1017,7 +1075,7 @@ bool Aggregator_distinct::add()
'0' values are also stored in the tree. This doesn't matter
for SUM(DISTINCT), but is important for AVG(DISTINCT)
*/
- return tree->unique_add(table->field[0]->ptr);
+ return tree->unique_add(table->field[0]->ptr, tree->get_size());
}
}
@@ -1778,6 +1836,48 @@ bool Aggregator_distinct::unique_walk_function_for_count(void *element)
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+    TRUE   tree stores packed values
+ FALSE otherwise
+*/
+
+bool Aggregator_distinct::is_distinct_packed()
+{
+ return tree && tree->is_packed();
+}
+
+
+/*
+ @brief
+ Make a record with packed values
+
+ @param
+ to buffer to store the packed value
+*/
+uchar *Aggregator_distinct::make_packed_record(uchar *to)
+{
+ to+= Unique::size_of_length_field;
+
+ for (Field **field=table->field ; *field ; field++)
+ {
+ Field *fld= (*field);
+ if (fld->is_real_null(0))
+ return NULL; // Don't count NULL
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ to+= fld->make_packed_record_field(to);
+ }
+ return to;
+}
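+
+/*
+  Layout sketch of the record built above (illustration):
+
+    [4-byte length field][packed field 1]...[packed field N]
+
+  The caller stores the total length via Unique::store_packed_length(),
+  which keeps only the payload size in the prefix; read_packed_length()
+  adds size_of_length_field back. NULL values never reach the tree here,
+  since rows containing NULLs are not counted.
+*/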
+
+
Aggregator_distinct::~Aggregator_distinct()
{
if (tree)
@@ -3612,6 +3712,33 @@ int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
/**
+  Compares the packed values of the fields in the expr list of GROUP_CONCAT.
+ @note
+
+ GROUP_CONCAT([DISTINCT] expr [,expr ...]
+ [ORDER BY {unsigned_integer | col_name | expr}
+ [ASC | DESC] [,col_name ...]]
+ [SEPARATOR str_val])
+
+ @return
+ @retval -1 : key1 < key2
+ @retval 0 : key1 = key2
+ @retval 1 : key1 > key2
+*/
+int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *a_ptr,
+ const void *b_ptr)
+{
+ Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
+
+ DBUG_ASSERT(item_func->unique_filter);
+ uchar *a= (uchar*)a_ptr;
+ uchar *b= (uchar*)b_ptr;
+ return item_func->unique_filter->compare_packed_keys(a, b);
+}
+
+
+/**
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
*/
@@ -3778,10 +3905,21 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
String tmp((char *)table->record[1], table->s->reclength,
default_charset_info);
String tmp2;
- uchar *key= (uchar *) key_arg;
+ const uchar *key= (const uchar *) key_arg;
+ const uchar *key_end= NULL;
String *result= &item->result;
Item **arg= item->args, **arg_end= item->args + item->arg_count_field;
uint old_length= result->length();
+ SORT_FIELD *pos;
+
+ const bool packed= item->is_distinct_packed();
+
+ if (packed)
+ {
+ pos= item->unique_filter->get_sortorder();
+ key_end= key + item->unique_filter->get_full_size();
+ key+= Unique::size_of_length_field;
+ }
ulonglong *offset_limit= &item->copy_offset_limit;
ulonglong *row_limit = &item->copy_row_limit;
@@ -3822,11 +3960,43 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
Field *field= (*arg)->get_tmp_table_field();
if (field)
{
- uint offset= (field->offset(field->table->record[0]) -
- table->s->null_bytes);
- DBUG_ASSERT(offset < table->s->reclength);
- res= item->get_str_from_field(*arg, field, &tmp, key,
- offset + item->get_null_bytes());
+ if (packed)
+ {
+ DBUG_ASSERT(pos->field == field);
+ if (!item->skip_nulls() && field->maybe_null())
+ {
+ if (*key == 0)
+ {
+ // Case with NULL value
+ field->set_null();
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key++;
+ }
+ else
+ {
+ // Case with NOT NULL value
+ field->set_notnull();
+ const uchar *end= field->unpack(field->ptr, key+1, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ }
+ else
+ {
+ const uchar *end= field->unpack(field->ptr, key, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ pos++;
+ }
+ else
+ {
+ uint offset= (field->offset(field->table->record[0]) -
+ table->s->null_bytes);
+ DBUG_ASSERT(offset < table->s->reclength);
+ res= item->get_str_from_field(*arg, field, &tmp, key,
+ offset + item->get_null_bytes());
+ }
}
else
res= item->get_str_from_item(*arg, &tmp);
@@ -4137,6 +4307,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
size_t row_str_len= 0;
StringBuffer<MAX_FIELD_WIDTH> buf;
String *res;
+ uchar *to= NULL, *orig_to= get_record_pointer();
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= unique_filter->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ }
+
for (uint i= 0; i < arg_count_field; i++)
{
Item *show_item= args[i];
@@ -4151,6 +4329,24 @@ bool Item_func_group_concat::add(bool exclude_nulls)
return 0; // Skip row if it contains null
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ if (is_distinct_packed())
+ {
+ if (!exclude_nulls && field->maybe_null())
+ {
+ if (field->is_null_in_record((const uchar*) table->record[0]))
+ {
+ *to++=0;
+ continue;
+ }
+ *to++=1;
+ }
+ uchar* end= field->pack(to, field->ptr);
+ to+= static_cast<uint>(end - to);
+ }
}
else
{
@@ -4167,9 +4363,17 @@ bool Item_func_group_concat::add(bool exclude_nulls)
if (distinct)
{
+ DBUG_ASSERT(unique_filter);
+ uint packed_length= unique_filter->get_size();
+
+ if (unique_filter->is_packed())
+ {
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
/* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree();
- unique_filter->unique_add(get_record_pointer());
+ unique_filter->unique_add(get_record_pointer(), packed_length);
if (count == unique_filter->elements_in_tree())
row_eligible= FALSE;
}
@@ -4285,16 +4489,22 @@ bool Item_func_group_concat::setup(THD *thd)
/* Push all not constant fields to the list and create a temp table */
always_null= 0;
+ uint non_const_items= 0;
for (uint i= 0; i < arg_count_field; i++)
{
Item *item= args[i];
if (list.push_back(item, thd->mem_root))
DBUG_RETURN(TRUE);
- if (item->const_item() && item->is_null() && skip_nulls())
+ if (item->const_item())
{
- always_null= 1;
- DBUG_RETURN(FALSE);
+ if (item->is_null() && skip_nulls())
+ {
+ always_null= 1;
+ DBUG_RETURN(FALSE);
+ }
}
+ else
+ non_const_items++;
}
List<Item> all_fields(list);
@@ -4377,6 +4587,7 @@ bool Item_func_group_concat::setup(THD *thd)
the row is not added to the result.
*/
uint tree_key_length= table->s->reclength - table->s->null_bytes;
+ bool allow_packing= is_packing_allowed(&tree_key_length);
if (arg_count_order)
{
@@ -4395,10 +4606,16 @@ bool Item_func_group_concat::setup(THD *thd)
}
if (distinct)
- unique_filter= new Unique(get_comparator_function_for_distinct(),
+ {
+ unique_filter= new Unique(get_comparator_function_for_distinct(allow_packing),
(void*)this,
tree_key_length + get_null_bytes(),
- ram_limitation(thd));
+ ram_limitation(thd), 0, allow_packing);
+
+ if (!unique_filter || unique_filter->setup(thd, this, non_const_items,
+ arg_count_field, skip_nulls()))
+ DBUG_RETURN(TRUE);
+ }
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
{
@@ -4456,11 +4673,13 @@ String* Item_func_group_concat::val_str(String* str)
Get the comparator function for DISTINCT clause
*/
-qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
+qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct(bool packed)
{
- return skip_nulls() ?
- group_concat_key_cmp_with_distinct :
- group_concat_key_cmp_with_distinct_with_nulls;
+ return packed ?
+ group_concat_packed_key_cmp_with_distinct :
+ (skip_nulls() ?
+ group_concat_key_cmp_with_distinct :
+ group_concat_key_cmp_with_distinct_with_nulls);
}
@@ -4488,9 +4707,11 @@ qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by()
uchar* Item_func_group_concat::get_record_pointer()
{
- return skip_nulls() ?
- table->record[0] + table->s->null_bytes :
- table->record[0];
+ return is_distinct_packed() ?
+ unique_filter->get_packed_rec_ptr() :
+ (skip_nulls() ?
+ table->record[0] + table->s->null_bytes :
+ table->record[0]);
}
@@ -4511,6 +4732,102 @@ uint Item_func_group_concat::get_null_bytes()
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+ TRUE tree stored packed values
+ FALSE otherwise
+*/
+
+bool Item_func_group_concat::is_distinct_packed()
+{
+ return unique_filter && unique_filter->is_packed();
+}
+
+
+/*
+ @brief
+ Checks if one can store packed values in the Unique tree
+
+ @param
+ total_length [OUT] length of the key in the tree
+
+ @retval
+ TRUE packing allowed
+ FALSE packing not allowed
+*/
+
+bool Item_func_group_concat::is_packing_allowed(uint* total_length)
+{
+ /*
+ TODO varun:
+    Currently the Unique tree is not packed if an ORDER BY clause is used.
+    This can be implemented once MDEV-22089 is fixed
+ */
+ if (!distinct || arg_count_order)
+ return false;
+
+ return Item_sum::is_packing_allowed(table, total_length);
+}
+
+
+/*
+ @brief
+ Check if storing packed values inside the Unique tree is allowed
+
+ @param table Table structure
+  @param total_length   [OUT] max length of the packed key (takes into
+                        account the length bytes also)
+*/
+
+bool Item_sum::is_packing_allowed(TABLE *table, uint* total_length)
+{
+ uint size_of_packable_fields= 0;
+ uint tot_length= 0;
+ for (uint i= 0; i < get_arg_count(); i++)
+ {
+ Item *item= args[i];
+ if (item->const_item())
+ continue;
+ Field *field= item->get_tmp_table_field();
+ if (field)
+ {
+ /*
+ For blob columns packing is not allowed
+ */
+ if (field->flags & BLOB_FLAG)
+ return false;
+
+ tot_length+= field->sort_length_without_suffix();
+ if (field->is_packable())
+ {
+ size_of_packable_fields+=
+ number_storage_requirement(field->sort_length_without_suffix());
+ }
+ }
+ }
+
+ // All fields are of fixed size
+ if (size_of_packable_fields == 0)
+ return false;
+
+ /*
+    TODO varun: we could introduce a heuristic here; packing is not needed
+    for small VARCHAR or CHAR columns
+ */
+ *total_length= tot_length;
+ /*
+    Unique::size_of_length_field is the number of bytes used to store the
+    packed length of each record inserted in the Unique tree
+ */
+ (*total_length)+= Unique::size_of_length_field + size_of_packable_fields;
+ return true;
+}
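+
+/*
+  Worked example for the computation above (values assumed): for the
+  non-const arguments (VARCHAR(10) in a single-byte charset, INT), only
+  the VARCHAR is packable, and number_storage_requirement(10) == 1, so
+
+    tot_length              = 10 + 4 = 14
+    size_of_packable_fields = 1
+    *total_length           = 14 + Unique::size_of_length_field + 1 = 19
+*/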
+
+
void Item_func_group_concat::print(String *str, enum_query_type query_type)
{
str->append(func_name());
diff --git a/sql/item_sum.h b/sql/item_sum.h
index dc520ce2578..39d5b79c1c4 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -590,6 +590,7 @@ public:
bool with_sum_func() const { return true; }
virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
+ bool is_packing_allowed(TABLE* table, uint* total_length);
};
@@ -696,7 +697,10 @@ public:
bool unique_walk_function(void *element);
bool unique_walk_function_for_count(void *element);
+ bool is_distinct_packed();
static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int composite_packed_key_cmp(void* arg, uchar* key1, uchar* key2);
+ uchar* make_packed_record(uchar *to);
};
@@ -1920,6 +1924,9 @@ protected:
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1,
const void* key2);
+ friend int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *key1,
+ const void *key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order_with_nulls(void *arg,
@@ -1941,6 +1948,9 @@ protected:
virtual String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset)
{ return f->val_str(tmp, key + offset); }
+ virtual String *get_str_from_field(Item *i, Field *f, String *tmp)
+ { return f->val_str(tmp); }
+
virtual void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
@@ -2015,11 +2025,12 @@ public:
{ context= (Name_resolution_context *)cntx; return FALSE; }
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_group_concat>(thd, this); }
- qsort_cmp2 get_comparator_function_for_distinct();
+ qsort_cmp2 get_comparator_function_for_distinct(bool packed);
qsort_cmp2 get_comparator_function_for_order_by();
uchar* get_record_pointer();
uint get_null_bytes();
-
+ bool is_distinct_packed();
+ bool is_packing_allowed(uint* total_length);
};
#endif /* ITEM_SUM_INCLUDED */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 80d67d884bf..7d65c85129b 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -11772,7 +11772,7 @@ int read_keys_and_merge_scans(THD *thd,
}
DBUG_ASSERT(file->ref_length == unique->get_size());
- DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
+ DBUG_ASSERT(thd->variables.sortbuff_size <= unique->get_max_in_memory_size());
for (;;)
{
@@ -11815,7 +11815,7 @@ int read_keys_and_merge_scans(THD *thd,
continue;
cur_quick->file->position(cur_quick->record);
- if (unique->unique_add((char*)cur_quick->file->ref))
+ if (unique->unique_add((char*)cur_quick->file->ref, unique->get_size()))
goto err;
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 58a786ed33c..c158d7c5a14 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -6454,6 +6454,7 @@ struct SORT_FIELD: public SORT_FIELD_ATTR
Field *field; /* Field to sort */
Item *item; /* Item if not sorting fields */
bool reverse; /* if descending sort */
+ void setup(Field *fld, bool exclude_nulls, bool with_suffix);
};
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 5942efcd601..db5b5b777a8 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -714,7 +714,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
{
table->file->position(table->record[0]);
if (unlikely((error=
- deltempfile->unique_add((char*) table->file->ref))))
+ deltempfile->unique_add((char*) table->file->ref,
+ deltempfile->get_size()))))
{
error= 1;
goto terminate_delete;
@@ -1350,7 +1351,8 @@ int multi_delete::send_data(List<Item> &values)
}
else
{
- error=tempfiles[secure_counter]->unique_add((char*) table->file->ref);
+ uint size= tempfiles[secure_counter]->get_size();
+ error=tempfiles[secure_counter]->unique_add((char*) table->file->ref, size);
if (unlikely(error))
{
error= 1; // Fatal error
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 40f0c5ede5f..d512a9727b6 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -310,6 +310,8 @@ public:
sort_length+= len;
}
+ int compare_keys(uchar *a, uchar *b);
+
static const uint size_of_length_field= 4;
private:
@@ -567,8 +569,8 @@ public:
bool using_packed_sortkeys() const
{
- DBUG_ASSERT(m_using_packed_sortkeys ==
- (sort_keys != NULL && sort_keys->using_packed_sortkeys()));
+ DBUG_ASSERT(sort_keys == NULL ||
+ (m_using_packed_sortkeys == sort_keys->using_packed_sortkeys()));
return m_using_packed_sortkeys;
}
@@ -578,6 +580,11 @@ public:
return addon_fields != NULL;
}
+ void set_using_packed_keys(bool val)
+ {
+ m_using_packed_sortkeys= val;
+ }
+
uint32 get_result_length(uchar *plen)
{
if (!m_using_packed_addons)
@@ -659,6 +666,12 @@ public:
{
return m_packed_format;
}
+ void set_packed_format(bool val)
+ {
+ m_packed_format= val;
+ }
+
+ uint32 get_record_length_for_unique(uchar *to, uint size_of_dupl_count);
private:
uint m_packable_length;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 879955af723..bf6cebbf7e8 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1570,7 +1570,16 @@ public:
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
- column->store_field_value((uchar *) elem, col_length);
+ uchar *to= (uchar* )elem;
+ if (column->is_packable())
+ {
+ column->unpack(column->ptr,
+ to + Unique::size_of_length_field,
+ to + Unique::read_packed_length(to), 0);
+ }
+ else
+ column->store_field_value(to, col_length);
+
histogram->set_value(curr_bucket,
column->pos_in_interval(min_value, max_value));
curr_bucket++;
@@ -1646,13 +1655,11 @@ public:
of the parameters to be passed to the constructor of the Unique object.
*/
- Count_distinct_field(Field *field, size_t max_heap_table_size)
+ Count_distinct_field(Field *field)
{
table_field= field;
- tree_key_length= field->pack_length();
-
- tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
- tree_key_length, max_heap_table_size, 1);
+ tree_key_length= 0;
+ tree= NULL;
}
virtual ~Count_distinct_field()
@@ -1670,6 +1677,48 @@ public:
return (tree != NULL);
}
+
+ /*
+ @brief
+      Create and set up the Unique object for the column
+
+ @param
+ thd Thread structure
+ max_heap_table_size max allowed size of the unique tree
+ */
+ bool setup(THD *thd, size_t max_heap_table_size)
+ {
+ if (table_field->is_packable())
+ {
+ tree_key_length= table_field->max_packed_col_length(table_field->pack_length());
+
+ tree_key_length+= Unique::size_of_length_field;
+ tree= new Unique((qsort_cmp2) simple_packed_str_key_cmp, (void*) this,
+ tree_key_length, max_heap_table_size, 1, TRUE);
+ }
+ else
+ {
+ if (table_field->type() == MYSQL_TYPE_BIT)
+ {
+ tree_key_length= sizeof(ulonglong);
+ tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
+ (void*) &tree_key_length,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ else
+ {
+ tree_key_length= table_field->pack_length();
+ tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) table_field,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ }
+ if (!tree)
+ return true; // OOM
+
+ return tree->setup(thd, table_field);
+ }
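+
+  /*
+    Sizing sketch (illustrative): for a packable VARCHAR(100) column in a
+    single-byte charset, tree_key_length reserves roughly
+
+      max_packed_col_length(pack_length()) + Unique::size_of_length_field
+
+    bytes per key, while each inserted key only occupies the length of its
+    actual packed value.
+  */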
+
+
/*
@brief
Add the value of 'field' to the container of the Unique object 'tree'
@@ -1677,7 +1726,20 @@ public:
virtual bool add()
{
table_field->mark_unused_memory_as_defined();
- return tree->unique_add(table_field->ptr);
+ uchar *orig_to= table_field->ptr;
+ DBUG_ASSERT(tree);
+
+ uint length= tree->get_size();
+ if (tree->is_packed())
+ {
+ uchar *to;
+ orig_to= to= tree->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ to+= table_field->make_packed_record_field(to);
+ length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, length);
+ }
+ return tree->unique_add(orig_to, length);
}
/*
@@ -1733,16 +1795,31 @@ public:
return table_field->collected_stats->histogram.get_values();
}
+ static int simple_packed_str_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2);
+
};
-static
-int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2)
+int Count_distinct_field::simple_ulonglong_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
{
ulonglong *val1= (ulonglong *) key1;
ulonglong *val2= (ulonglong *) key2;
return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1;
}
+
+
+/*
+ @brief
+ Compare function for packed keys
+*/
+int Count_distinct_field::simple_packed_str_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Count_distinct_field *compare_arg= (Count_distinct_field*)arg;
+ return compare_arg->tree->compare_packed_keys(key1, key2);
+}
/*
@@ -1755,20 +1832,12 @@ class Count_distinct_field_bit: public Count_distinct_field
{
public:
- Count_distinct_field_bit(Field *field, size_t max_heap_table_size)
- {
- table_field= field;
- tree_key_length= sizeof(ulonglong);
-
- tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
- (void*) &tree_key_length,
- tree_key_length, max_heap_table_size, 1);
- }
+ Count_distinct_field_bit(Field *field): Count_distinct_field(field){}
bool add()
{
longlong val= table_field->val_int();
- return tree->unique_add(&val);
+ return tree->unique_add(&val, tree->get_size());
}
};
@@ -2344,8 +2413,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field)
{
count_distinct=
table_field->type() == MYSQL_TYPE_BIT ?
- new Count_distinct_field_bit(table_field, max_heap_table_size) :
- new Count_distinct_field(table_field, max_heap_table_size);
+ new Count_distinct_field_bit(table_field) :
+ new Count_distinct_field(table_field);
+
+ if (count_distinct && count_distinct->setup(thd, max_heap_table_size))
+ count_distinct= NULL;
}
if (count_distinct && !count_distinct->exists())
count_distinct= NULL;
diff --git a/sql/uniques.cc b/sql/uniques.cc
index a0cebe3e4dd..d8772f366fb 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -48,12 +48,14 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
when tree implementation chooses to store pointer to key in TREE_ELEMENT
(instead of storing the element itself there)
*/
- return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
+ return (unique->is_packed() ?
+ my_b_write(&unique->file, key, Unique::read_packed_length(key)) :
+ my_b_write(&unique->file, key, unique->size)) ? 1 : 0;
}
int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
{
- return my_b_write(&unique->file, key, unique->size) ||
+ return unique_write_to_file(key, count, unique) ||
my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0;
}
@@ -79,10 +81,14 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg)
+ uint min_dupl_count_arg, bool packed_arg)
:max_in_memory_size(max_in_memory_size_arg),
size(size_arg),
+ packed(packed_arg),
+ memory_used(0), packed_rec_ptr(NULL),
+ sortorder(NULL), sort_keys(NULL),
elements(0)
{
my_b_clear(&file);
min_dupl_count= min_dupl_count_arg;
@@ -90,18 +96,32 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
if (min_dupl_count_arg)
full_size+= sizeof(element_count);
with_counters= MY_TEST(min_dupl_count_arg);
- init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func,
+ init_tree(&tree, (max_in_memory_size / 16), 0, 0, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
/* If the following fail's the next add will also fail */
my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16,
16, MYF(MY_THREAD_SPECIFIC));
+
+ if (packed)
+ {
+ packed_rec_ptr= (uchar *)my_malloc(PSI_INSTRUMENT_ME,
+ size,
+ MYF(MY_WME | MY_THREAD_SPECIFIC));
+ }
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements= (ulong) (max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
if (!max_elements)
+ {
max_elements= 1;
+ /*
+      Need to ensure that we have memory to store at least one record
+ in the Unique tree
+ */
+ max_in_memory_size= sizeof(TREE_ELEMENT) + size;
+ }
(void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
MYF(MY_WME));
@@ -371,6 +391,7 @@ Unique::~Unique()
close_cached_file(&file);
delete_tree(&tree, 0);
delete_dynamic(&file_ptrs);
+ my_free(packed_rec_ptr);
}
@@ -389,6 +410,10 @@ bool Unique::flush()
(void*) this, left_root_right) ||
insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
return 1;
+ /**
+    the tree gets reset, so make sure the memory-used accounting is reset too
+ */
+ memory_used= 0;
delete_tree(&tree, 0);
return 0;
}
@@ -495,7 +520,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
uint key_length, Merge_chunk *begin, Merge_chunk *end,
tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg,
- IO_CACHE *file, bool with_counters)
+ IO_CACHE *file, bool with_counters,
+ uint min_dupl_count, bool packed)
{
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
QUEUE queue;
@@ -521,7 +547,12 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
// read_to_buffer() needs only rec_length.
Sort_param sort_param;
sort_param.rec_length= key_length;
+ sort_param.sort_length= key_length;
+ sort_param.min_dupl_count= min_dupl_count;
+ DBUG_ASSERT(sort_param.res_length == 0);
DBUG_ASSERT(!sort_param.using_addon_fields());
+ sort_param.set_using_packed_keys(packed);
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
/*
Invariant: queue must contain top element from each tree, until a tree
@@ -534,7 +565,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
top->set_buffer(merge_buffer + (top - begin) * piece_size,
merge_buffer + (top - begin) * piece_size + piece_size);
top->set_max_keys(max_key_count_per_piece);
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
DBUG_ASSERT(bytes_read);
@@ -555,6 +586,10 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
read next key from the cache or from the file and push it to the
queue; this gives new top.
*/
+ key_length= sort_param.get_record_length_for_unique((uchar*)old_key,
+ size_of_dupl_count);
+
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
top->advance_current_key(key_length);
top->decrement_mem_count();
if (top->mem_count())
@@ -564,7 +599,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
/* save old_key not to overwrite it in read_to_buffer */
memcpy(save_key_buff, old_key, key_length);
old_key= save_key_buff;
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
else if (bytes_read) /* top->key, top->mem_count are reset */
@@ -604,15 +639,18 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
{
do
{
-
+ key_length= sort_param.get_record_length_for_unique(top->current_key(),
+ size_of_dupl_count);
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
cnt= with_counters ?
get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1;
if (walk_action(top->current_key(), cnt, walk_action_arg))
goto end;
+
top->advance_current_key(key_length);
}
while (top->decrement_mem_count());
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
}
@@ -678,7 +716,8 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
(Merge_chunk *) file_ptrs.buffer,
(Merge_chunk *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg,
- tree.compare, tree.custom_arg, &file, with_counters);
+ tree.compare, tree.custom_arg, &file, with_counters,
+ min_dupl_count, is_packed());
}
my_free(merge_buffer);
return res;
@@ -730,9 +769,11 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
sort_param.max_keys_per_buffer=
(uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2);
sort_param.not_killable= 1;
+ sort_param.set_using_packed_keys(is_packed());
+ sort_param.set_packed_format(is_packed());
sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer *
- sort_param.sort_length);
+ sort_param.sort_length);
sort_param.compare= (qsort2_cmp) buffpek_compare;
sort_param.cmp_context.key_compare= tree.compare;
@@ -760,7 +801,8 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
file_ptrs.elements= maxbuffer+1;
return 0;
}
- if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size),
+ if (merge_index(&sort_param,
+ Bounds_checked_array<uchar>(buff, buff_size),
file_ptr, maxbuffer, &file, outfile))
goto err;
error= 0;
@@ -789,6 +831,8 @@ bool Unique::get(TABLE *table)
sort.return_rows= elements+tree.elements_in_tree;
DBUG_ENTER("Unique::get");
+ DBUG_ASSERT(packed == FALSE);
+
if (my_b_tell(&file) == 0)
{
/* Whole tree is in memory; Don't use disk if you don't need to */
@@ -831,3 +875,114 @@ err:
my_free(sort_buffer);
DBUG_RETURN(rc);
}
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param item item of aggregate function
+ @param non_const_args number of non constant arguments
+ @param arg_count total number of arguments
+ @param exclude_nulls TRUE: NULLS should be excluded
+ FALSE: otherwise
+
+ @note
+ This implementation is used by GROUP_CONCAT as it can have more than
+  one argument in the argument list.
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool
+Unique::setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+  SORT_FIELD *pos;
+ if (sortorder)
+ return false;
+ DBUG_ASSERT(sort_keys == NULL);
+ sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * non_const_args);
+  pos= sortorder;
+ if (!pos)
+ return true;
+ sort_keys= new Sort_keys(sortorder, non_const_args);
+ if (!sort_keys)
+ return true;
+ for (uint i= 0; i < arg_count; i++)
+ {
+ Item *arg= item->get_arg(i);
+ if (arg->const_item())
+ continue;
+ Field *field= arg->get_tmp_table_field();
+ if (!field)
+ continue;
+
+ pos->setup(field, exclude_nulls, false);
+ pos++;
+ }
+ return false;
+}
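+
+/*
+  Usage sketch (mirrors the caller in Aggregator_distinct::setup();
+  argument names abbreviated):
+
+    tree= new Unique(cmp_func, cmp_arg, tree_key_length,
+                     ram_limit, 0, allow_packing);
+    if (!tree || tree->setup(thd, item_sum, non_const_items,
+                             item_sum->get_arg_count(), TRUE))
+      return TRUE;  // OOM or setup failure
+*/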
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param field field structure
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool Unique::setup(THD *thd, Field *field)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+
+  SORT_FIELD *pos;
+ if (sortorder)
+ return false;
+
+ DBUG_ASSERT(sort_keys == NULL);
+ sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD));
+  pos= sortorder;
+ if (!pos)
+ return true;
+ sort_keys= new Sort_keys(sortorder, 1);
+ if (!sort_keys)
+ return true;
+ pos->setup(field, true, false); // Nulls are always excluded
+
+ return false;
+}
+
+
+/*
+ @brief
+ Compare two packed keys inside the Unique tree
+
+ @param a_ptr packed sort key
+ @param b_ptr packed sort key
+
+ @retval
+ >0 key a_ptr greater than b_ptr
+ =0 key a_ptr equal to b_ptr
+ <0 key a_ptr less than b_ptr
+*/
+
+int Unique::compare_packed_keys(uchar *a_ptr, uchar *b_ptr)
+{
+ return sort_keys->compare_keys(a_ptr + Unique::size_of_length_field,
+ b_ptr + Unique::size_of_length_field);
+}
diff --git a/sql/uniques.h b/sql/uniques.h
index 7e12a391fbd..1ade09408bd 100644
--- a/sql/uniques.h
+++ b/sql/uniques.h
@@ -44,26 +44,86 @@ class Unique :public Sql_alloc
it to be written to record_pointers.
always 0 for unions, > 0 for intersections */
bool with_counters;
+ /*
+ TRUE : Value stored is of variable size
+ FALSE : Value stored is of fixed size
+ */
+ bool packed;
+ /*
+ size in bytes used for storing keys in the Unique tree
+ */
+ size_t memory_used;
+ /*
+ Packed record ptr for a record of the table, the packed value in this
+ record is added to the unique tree
+ */
+ uchar* packed_rec_ptr;
+
+ /*
+ Array of SORT_FIELD structure storing the information about the key parts
+ in the sort key of the Unique tree
+ @see Unique::setup()
+ */
+ SORT_FIELD *sortorder;
+
+ /*
+ Structure storing information about usage of keys
+ */
+ Sort_keys *sort_keys;
bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
bool flush();
+ // return the amount of unused memory in the Unique tree
+ size_t space_left()
+ {
+ DBUG_ASSERT(max_in_memory_size >= memory_used);
+ return max_in_memory_size - memory_used;
+ }
+
+ // Check if the Unique tree is full or not
+ bool is_full(size_t record_size)
+ {
+    if (!tree.elements_in_tree) // Always allow inserting at least one element
+ return false;
+ return record_size > space_left();
+ }
+
public:
ulong elements;
SORT_INFO sort;
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
- uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg= 0);
+ uint size_arg, size_t max_in_memory_size_arg,
+ uint min_dupl_count_arg= 0, bool packed_arg= false);
~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
- inline bool unique_add(void *ptr)
+
+ /*
+ TODO varun:
+    remove the old fixed-size unique_add() variant and always pass the size
+    as a parameter; for fixed-size types the fixed size would be passed.
+ */
+
+ inline bool unique_add(void *ptr, uint size_arg)
{
DBUG_ENTER("unique_add");
DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
- if (!(tree.flag & TREE_ONLY_DUPS) &&
- tree.elements_in_tree >= max_elements && flush())
+ TREE_ELEMENT *res;
+ size_t rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element;
+
+ if (!(tree.flag & TREE_ONLY_DUPS) && is_full(rec_size) && flush())
DBUG_RETURN(1);
- DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
+ uint count= tree.elements_in_tree;
+ res= tree_insert(&tree, ptr, size_arg, tree.custom_arg);
+ if (tree.elements_in_tree != count)
+ {
+ /*
+ increment memory used only when a unique element is inserted
+ in the tree
+ */
+ memory_used+= rec_size;
+ }
+ DBUG_RETURN(!res);
}
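+
+  /*
+    Accounting sketch for unique_add() above (illustration): every new
+    unique key is charged its payload plus tree bookkeeping,
+
+      rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element
+
+    e.g. a 16-byte key with, say, 24 bytes of overhead consumes 40 bytes
+    of max_in_memory_size; once space_left() is exceeded, flush() runs.
+  */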
bool is_in_memory() { return (my_b_tell(&file) == 0); }
@@ -96,7 +156,42 @@ public:
bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
uint get_size() const { return size; }
+ uint get_full_size() const { return full_size; }
size_t get_max_in_memory_size() const { return max_in_memory_size; }
+ uchar *get_packed_rec_ptr() { return packed_rec_ptr; }
+ Sort_keys *get_keys() { return sort_keys; }
+ SORT_FIELD *get_sortorder() { return sortorder; }
+ bool is_count_stored() { return with_counters; }
+
+ // returns TRUE if the unique tree stores packed values
+ bool is_packed() { return packed; }
+
+ static void store_packed_length(uchar *p, uint sz)
+ {
+ int4store(p, sz - size_of_length_field);
+ }
+
+  // returns the length of the key including the length bytes
+ static uint read_packed_length(uchar *p)
+ {
+ return size_of_length_field + uint4korr(p);
+ }
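+
+  /*
+    Roundtrip sketch (values assumed): for a key with 9 payload bytes,
+
+      uchar key[32];
+      Unique::store_packed_length(key, 4 + 9);  // int4store(key, 9)
+      Unique::read_packed_length(key);          // returns 4 + 9 == 13
+  */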
+
+ bool setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls);
+ bool setup(THD *thd, Field *field);
+ int compare_packed_keys(uchar *a, uchar *b);
+
+ /*
+ TODO varun:
+    Currently size_of_length_field is set to 4, but 2 bytes could also be
+    used here, as fields are always inserted in the unique tree either from
+    the base table or from the temp table.
+    However, this does not look possible at the moment: the merge process in
+    merge_buffers() expects the length of dynamic sort keys to be stored in
+    4 bytes. Maybe this needs to be discussed.
+ */
+ static const uint size_of_length_field= 4;
friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);