diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2020-03-28 12:31:22 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2020-12-04 23:38:09 +0530 |
commit | 202393086e4da6c6864778dcb87339f159ea48f6 (patch) | |
tree | 203cb33f02b64b59de11f86c56ba423f352ba0d2 | |
parent | e9f33b7760539e2ba60fb236fab8eaf0ce53ca4a (diff) | |
download | mariadb-git-bb-10.6-mdev21829.tar.gz |
MDEV-21829: Use packed sort keys in Unique objectsbb-10.6-mdev21829
The task deals with packing the values stored in the Unique tree for each record.
The changes brought by this feature is:
1) Unique tree can have dynamic length keys
2) Format of keys looks like
<key_length> <packed_value1> <packed_value2> ....... <packed_valueN>
Unique class is currently used in
1) agg_func(DISTINCT col)
Here most aggregate functions like SUM, AVG accept only fixed size arguments
so it is not beneficial to use packing for these. Packing is done for
COUNT and GROUP_CONCAT (or JSON_ARRAYAGG) aggregate function as these are meaningful
2) index-merge stores row-ids
index merge stores row-ids which are of fixed size, so packing is not required
3) Engine Independent Table statistics
Packing is done here for variable length data types
This task is an extension to MDEV-21580.
-rw-r--r-- | mysql-test/include/unique_packed_eits.inc | 63 | ||||
-rw-r--r-- | mysql-test/main/unique_packed.result | 613 | ||||
-rw-r--r-- | mysql-test/main/unique_packed.test | 264 | ||||
-rw-r--r-- | plugin/type_inet/sql_type_inet.cc | 2 | ||||
-rw-r--r-- | plugin/type_inet/sql_type_inet.h | 2 | ||||
-rw-r--r-- | sql/field.cc | 41 | ||||
-rw-r--r-- | sql/field.h | 18 | ||||
-rw-r--r-- | sql/filesort.cc | 441 | ||||
-rw-r--r-- | sql/item_jsonfunc.cc | 82 | ||||
-rw-r--r-- | sql/item_jsonfunc.h | 1 | ||||
-rw-r--r-- | sql/item_sum.cc | 697 | ||||
-rw-r--r-- | sql/item_sum.h | 51 | ||||
-rw-r--r-- | sql/opt_range.cc | 36 | ||||
-rw-r--r-- | sql/opt_range.h | 4 | ||||
-rw-r--r-- | sql/sql_class.h | 11 | ||||
-rw-r--r-- | sql/sql_delete.cc | 53 | ||||
-rw-r--r-- | sql/sql_select.h | 2 | ||||
-rw-r--r-- | sql/sql_sort.h | 17 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 202 | ||||
-rw-r--r-- | sql/sql_statistics.h | 2 | ||||
-rw-r--r-- | sql/sql_type.h | 22 | ||||
-rw-r--r-- | sql/uniques.cc | 598 | ||||
-rw-r--r-- | sql/uniques.h | 456 |
23 files changed, 3326 insertions, 352 deletions
diff --git a/mysql-test/include/unique_packed_eits.inc b/mysql-test/include/unique_packed_eits.inc new file mode 100644 index 00000000000..ebcf3b304a1 --- /dev/null +++ b/mysql-test/include/unique_packed_eits.inc @@ -0,0 +1,63 @@ +--echo # +--echo # Tests below show the avg_frequency calculated during EITS collection and +--echo # we can compare that value by calculating distinct values for the same columns +--echo # + + +--echo # +--echo # Running ANALYZE for customer table +--echo # + +ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES (); + +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey'); + +SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone) +FROM customer; + +--echo # +--echo # Running ANALYZE for lineitem table +--echo # + +ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES (); +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct'); + +SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem; + +--echo # +--echo # Running ANALYZE for nation table +--echo # + +ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES (); +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey'); + +SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation; + +--echo # +--echo # Running ANALYZE for part table +--echo # + +ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES (); +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr'); + +SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part; + +--echo # +--echo # Running ANALYZE for supplier table +--echo # + +ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES (); +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone'); + +SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone) +FROM supplier; diff --git a/mysql-test/main/unique_packed.result b/mysql-test/main/unique_packed.result new file mode 100644 index 00000000000..54cce0b6c5b --- /dev/null +++ b/mysql-test/main/unique_packed.result @@ -0,0 +1,613 @@ +create database dbt3; +use dbt3; +SET @save_tmp_table_size= @@tmp_table_size; +CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part; +# +# The two queries should return the same result +# +SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr; +GROUP_CONCAT(p_brand order by p_brand) +Brand#11,Brand#12,Brand#13,Brand#14,Brand#15 +Brand#21,Brand#22,Brand#23,Brand#24,Brand#25 +Brand#31,Brand#32,Brand#33,Brand#34,Brand#35 +Brand#41,Brand#42,Brand#43,Brand#44,Brand#45 +Brand#51,Brand#52,Brand#53,Brand#54,Brand#55 +SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr; +GROUP_CONCAT(DISTINCT p_brand) +Brand#11,Brand#12,Brand#13,Brand#14,Brand#15 +Brand#21,Brand#22,Brand#23,Brand#24,Brand#25 +Brand#31,Brand#32,Brand#33,Brand#34,Brand#35 +Brand#41,Brand#42,Brand#43,Brand#44,Brand#45 +Brand#51,Brand#52,Brand#53,Brand#54,Brand#55 +# +# The two queries should return the same result +# +SELECT COUNT(*) FROM v1 GROUP BY p_mfgr; +COUNT(*) +5 +5 +5 +5 +5 +SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr; +COUNT(DISTINCT p_brand) +5 +5 +5 +5 +5 +DROP VIEW v1; +CREATE VIEW v1 AS +SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p +WHERE l.l_partkey= p.p_partkey; +SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr; +p_mfgr COUNT(*) +Manufacturer#1 40 +Manufacturer#2 36 +Manufacturer#3 45 +Manufacturer#4 37 +Manufacturer#5 42 +# +# The two queries should return the same result as the above one +# +SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p +WHERE l.l_partkey= p.p_partkey +GROUP BY p.p_mfgr; +p_mfgr COUNT(DISTINCT p_name) +Manufacturer#1 40 +Manufacturer#2 36 +Manufacturer#3 45 +Manufacturer#4 37 +Manufacturer#5 42 +SET tmp_table_size=1024; +# +# merge_walk() get called, that is merging of sorted chunks of file happens +# +SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p +WHERE l.l_partkey= p.p_partkey +GROUP BY p.p_mfgr; +p_mfgr COUNT(DISTINCT p_name) +Manufacturer#1 40 +Manufacturer#2 36 +Manufacturer#3 45 +Manufacturer#4 37 +Manufacturer#5 42 +SET tmp_table_size = @save_tmp_table_size; +DROP VIEW v1; +# The result should be 200 +SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p; +COUNT(DISTINCT p_name) +200 +# +# merge_many_buffers get called, followed by merge_walk() +# +SET tmp_table_size=1024; +SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p; +COUNT(DISTINCT p_name) +200 +SET tmp_table_size = @save_tmp_table_size; +CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation +WHERE o_custkey= c_custkey and c_nationkey= n_nationkey; +# +# These 2 queries should return the same result +# +SELECT n_name, GROUP_CONCAT(c_name ORDER BY c_name), COUNT(c_name) FROM v1 GROUP BY n_name; +n_name GROUP_CONCAT(c_name ORDER BY c_name) COUNT(c_name) +ALGERIA Customer#000000029,Customer#000000073,Customer#000000076,Customer#000000080,Customer#000000086 5 +ARGENTINA Customer#000000014,Customer#000000059,Customer#000000106 3 +BRAZIL Customer#000000017,Customer#000000047,Customer#000000092,Customer#000000101 4 +CANADA Customer#000000005,Customer#000000013,Customer#000000022,Customer#000000023,Customer#000000040,Customer#000000064,Customer#000000122,Customer#000000146 8 +CHINA Customer#000000007,Customer#000000019,Customer#000000082,Customer#000000118,Customer#000000124 5 +EGYPT Customer#000000004,Customer#000000074,Customer#000000128,Customer#000000140 4 +ETHIOPIA Customer#000000010,Customer#000000085 2 +FRANCE Customer#000000046,Customer#000000050 2 +GERMANY Customer#000000062,Customer#000000071,Customer#000000119,Customer#000000136 4 +INDIA Customer#000000028,Customer#000000037,Customer#000000091,Customer#000000115 4 +INDONESIA Customer#000000067,Customer#000000094,Customer#000000103,Customer#000000130,Customer#000000139,Customer#000000142 6 +IRAN Customer#000000016,Customer#000000041,Customer#000000049,Customer#000000055,Customer#000000056,Customer#000000104,Customer#000000110 7 +IRAQ Customer#000000052,Customer#000000131,Customer#000000134,Customer#000000148 4 +JAPAN Customer#000000025,Customer#000000038,Customer#000000068,Customer#000000098,Customer#000000113 5 +JORDAN Customer#000000002,Customer#000000058,Customer#000000145 3 +KENYA Customer#000000089 1 +MOROCCO Customer#000000001,Customer#000000032,Customer#000000034,Customer#000000053,Customer#000000079,Customer#000000095,Customer#000000107 7 +MOZAMBIQUE Customer#000000044,Customer#000000088,Customer#000000109,Customer#000000116,Customer#000000137,Customer#000000143 6 +PERU Customer#000000008,Customer#000000035,Customer#000000061,Customer#000000077,Customer#000000097,Customer#000000121,Customer#000000133 7 +ROMANIA Customer#000000043,Customer#000000112,Customer#000000125,Customer#000000149 4 +RUSSIA Customer#000000020,Customer#000000026,Customer#000000070,Customer#000000083 4 +SAUDI ARABIA Customer#000000100 1 +UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3 +VIETNAM Customer#000000127 1 +SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name) +FROM customer, orders, nation +WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name; +n_name GROUP_CONCAT(DISTINCT c_name) COUNT(DISTINCT c_name) +ALGERIA Customer#000000029,Customer#000000073,Customer#000000076,Customer#000000080,Customer#000000086 5 +ARGENTINA Customer#000000014,Customer#000000059,Customer#000000106 3 +BRAZIL Customer#000000017,Customer#000000047,Customer#000000092,Customer#000000101 4 +CANADA Customer#000000005,Customer#000000013,Customer#000000022,Customer#000000023,Customer#000000040,Customer#000000064,Customer#000000122,Customer#000000146 8 +CHINA Customer#000000007,Customer#000000019,Customer#000000082,Customer#000000118,Customer#000000124 5 +EGYPT Customer#000000004,Customer#000000074,Customer#000000128,Customer#000000140 4 +ETHIOPIA Customer#000000010,Customer#000000085 2 +FRANCE Customer#000000046,Customer#000000050 2 +GERMANY Customer#000000062,Customer#000000071,Customer#000000119,Customer#000000136 4 +INDIA Customer#000000028,Customer#000000037,Customer#000000091,Customer#000000115 4 +INDONESIA Customer#000000067,Customer#000000094,Customer#000000103,Customer#000000130,Customer#000000139,Customer#000000142 6 +IRAN Customer#000000016,Customer#000000041,Customer#000000049,Customer#000000055,Customer#000000056,Customer#000000104,Customer#000000110 7 +IRAQ Customer#000000052,Customer#000000131,Customer#000000134,Customer#000000148 4 +JAPAN Customer#000000025,Customer#000000038,Customer#000000068,Customer#000000098,Customer#000000113 5 +JORDAN Customer#000000002,Customer#000000058,Customer#000000145 3 +KENYA Customer#000000089 1 +MOROCCO Customer#000000001,Customer#000000032,Customer#000000034,Customer#000000053,Customer#000000079,Customer#000000095,Customer#000000107 7 +MOZAMBIQUE Customer#000000044,Customer#000000088,Customer#000000109,Customer#000000116,Customer#000000137,Customer#000000143 6 +PERU Customer#000000008,Customer#000000035,Customer#000000061,Customer#000000077,Customer#000000097,Customer#000000121,Customer#000000133 7 +ROMANIA Customer#000000043,Customer#000000112,Customer#000000125,Customer#000000149 4 +RUSSIA Customer#000000020,Customer#000000026,Customer#000000070,Customer#000000083 4 +SAUDI ARABIA Customer#000000100 1 +UNITED KINGDOM Customer#000000011,Customer#000000031,Customer#000000065 3 +VIETNAM Customer#000000127 1 +DROP VIEW v1; +# +# Tests for packed unique during EITS collection +# +SET @save_histogram_size= @@histogram_size; +SET histogram_size=0; +# +# Testing when histograms are not created, this tests the function count_distinct_single_occurence_walk +# passed as a callback function to walk +# +# +# Tests below show the avg_frequency calculated during EITS collection and +# we can compare that value by calculating distinct values for the same columns +# +# +# Running ANALYZE for customer table +# +ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.customer analyze status Engine-independent statistics collected +dbt3.customer analyze status OK +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey'); +column_name avg_frequency +c_name 1.0000 +c_nationkey 6.0000 +c_phone 1.0000 +SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone) +FROM customer; +COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone) +150 150 25 150 +# +# Running ANALYZE for lineitem table +# +ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES (); +Table Op Msg_type Msg_text +dbt3.lineitem analyze status Engine-independent statistics collected +dbt3.lineitem analyze status OK +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct'); +column_name avg_frequency +l_shipDATE 2.6500 +l_shipinstruct 1501.2500 +SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem; +COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct) +6005 2266 4 +# +# Running ANALYZE for nation table +# +ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.nation analyze status Engine-independent statistics collected +dbt3.nation analyze status OK +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey'); +column_name avg_frequency +n_name 1.0000 +n_regionkey 5.0000 +SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation; +COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey) +25 25 5 +# +# Running ANALYZE for part table +# +ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES (); +Table Op Msg_type Msg_text +dbt3.part analyze status Engine-independent statistics collected +dbt3.part analyze status OK +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr'); +column_name avg_frequency +p_name 1.0000 +p_mfgr 40.0000 +SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part; +COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr) +200 200 5 +# +# Running ANALYZE for supplier table +# +ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES (); +Table Op Msg_type Msg_text +dbt3.supplier analyze status Engine-independent statistics collected +dbt3.supplier analyze status OK +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone'); +column_name avg_frequency +s_name 1.0000 +s_phone 1.0000 +s_acctbal 1.0000 +SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone) +FROM supplier; +COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone) +10 10 10 10 +# +# Testing when histograms are created,this tests the function histogram_build_walk +# passed as a callback function to walk +# +SET histogram_size=255; +# +# Tests below show the avg_frequency calculated during EITS collection and +# we can compare that value by calculating distinct values for the same columns +# +# +# Running ANALYZE for customer table +# +ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.customer analyze status Engine-independent statistics collected +dbt3.customer analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey'); +column_name avg_frequency +c_name 1.0000 +c_nationkey 6.0000 +c_phone 1.0000 +SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone) +FROM customer; +COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone) +150 150 25 150 +# +# Running ANALYZE for lineitem table +# +ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES (); +Table Op Msg_type Msg_text +dbt3.lineitem analyze status Engine-independent statistics collected +dbt3.lineitem analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct'); +column_name avg_frequency +l_shipDATE 2.6500 +l_shipinstruct 1501.2500 +SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem; +COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct) +6005 2266 4 +# +# Running ANALYZE for nation table +# +ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.nation analyze status Engine-independent statistics collected +dbt3.nation analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey'); +column_name avg_frequency +n_name 1.0000 +n_regionkey 5.0000 +SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation; +COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey) +25 25 5 +# +# Running ANALYZE for part table +# +ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES (); +Table Op Msg_type Msg_text +dbt3.part analyze status Engine-independent statistics collected +dbt3.part analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr'); +column_name avg_frequency +p_name 1.0000 +p_mfgr 40.0000 +SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part; +COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr) +200 200 5 +# +# Running ANALYZE for supplier table +# +ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES (); +Table Op Msg_type Msg_text +dbt3.supplier analyze status Engine-independent statistics collected +dbt3.supplier analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone'); +column_name avg_frequency +s_name 1.0000 +s_phone 1.0000 +s_acctbal 1.0000 +SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone) +FROM supplier; +COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone) +10 10 10 10 +# +# Testing with very small memory for Unique, will do merging +# +SET @save_max_heap_table_size= @@max_heap_table_size; +SET max_heap_table_size=16384; +# +# Tests below show the avg_frequency calculated during EITS collection and +# we can compare that value by calculating distinct values for the same columns +# +# +# Running ANALYZE for customer table +# +ANALYZE TABLE customer PERSISTENT FOR COLUMNS (c_name, c_phone, c_nationkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.customer analyze status Engine-independent statistics collected +dbt3.customer analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='customer' AND column_name IN ('c_name', 'c_phone', 'c_nationkey'); +column_name avg_frequency +c_name 1.0000 +c_nationkey 6.0000 +c_phone 1.0000 +SELECT COUNT(*), COUNT(DISTINCT c_name), COUNT(DISTINCT c_nationkey), COUNT(DISTINCT c_phone) +FROM customer; +COUNT(*) COUNT(DISTINCT c_name) COUNT(DISTINCT c_nationkey) COUNT(DISTINCT c_phone) +150 150 25 150 +# +# Running ANALYZE for lineitem table +# +ANALYZE TABLE lineitem PERSISTENT FOR COLUMNS (l_shipDATE, l_shipinstruct) INDEXES (); +Table Op Msg_type Msg_text +dbt3.lineitem analyze status Engine-independent statistics collected +dbt3.lineitem analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='lineitem' AND column_name IN ('l_shipDATE', 'l_shipinstruct'); +column_name avg_frequency +l_shipDATE 2.6500 +l_shipinstruct 1501.2500 +SELECT COUNT(*), COUNT(DISTINCT l_shipDATE), COUNT(DISTINCT l_shipinstruct) FROM lineitem; +COUNT(*) COUNT(DISTINCT l_shipDATE) COUNT(DISTINCT l_shipinstruct) +6005 2266 4 +# +# Running ANALYZE for nation table +# +ANALYZE TABLE nation PERSISTENT FOR COLUMNS (n_name, n_regionkey) INDEXES (); +Table Op Msg_type Msg_text +dbt3.nation analyze status Engine-independent statistics collected +dbt3.nation analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='nation' AND column_name IN ('n_name', 'n_regionkey'); +column_name avg_frequency +n_name 1.0000 +n_regionkey 5.0000 +SELECT COUNT(*), COUNT(DISTINCT n_name), COUNT(DISTINCT n_regionkey) FROM nation; +COUNT(*) COUNT(DISTINCT n_name) COUNT(DISTINCT n_regionkey) +25 25 5 +# +# Running ANALYZE for part table +# +ANALYZE TABLE part PERSISTENT FOR COLUMNS (p_name, p_mfgr) INDEXES (); +Table Op Msg_type Msg_text +dbt3.part analyze status Engine-independent statistics collected +dbt3.part analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='part' AND column_name IN ('p_name', 'p_mfgr'); +column_name avg_frequency +p_name 1.0000 +p_mfgr 40.0000 +SELECT COUNT(*), COUNT(DISTINCT p_name), COUNT(DISTINCT p_mfgr) FROM part; +COUNT(*) COUNT(DISTINCT p_name) COUNT(DISTINCT p_mfgr) +200 200 5 +# +# Running ANALYZE for supplier table +# +ANALYZE TABLE supplier PERSISTENT FOR COLUMNS (s_acctbal, s_name, s_phone) INDEXES (); +Table Op Msg_type Msg_text +dbt3.supplier analyze status Engine-independent statistics collected +dbt3.supplier analyze status Table is already up to date +SELECT column_name, avg_frequency +FROM mysql.column_stats +WHERE table_name='supplier' AND column_name IN ('s_acctbal', 's_name', 's_phone'); +column_name avg_frequency +s_name 1.0000 +s_phone 1.0000 +s_acctbal 1.0000 +SELECT COUNT(*), COUNT(DISTINCT s_acctbal), COUNT(DISTINCT s_name), COUNT(DISTINCT s_phone) +FROM supplier; +COUNT(*) COUNT(DISTINCT s_acctbal) COUNT(DISTINCT s_name) COUNT(DISTINCT s_phone) +10 10 10 10 +SET max_heap_table_size= @save_max_heap_table_size; +SET histogram_size= @save_histogram_size; +# +# Test with CHAR types +# +CREATE VIEW v1 AS +SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey; +SELECT COUNT(*) FROM v1; +COUNT(*) +100 +SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; +COUNT(DISTINCT c_phone) +100 +SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1; +GROUP_CONCAT(c_phone ORDER BY c_phone) +10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267 +SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; +GROUP_CONCAT(DISTINCT c_phone) +10-267-172-7101,10-349-718-3044,10-473-439-3214,10-677-951-2353,10-773-203-7342,11-355-584-3112,11-751-989-4627,11-845-129-3851,12-427-271-9466,12-446-416-8471,12-514-298-3699,12-970-682-3487,13-312-472-8245,13-558-731-7204,13-652-915-8939,13-702-694-4520,13-750-942-6364,13-761-547-5974,13-806-545-9701,13-835-723-3223,14-128-190-5944,14-199-862-7209,14-273-885-6505,14-280-874-8044,15-741-346-9870,15-745-585-8219,16-357-681-2007,16-658-112-3221,17-361-978-7059,17-501-210-4726,17-697-919-8406,17-710-812-5403,18-239-400-3677,18-385-235-7162,18-774-241-1462,18-971-699-1843,19-140-352-1403,19-190-993-9281,19-216-107-2107,19-403-114-4356,19-407-425-2584,19-953-499-8833,20-180-440-8525,20-781-609-3107,20-893-536-2069,20-895-685-6920,20-908-631-4424,20-917-711-4011,20-966-284-8065,21-186-284-5998,21-200-159-5932,21-562-498-6636,21-840-210-3572,22-302-930-4756,22-306-880-7212,22-603-468-3533,22-885-845-6889,22-918-832-2411,23-244-493-2508,23-562-444-8454,23-768-687-3665,24-394-451-5404,25-147-850-4166,25-168-852-5363,25-336-529-9919,25-344-968-5422,25-430-914-2194,25-923-255-2929,25-989-741-2988,26-190-260-5375,26-314-406-7725,26-516-273-2566,26-632-309-5792,26-777-409-5654,26-992-422-8153,27-147-574-9335,27-269-357-4674,27-408-997-8430,27-411-990-2959,27-566-888-7431,27-588-919-5638,27-626-559-8599,28-159-442-5305,28-183-750-7809,28-190-982-9759,28-396-526-5053,28-639-943-7051,29-233-262-8382,29-261-996-3120,29-316-665-2897,29-797-439-6760,30-749-445-4907,31-101-672-2951,32-363-455-4837,32-817-154-4122,32-828-107-2832,32-957-234-8742,33-197-837-7094,33-464-151-3439,33-733-623-5267 +SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; +JSON_ARRAYAGG(DISTINCT c_phone) +["10-267-172-7101","10-349-718-3044","10-473-439-3214","10-677-951-2353","10-773-203-7342","11-355-584-3112","11-751-989-4627","11-845-129-3851","12-427-271-9466","12-446-416-8471","12-514-298-3699","12-970-682-3487","13-312-472-8245","13-558-731-7204","13-652-915-8939","13-702-694-4520","13-750-942-6364","13-761-547-5974","13-806-545-9701","13-835-723-3223","14-128-190-5944","14-199-862-7209","14-273-885-6505","14-280-874-8044","15-741-346-9870","15-745-585-8219","16-357-681-2007","16-658-112-3221","17-361-978-7059","17-501-210-4726","17-697-919-8406","17-710-812-5403","18-239-400-3677","18-385-235-7162","18-774-241-1462","18-971-699-1843","19-140-352-1403","19-190-993-9281","19-216-107-2107","19-403-114-4356","19-407-425-2584","19-953-499-8833","20-180-440-8525","20-781-609-3107","20-893-536-2069","20-895-685-6920","20-908-631-4424","20-917-711-4011","20-966-284-8065","21-186-284-5998","21-200-159-5932","21-562-498-6636","21-840-210-3572","22-302-930-4756","22-306-880-7212","22-603-468-3533","22-885-845-6889","22-918-832-2411","23-244-493-2508","23-562-444-8454","23-768-687-3665","24-394-451-5404","25-147-850-4166","25-168-852-5363","25-336-529-9919","25-344-968-5422","25-430-914-2194","25-923-255-2929","25-989-741-2988","26-190-260-5375","26-314-406-7725","26-516-273-2566","26-632-309-5792","26-777-409-5654","26-992-422-8153","27-147-574-9335","27-269-357-4674","27-408-997-8430","27-411-990-2959","27-566-888-7431","27-588-919-5638","27-626-559-8599","28-159-442-5305","28-183-750-7809","28-190-982-9759","28-396-526-5053","28-639-943-7051","29-233-262-8382","29-261-996-3120","29-316-665-2897","29-797-439-6760","30-749-445-4907","31-101-672-2951","32-363-455-4837","32-817-154-4122","32-828-107-2832","32-957-234-8742","33-197-837-7094","33-464-151-3439","33-733-623-5267"] +DROP VIEW v1; +CREATE VIEW v1 AS +SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey; +# +# Test with VARCHAR types +# +SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1; +GROUP_CONCAT(c_name ORDER BY c_name) +Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149 +SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey; +GROUP_CONCAT(DISTINCT c_name) +Customer#000000001,Customer#000000002,Customer#000000004,Customer#000000005,Customer#000000007,Customer#000000008,Customer#000000010,Customer#000000011,Customer#000000013,Customer#000000014,Customer#000000016,Customer#000000017,Customer#000000019,Customer#000000020,Customer#000000022,Customer#000000023,Customer#000000025,Customer#000000026,Customer#000000028,Customer#000000029,Customer#000000031,Customer#000000032,Customer#000000034,Customer#000000035,Customer#000000037,Customer#000000038,Customer#000000040,Customer#000000041,Customer#000000043,Customer#000000044,Customer#000000046,Customer#000000047,Customer#000000049,Customer#000000050,Customer#000000052,Customer#000000053,Customer#000000055,Customer#000000056,Customer#000000058,Customer#000000059,Customer#000000061,Customer#000000062,Customer#000000064,Customer#000000065,Customer#000000067,Customer#000000068,Customer#000000070,Customer#000000071,Customer#000000073,Customer#000000074,Customer#000000076,Customer#000000077,Customer#000000079,Customer#000000080,Customer#000000082,Customer#000000083,Customer#000000085,Customer#000000086,Customer#000000088,Customer#000000089,Customer#000000091,Customer#000000092,Customer#000000094,Customer#000000095,Customer#000000097,Customer#000000098,Customer#000000100,Customer#000000101,Customer#000000103,Customer#000000104,Customer#000000106,Customer#000000107,Customer#000000109,Customer#000000110,Customer#000000112,Customer#000000113,Customer#000000115,Customer#000000116,Customer#000000118,Customer#000000119,Customer#000000121,Customer#000000122,Customer#000000124,Customer#000000125,Customer#000000127,Customer#000000128,Customer#000000130,Customer#000000131,Customer#000000133,Customer#000000134,Customer#000000136,Customer#000000137,Customer#000000139,Customer#000000140,Customer#000000142,Customer#000000143,Customer#000000145,Customer#000000146,Customer#000000148,Customer#000000149 +SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey; +JSON_ARRAYAGG(DISTINCT c_name) +["Customer#000000001","Customer#000000002","Customer#000000004","Customer#000000005","Customer#000000007","Customer#000000008","Customer#000000010","Customer#000000011","Customer#000000013","Customer#000000014","Customer#000000016","Customer#000000017","Customer#000000019","Customer#000000020","Customer#000000022","Customer#000000023","Customer#000000025","Customer#000000026","Customer#000000028","Customer#000000029","Customer#000000031","Customer#000000032","Customer#000000034","Customer#000000035","Customer#000000037","Customer#000000038","Customer#000000040","Customer#000000041","Customer#000000043","Customer#000000044","Customer#000000046","Customer#000000047","Customer#000000049","Customer#000000050","Customer#000000052","Customer#000000053","Customer#000000055","Customer#000000056","Customer#000000058","Customer#000000059","Customer#000000061","Customer#000000062","Customer#000000064","Customer#000000065","Customer#000000067","Customer#000000068","Customer#000000070","Customer#000000071","Customer#000000073","Customer#000000074","Customer#000000076","Customer#000000077","Customer#000000079","Customer#000000080","Customer#000000082","Customer#000000083","Customer#000000085","Customer#000000086","Customer#000000088","Customer#000000089","Customer#000000091","Customer#000000092","Customer#000000094","Customer#000000095","Customer#000000097","Customer#000000098","Customer#000000100","Customer#000000101","Customer#000000103","Customer#000000104","Customer#000000106","Customer#000000107","Customer#000000109","Customer#000000110","Customer#000000112","Customer#000000113","Customer#000000115","Customer#000000116","Customer#000000118","Customer#000000119","Customer#000000121","Customer#000000122","Customer#000000124","Customer#000000125","Customer#000000127","Customer#000000128","Customer#000000130","Customer#000000131","Customer#000000133","Customer#000000134","Customer#000000136","Customer#000000137","Customer#000000139","Customer#000000140","Customer#000000142","Customer#000000143","Customer#000000145","Customer#000000146","Customer#000000148","Customer#000000149"] +DROP VIEW v1; +# +# Test with FIXED TYPES +# +CREATE VIEW v1 AS +SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey; +SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1; +GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 +SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey; +GROUP_CONCAT(DISTINCT c_nationkey) +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 +SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey; +JSON_ARRAYAGG(DISTINCT c_nationkey) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] +DROP VIEW v1; +DROP DATABASE dbt3; +USE test; +# +# Testing for merge_buffers when Unique is packed +# +CREATE TABLE t1 (a VARCHAR(1500)); +INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100; +SET @save_max_heap_table_size= @@max_heap_table_size; +SET max_heap_table_size=16384; +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES(); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a'; +column_name avg_frequency +a 2.0000 +SELECT COUNT(*), COUNT(DISTINCT a) FROM t1; +COUNT(*) COUNT(DISTINCT a) +2400 1200 +SET max_heap_table_size= @save_max_heap_table_size; +DROP TABLE t1; +# +# Simple test for DISTINCT with VARCHAR, CHAR and BLOBS +# +# +# Test with VARCHAR types +# +CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +GROUP_CONCAT(DISTINCT a) +a,b,c +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[null,"a","b","c"] +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +GROUP_CONCAT(DISTINCT a) +a,b,c +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +["a","b","c"] +DROP TABLE t1; +# +# Test with CHAR types +# +CREATE TABLE t1 (a CHAR(255), b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +GROUP_CONCAT(DISTINCT a) +a,b,c +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[null,"a","b","c"] +DROP TABLE t1; +CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +GROUP_CONCAT(DISTINCT a) +a,b,c +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +["a","b","c"] +# +# Test with INT types (fixed size datatypes) +# +SELECT GROUP_CONCAT(DISTINCT b) FROM t1; +GROUP_CONCAT(DISTINCT b) +1,2,3,4 +SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1; +JSON_ARRAYAGG(DISTINCT b) +[1,2,3,4] +DROP TABLE t1; +# +# Test with BLOB types +# +CREATE TABLE t1 (a BLOB, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +GROUP_CONCAT(DISTINCT a) +a,b,c +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[null,"a","b","c"] +DROP TABLE t1; +# +# Multiple arguments to GROUP_CONCAT +# +CREATE TABLE t1 (a VARCHAR(10) NOT NULL, b INT); +INSERT INTO t1 VALUES ('a',259),('a',11); +INSERT INTO t1 VALUES ('a',259),('a',11); +INSERT INTO t1 VALUES ('b',259),('c',11); +SELECT GROUP_CONCAT(DISTINCT a, b) FROM t1; +GROUP_CONCAT(DISTINCT a, b) +a11,a259,b259,c11 +SELECT GROUP_CONCAT(DISTINCT a, b+1) FROM t1; +GROUP_CONCAT(DISTINCT a, b+1) +a12,a260,b260,c12 +DROP TABLE t1; diff --git a/mysql-test/main/unique_packed.test b/mysql-test/main/unique_packed.test new file mode 100644 index 00000000000..93c5b5910b2 --- /dev/null +++ b/mysql-test/main/unique_packed.test @@ -0,0 +1,264 @@ +--source include/have_sequence.inc +create database dbt3; +use dbt3; + +--disable_result_log +--disable_query_log +--source include/dbt3_s001.inc +--enable_result_log +--enable_query_log + +SET @save_tmp_table_size= @@tmp_table_size; + +CREATE VIEW v1 AS SELECT DISTINCT p_mfgr, p_brand FROM part; + +--echo # +--echo # The two queries should return the same result +--echo # +SELECT GROUP_CONCAT(p_brand order by p_brand) FROM v1 GROUP BY p_mfgr; +SELECT GROUP_CONCAT(DISTINCT p_brand) FROM part GROUP BY p_mfgr; + +--echo # +--echo # The two queries should return the same result +--echo # +SELECT COUNT(*) FROM v1 GROUP BY p_mfgr; +SELECT COUNT(DISTINCT p_brand) FROM part GROUP BY p_mfgr; + +DROP VIEW v1; + +CREATE VIEW v1 AS +SELECT DISTINCT p_mfgr, p_name FROM lineitem l, part p +WHERE l.l_partkey= p.p_partkey; + +SELECT p_mfgr, COUNT(*) FROM v1 GROUP BY p_mfgr; + +--echo # +--echo # The two queries should return the same result as the above one +--echo # + +LET $query= SELECT p_mfgr, COUNT(DISTINCT p_name) FROM lineitem l, part p +WHERE l.l_partkey= p.p_partkey +GROUP BY p.p_mfgr; + +eval $query; + +SET tmp_table_size=1024; +--echo # +--echo # merge_walk() get called, that is merging of sorted chunks of file happens +--echo # +eval $query; +SET tmp_table_size = @save_tmp_table_size; +DROP VIEW v1; + +--echo # The result should be 200 +SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p; +--echo # +--echo # merge_many_buffers get called, followed by merge_walk() +--echo # +SET tmp_table_size=1024; +SELECT COUNT(DISTINCT p_name) FROM lineitem l, part p; +SET tmp_table_size = @save_tmp_table_size; + +CREATE VIEW v1 AS SELECT DISTINCT n_name, c_name FROM customer, orders, nation +WHERE o_custkey= c_custkey and c_nationkey= n_nationkey; + +--echo # +--echo # These 2 queries should return the same result +--echo # + +SELECT n_name, GROUP_CONCAT(c_name ORDER BY c_name), COUNT(c_name) FROM v1 GROUP BY n_name; + +SELECT n_name, GROUP_CONCAT(DISTINCT c_name), COUNT(DISTINCT c_name) +FROM customer, orders, nation +WHERE o_custkey= c_custkey and c_nationkey= n_nationkey GROUP BY n_name; + +DROP VIEW v1; + +--echo # +--echo # Tests for packed unique during EITS collection +--echo # + +SET @save_histogram_size= @@histogram_size; +SET histogram_size=0; +--echo # +--echo # Testing when histograms are not created, this tests the function count_distinct_single_occurence_walk +--echo # passed as a callback function to walk +--echo # + +--source include/unique_packed_eits.inc + +--echo # +--echo # Testing when histograms are created,this tests the function histogram_build_walk +--echo # passed as a callback function to walk +--echo # + +SET histogram_size=255; +--source include/unique_packed_eits.inc + + +--echo # +--echo # Testing with very small memory for Unique, will do merging +--echo # + +SET @save_max_heap_table_size= @@max_heap_table_size; +SET max_heap_table_size=16384; +--source include/unique_packed_eits.inc +SET max_heap_table_size= @save_max_heap_table_size; + +SET histogram_size= @save_histogram_size; + +--echo # +--echo # Test with CHAR types +--echo # + +CREATE VIEW v1 AS +SELECT DISTINCT c_phone FROM customer, orders WHERE c_custkey=o_custkey; + +SELECT COUNT(*) FROM v1; +SELECT COUNT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; + +SELECT GROUP_CONCAT(c_phone ORDER BY c_phone) FROM v1; +SELECT GROUP_CONCAT(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; +SELECT JSON_ARRAYAGG(DISTINCT c_phone) FROM customer, orders WHERE c_custkey=o_custkey; +DROP VIEW v1; + +CREATE VIEW v1 AS +SELECT DISTINCT c_name FROM customer, orders WHERE c_custkey=o_custkey; + +--echo # +--echo # Test with VARCHAR types +--echo # + +SELECT GROUP_CONCAT(c_name ORDER BY c_name) FROM v1; +SELECT GROUP_CONCAT(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey; +SELECT JSON_ARRAYAGG(DISTINCT c_name) FROM customer, orders WHERE c_custkey=o_custkey; +DROP VIEW v1; + +--echo # +--echo # Test with FIXED TYPES +--echo # + +CREATE VIEW v1 AS +SELECT DISTINCT c_nationkey FROM customer, orders WHERE c_custkey=o_custkey; + +SELECT GROUP_CONCAT(c_nationkey ORDER BY c_nationkey) FROM v1; +SELECT GROUP_CONCAT(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey; +SELECT JSON_ARRAYAGG(DISTINCT c_nationkey) FROM customer, orders WHERE c_custkey=o_custkey; + +DROP VIEW v1; + +DROP DATABASE dbt3; + +USE test; + +--echo # +--echo # Testing for merge_buffers when Unique is packed +--echo # + +CREATE TABLE t1 (a VARCHAR(1500)); +INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+300) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+400) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+500) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+600) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+700) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+800) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+900) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1000) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1100) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100; +INSERT INTO t1 SELECT repeat('a', seq+1200) FROM seq_1_to_100; + +SET @save_max_heap_table_size= @@max_heap_table_size; +SET max_heap_table_size=16384; +ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (a) INDEXES(); + +SELECT column_name, avg_frequency FROM mysql.column_stats where table_name='t1' AND column_name='a'; +SELECT COUNT(*), COUNT(DISTINCT a) FROM t1; +SET max_heap_table_size= @save_max_heap_table_size; + +DROP TABLE t1; + +--echo # +--echo # Simple test for DISTINCT with VARCHAR, CHAR and BLOBS +--echo # + +--echo # +--echo # Test with VARCHAR types +--echo # + +CREATE TABLE t1 (a VARCHAR(2056), b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(2056) NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +DROP TABLE t1; + +--echo # +--echo # Test with CHAR types +--echo # + +CREATE TABLE t1 (a CHAR(255), b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a CHAR(255) NOT NULL, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; + + +--echo # +--echo # Test with INT types (fixed size datatypes) +--echo # + +SELECT GROUP_CONCAT(DISTINCT b) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1; + +DROP TABLE t1; + +--echo # +--echo # Test with BLOB types +--echo # + +CREATE TABLE t1 (a BLOB, b INT NOT NULL); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +INSERT INTO t1 VALUES ('a',2),('b',2),('c',1),('a',3),('b',4),('c',4), (NULL, 5), (NULL, 6); +SELECT GROUP_CONCAT(DISTINCT a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +DROP TABLE t1; + +--echo # +--echo # Multiple arguments to GROUP_CONCAT +--echo # + +CREATE TABLE t1 (a VARCHAR(10) NOT NULL, b INT); +INSERT INTO t1 VALUES ('a',259),('a',11); +INSERT INTO t1 VALUES ('a',259),('a',11); +INSERT INTO t1 VALUES ('b',259),('c',11); +SELECT GROUP_CONCAT(DISTINCT a, b) FROM t1; +SELECT GROUP_CONCAT(DISTINCT a, b+1) FROM t1; +DROP TABLE t1; diff --git a/plugin/type_inet/sql_type_inet.cc b/plugin/type_inet/sql_type_inet.cc index 14d854be14f..309457d3393 100644 --- a/plugin/type_inet/sql_type_inet.cc +++ b/plugin/type_inet/sql_type_inet.cc @@ -1406,7 +1406,7 @@ void Type_handler_inet6::make_sort_key_part(uchar *to, Item *item, uint Type_handler_inet6::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { DBUG_ASSERT(item->type_handler() == this); NativeBufferInet6 tmp; diff --git a/plugin/type_inet/sql_type_inet.h b/plugin/type_inet/sql_type_inet.h index 03153af6ddc..a02a1fca42f 100644 --- a/plugin/type_inet/sql_type_inet.h +++ b/plugin/type_inet/sql_type_inet.h @@ -530,7 +530,7 @@ public: const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; diff --git a/sql/field.cc b/sql/field.cc index 2173670572d..587cbdfb8c3 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1064,6 +1064,47 @@ Field::make_packed_sort_key_part(uchar *buff, } +/* + @brief + Create a packed sort key part + + @param buff buffer where values are written + @param sort_field sort column structure + + @details + This function stores the original values for the fixed size columns and + does not covert them to mem-comparable images. + + @retval + length of the bytes written, does not include the NULL bytes +*/ + +uint +Field::make_packed_key_part(uchar *buff, const SORT_FIELD_ATTR *sort_field) +{ + if (maybe_null()) + { + if (is_null()) + { + *buff++= 0; + return 0; // For NULL values don't write any data + } + *buff++=1; + } + memcpy(buff, ptr, sort_field->original_length); + return sort_field->original_length; +} + + +uint +Field_longstr::make_packed_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) +{ + return make_packed_sort_key_part(buff, sort_field); +} + + + uint Field_longstr::make_packed_sort_key_part(uchar *buff, const SORT_FIELD_ATTR *sort_field) diff --git a/sql/field.h b/sql/field.h index dfc02149f9d..351dd9620a0 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1110,6 +1110,11 @@ public: */ virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } + /* + returns the sort_length for a field without the suffix length bytes + for field with binary charset. + */ + virtual uint32 sort_length_without_suffix() const { return pack_length(); } /* sort_suffix_length() return the length bytes needed to store the length @@ -1470,6 +1475,9 @@ public: virtual uint make_packed_sort_key_part(uchar *buff, const SORT_FIELD_ATTR *sort_field); + virtual uint make_packed_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field); + virtual void make_send_field(Send_field *); /* @@ -2222,6 +2230,8 @@ public: bool is_packable() const override { return true; } uint make_packed_sort_key_part(uchar *buff, const SORT_FIELD_ATTR *sort_field)override; + uint make_packed_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) override; uchar* pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field); }; @@ -4148,6 +4158,10 @@ public: { return (uint32) field_length + sort_suffix_length(); } + uint32 sort_length_without_suffix() const override + { + return (uint32) field_length; + } virtual uint32 sort_suffix_length() const override { return (field_charset() == &my_charset_bin ? length_bytes : 0); @@ -4496,6 +4510,10 @@ public: { return (uint32) (packlength); } uint row_pack_length() const override { return pack_length_no_ptr(); } uint32 sort_length() const override; + uint32 sort_length_without_suffix() const override + { + return (uint32)field_length; + } uint32 sort_suffix_length() const override; uint32 value_length() override { return get_length(); } virtual uint32 max_data_length() const override diff --git a/sql/filesort.cc b/sql/filesort.cc index 0337325b544..7a77694410f 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -35,6 +35,7 @@ #include "filesort_utils.h" #include "sql_select.h" #include "debug_sync.h" +#include "uniques.h" /* functions defined in this file */ @@ -1711,6 +1712,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, uint size_of_sort_length= param->using_packed_sortkeys() ? Sort_keys::size_of_length_field : 0; + uint size_of_dupl_count= param->min_dupl_count ? + sizeof(element_count) : 0; for (; ix < count; ++ix) { @@ -1726,14 +1729,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, buffpek->buffer_end()) break; // Incomplete record. - uchar *plen= record + sort_length; + uchar *plen= record + sort_length + size_of_dupl_count; + uint res_length= param->get_result_length(plen); if (plen + res_length > buffpek->buffer_end()) break; // Incomplete record. - DBUG_ASSERT(res_length > 0); + DBUG_ASSERT(!param->sort_keys || res_length > 0); DBUG_ASSERT(sort_length + res_length <= param->rec_length); record+= sort_length; record+= res_length; + record+= size_of_dupl_count; } DBUG_ASSERT(ix > 0); count= ix; @@ -1829,12 +1834,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, rec_length= param->rec_length; res_length= param->res_length; sort_length= param->sort_length; - uint dupl_count_ofs= rec_length-sizeof(element_count); uint min_dupl_count= param->min_dupl_count; + uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0; + bool check_dupl_count= flag && min_dupl_count; offset= (rec_length- (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length); - uint wr_len= flag ? res_length : rec_length; uint wr_offset= flag ? offset : 0; const bool using_packed_sortkeys= param->using_packed_sortkeys(); @@ -1884,10 +1889,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, Store it also in 'to_file'. */ buffpek= (Merge_chunk*) queue_top(&queue); + rec_length= param->get_key_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + + DBUG_ASSERT(rec_length <= param->sort_length); + memcpy(unique_buff, buffpek->current_key(), rec_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + buffpek->advance_current_key(rec_length); buffpek->decrement_mem_count(); if (buffpek->mem_count() == 0) @@ -1917,28 +1928,35 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, src= buffpek->current_key(); if (cmp) // Remove duplicates { - uchar *current_key= buffpek->current_key(); - if (!(*cmp)(first_cmp_arg, &unique_buff, ¤t_key)) + if (!(*cmp)(first_cmp_arg, &unique_buff, &src)) { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; } + rec_length= param->get_key_length_for_unique(buffpek->current_key(), + size_of_dupl_count); goto skip_duplicate; } + if (min_dupl_count) { - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); } + rec_length= param->get_key_length_for_unique(unique_buff, + size_of_dupl_count); + res_length= rec_length - size_of_dupl_count; src= unique_buff; } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); { - param->get_rec_and_res_len(buffpek->current_key(), - &rec_length, &res_length); const uint bytes_to_write= (flag == 0) ? rec_length : res_length; /* @@ -1960,10 +1978,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (cmp) { + rec_length= param->get_key_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + DBUG_ASSERT(rec_length <= param->sort_length); memcpy(unique_buff, buffpek->current_key(), rec_length); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + { + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + } } if (!--max_rows) { @@ -2006,6 +2029,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; @@ -2015,13 +2039,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (min_dupl_count) - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + { + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); + } if (!check_dupl_count || dupl_count >= min_dupl_count) { src= unique_buff; - if (my_b_write(to_file, src+wr_offset, wr_len)) + res_length = rec_length - size_of_dupl_count; + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if (my_b_write(to_file, + src + (offset_for_packing ? + rec_length - res_length : // sort length + wr_offset), + bytes_to_write)) goto err; /* purecov: inspected */ if (!--max_rows) goto end; @@ -2039,17 +2072,24 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, for (uint ix= 0; ix < buffpek->mem_count(); ++ix) { uchar *src= buffpek->current_key(); - param->get_rec_and_res_len(src, - &rec_length, &res_length); - const uint bytes_to_write= (flag == 0) ? rec_length : res_length; - if (check_dupl_count) + if (cmp) { - memcpy((uchar *) &dupl_count, - buffpek->current_key() + offset + dupl_count_ofs, - sizeof(dupl_count)); - if (dupl_count < min_dupl_count) - continue; + rec_length= param->get_key_length_for_unique(src, size_of_dupl_count); + res_length= rec_length - size_of_dupl_count; + if (check_dupl_count) + { + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count)); + + if (dupl_count < min_dupl_count) + continue; + } } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); + + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if(my_b_write(to_file, src + (offset_for_packing ? rec_length - res_length : // sort length @@ -2557,11 +2597,30 @@ void Sort_param::try_to_pack_sortkeys() rec_length= sort_length + addon_length; } +/* + @brief + Return the length of the key inserted in the Unique tree + + @param + to key value + size_of_dupl_count if min_dupl_count > 0, then the record length + needs size_of_dupl_count to store the counter +*/ + +uint32 Sort_param::get_key_length_for_unique(uchar *key, + uint size_of_dupl_count) +{ + if (!using_packed_sortkeys()) + return rec_length; + return Variable_size_keys_descriptor::read_packed_length(key) + + size_of_dupl_count; +} + uint Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { CHARSET_INFO *cs= item->collation.collation; bool maybe_null= item->maybe_null; @@ -2569,7 +2628,7 @@ Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, if (maybe_null) *to++= 1; - Binary_string *res= item->str_result(¶m->tmp_buffer); + Binary_string *res= item->str_result(tmp_buffer); if (!res) { if (maybe_null) @@ -2600,7 +2659,7 @@ Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, uint Type_handler_int_result::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { longlong value= item->val_int_result(); return make_packed_sort_key_longlong(to, item->maybe_null, @@ -2612,7 +2671,7 @@ Type_handler_int_result::make_packed_sort_key_part(uchar *to, Item *item, uint Type_handler_decimal_result::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf); if (item->maybe_null) @@ -2634,7 +2693,7 @@ Type_handler_decimal_result::make_packed_sort_key_part(uchar *to, Item *item, uint Type_handler_real_result::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { double value= item->val_result(); if (item->maybe_null) @@ -2655,7 +2714,7 @@ Type_handler_real_result::make_packed_sort_key_part(uchar *to, Item *item, uint Type_handler_temporal_result::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { MYSQL_TIME buf; // This is a temporal type. No nanoseconds. Rounding mode is not important. @@ -2677,7 +2736,7 @@ Type_handler_temporal_result::make_packed_sort_key_part(uchar *to, Item *item, uint Type_handler_timestamp_common::make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const + String *tmp_buffer) const { THD *thd= current_thd; uint binlen= my_timestamp_binary_length(item->decimals); @@ -2776,6 +2835,93 @@ void SORT_FIELD_ATTR::set_length_and_original_length(THD *thd, uint length_arg) /* + @brief + Setup the SORT_FIELD structure for a key part of a variable size key + + @param + fld field structure + + @notes + Currently used only by Unique object + +*/ + +void SORT_FIELD::setup_key_part_for_variable_size_key(Field *fld) +{ + field= fld; + item= NULL; + reverse= false; + SORT_FIELD_ATTR::setup_key_part_for_variable_size_key(fld); +} + + +/* + @brief + Setup the SORT_FIELD structure for an key part of a variable size key + + @param + fld Item structure + + @notes + Currently used only by Unique object +*/ + +void SORT_FIELD::setup_key_part_for_variable_size_key(Item *item_arg) +{ + Field *fld= item_arg->get_tmp_table_field(); + DBUG_ASSERT(fld); + item= item_arg; + field= NULL; + reverse= false; + SORT_FIELD_ATTR::setup_key_part_for_variable_size_key(fld); +} + + +/* + @brief + Setup the SORT_FIELD structure for a field of a fixed size key + + @param + fld field structure + + @note + Currently used only by Unique object +*/ + +void SORT_FIELD::setup_key_part_for_fixed_size_key(Field *fld) +{ + field= fld; + item= NULL; + reverse= false; + SORT_FIELD_ATTR::setup_key_part_for_fixed_size_key(fld); +} + + +void SORT_FIELD_ATTR::setup_key_part_for_fixed_size_key(Field *field) +{ + original_length= length= field->pack_length(); + cs= field->charset(); + suffix_length= 0; + type= SORT_FIELD_ATTR::FIXED_SIZE; + maybe_null= field->maybe_null(); + length_bytes= 0; +} + + +void SORT_FIELD_ATTR::setup_key_part_for_variable_size_key(Field *fld) +{ + original_length= length= fld->sort_length_without_suffix(); + cs= fld->sort_charset(); + suffix_length= 0; + type= fld->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + maybe_null= fld->maybe_null(); + length_bytes= is_variable_sized() ? number_storage_requirement(length) : 0; +} + + +/* Compare function used for packing sort keys */ @@ -2786,6 +2932,38 @@ qsort2_cmp get_packed_keys_compare_ptr() /* + @brief + Compare null-ability of 2 keys + + @param + a key to be compared + b key to be compared + + @retval + -1 key a is NULL + 1 key b is NULL + 0 either key a and key b are both NULL or + both are NOT NULL +*/ + +int SORT_FIELD_ATTR::compare_nullability(uchar *a, uchar *b) +{ + if (*a != *b) + { + if (*a == 0) + return -1; + return 1; + } + else + { + if (*a == 0) + return 0; + } + return 0; +} + + +/* Compare two varstrings. The strings are in this data format: @@ -2803,20 +2981,10 @@ int SORT_FIELD_ATTR::compare_packed_varstrings(uchar *a, size_t *a_len, if (maybe_null) { *a_len= *b_len= 1; // NULL bytes are always stored - if (*a != *b) - { - // Note we don't return a proper value in *{a|b}_len for the non-NULL - // value but that's ok - if (*a == 0) - return -1; - else - return 1; - } - else - { - if (*a == 0) - return 0; - } + int cmp_val; + + if ((cmp_val= compare_nullability(a, b)) || *a == 0) + return cmp_val; a++; b++; } @@ -2848,6 +3016,51 @@ int SORT_FIELD_ATTR::compare_packed_varstrings(uchar *a, size_t *a_len, /* + @brief + Compare two packed varstring + + @param a key to be compared + @param b key to be compared + + @details + This function compares packed values of two keys with a collation specific + comparison function. + + @notes + This function basically does the same work as compare_packed_varstring + but the only difference is that this function is invoked when the key + has only one key part. This is currently used by Unique only as most + of the cases where Unique is used involves one key component. + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ + +int +SORT_FIELD_ATTR::compare_packed_varstrings(uchar *a, uchar *b) +{ + size_t a_length, b_length; + if (maybe_null) + { + int cmp_val; + if ((cmp_val= compare_nullability(a, b)) || *a == 0) + return cmp_val; + + a++; + b++; + } + + a_length= read_keypart_length(a, length_bytes); + b_length= read_keypart_length(b, length_bytes); + + return cs->strnncollsp(a + length_bytes, a_length, + b + length_bytes, b_length); +} + + +/* A value comparison function that has a signature that's suitable for comparing packed values, but actually compares fixed-size values with memcmp. @@ -2862,18 +3075,10 @@ int SORT_FIELD_ATTR::compare_packed_fixed_size_vals(uchar *a, size_t *a_len, { *a_len=1; *b_len=1; - if (*a != *b) - { - if (*a == 0) - return -1; - else - return 1; - } - else - { - if (*a == 0) - return 0; - } + int cmp_val; + if ((cmp_val= compare_nullability(a, b)) || *a == 0) + return cmp_val; + a++; b++; } @@ -2882,7 +3087,53 @@ int SORT_FIELD_ATTR::compare_packed_fixed_size_vals(uchar *a, size_t *a_len, *a_len+= length; *b_len+= length; - return memcmp(a,b, length); + return memcmp(a, b, length); +} + + +/* + @brief + Comparison function to compare fixed size key parts via Field::cmp + + @param a key for comparison + @param b key for comparison + @param a_len [OUT] length of the value for the key part in key a + @param b_len [OUT] length of the value for the key part in key b + + @details + A value comparison function that has a signature that's suitable for + comparing packed values, but actually compares fixed-size values with + Field::cmp. + + @notes + This is used for ordering fixed-size columns when the keys are added + to the Unique tree + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ + +int SORT_FIELD::compare_fixed_size_vals(uchar *a, size_t *a_len, + uchar *b, size_t *b_len) +{ + if (maybe_null) + { + *a_len=1; + *b_len=1; + int cmp_val; + if ((cmp_val= compare_nullability(a, b)) || *a == 0) + return cmp_val; + + a++; + b++; + } + else + *a_len= *b_len= 0; + *a_len+= length; + *b_len+= length; + return field->cmp(a, b); } @@ -2905,16 +3156,45 @@ int compare_packed_sort_keys(void *sort_param, unsigned char **a_ptr, unsigned char **b_ptr) { int retval= 0; - size_t a_len, b_len; Sort_param *param= (Sort_param*)sort_param; Sort_keys *sort_keys= param->sort_keys; uchar *a= *a_ptr; uchar *b= *b_ptr; - a+= Sort_keys::size_of_length_field; - b+= Sort_keys::size_of_length_field; - for (SORT_FIELD *sort_field= sort_keys->begin(); - sort_field != sort_keys->end(); sort_field++) + if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field, + b + Sort_keys::size_of_length_field))) + return retval; + + /* + this comparison is done for the case when the sort keys is appended with + the ROW_ID pointer. For such cases we don't have addon fields + so we can make a memcmp check over both the sort keys + */ + if (!param->using_addon_fields()) + { + a+= Sort_keys::read_sortkey_length(a); + b+= Sort_keys::read_sortkey_length(b); + retval= memcmp(a, b, param->res_length); + } + return retval; +} + + +/* + @brief + Compare two sort keys + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ + +int Sort_keys::compare_keys(uchar *a, uchar *b) +{ + int retval= 0; + size_t a_len, b_len; + for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++) { retval= sort_field->is_variable_sized() ? sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : @@ -2925,21 +3205,30 @@ int compare_packed_sort_keys(void *sort_param, a+= a_len; b+= b_len; - } - /* - this comparison is done for the case when the sort keys is appended with - the ROW_ID pointer. For such cases we don't have addon fields - so we can make a memcmp check over both the sort keys - */ - if (!param->using_addon_fields()) - retval= memcmp(a, b, param->res_length); return retval; } /* @brief + Compare two packed sort keys with a single key part + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ +int Sort_keys::compare_keys_for_single_arg(uchar *a, uchar *b) +{ + SORT_FIELD *sort_field= begin(); + + return sort_field->compare_packed_varstrings(a, b); +} + + +/* + @brief Store a packed string in the buffer @param to buffer @@ -3066,7 +3355,7 @@ static uint make_packed_sortkey(Sort_param *param, uchar *to) { // Field length= field->make_packed_sort_key_part(to, sort_field); - if ((maybe_null= field->maybe_null())) + if ((maybe_null= sort_field->maybe_null)) to++; } else @@ -3074,8 +3363,8 @@ static uint make_packed_sortkey(Sort_param *param, uchar *to) Item *item= sort_field->item; length= item->type_handler()->make_packed_sort_key_part(to, item, sort_field, - param); - if ((maybe_null= sort_field->item->maybe_null)) + ¶m->tmp_buffer); + if ((maybe_null= sort_field->maybe_null)) to++; } to+= length; diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 032ecb1bb91..5fdd612589c 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1487,7 +1487,7 @@ append_null: } -static int append_json_value_from_field(String *str, +static bool append_json_value_from_field(String *str, Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val) { if (i->type_handler()->is_bool_type()) @@ -1533,6 +1533,73 @@ append_null: } +/* + @brief + Append the value of a field in JSON format + + @param + str buffer to write the value + item argument to JSON_ARRAYAGG item + field field whose value needs to be appended + tmp_val temp buffer + + @note + Use this function when the field has the value in its ptr. + This is currently used when packing is done for JSON_ARRAYAGG function. + In the case of packing, we have to unpack the value to Field::ptr. + + @retval + FALSE value appended in JSON format + TRUE error +*/ + +static bool append_json_value_from_field(String *str, Item *item, Field *field, + String *tmp_val) +{ + if (item->type_handler()->is_bool_type()) + { + if (field->is_null()) + goto append_null; + + longlong v_int= field->val_int(); + const char *t_f; + int t_f_len; + + if (v_int) + { + t_f= "true"; + t_f_len= 4; + } + else + { + t_f= "false"; + t_f_len= 5; + } + + return str->append(t_f, t_f_len); + } + { + if (field->is_null()) + goto append_null; + String *sv= field->val_str(tmp_val); + + if (item->is_json_type()) + return str->append(sv->ptr(), sv->length()); + + if (item->result_type() == STRING_RESULT) + { + return str->append("\"", 1) || + st_append_escaped(str, sv) || + str->append("\"", 1); + } + return st_append_escaped(str, sv); + } + +append_null: + return str->append("null", 4); +} + + static int append_json_keyname(String *str, Item *item, String *tmp_val) { String *sv= item->val_str(tmp_val); @@ -3726,6 +3793,19 @@ String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f, } +String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f, + String *tmp) +{ + m_tmp_json.length(0); + + if (append_json_value_from_field(&m_tmp_json, i, f, tmp)) + return NULL; + + return &m_tmp_json; + +} + + void Item_func_json_arrayagg::cut_max_length(String *result, uint old_length, uint max_length) const { diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h index ec6c6696001..bf1db777a8b 100644 --- a/sql/item_jsonfunc.h +++ b/sql/item_jsonfunc.h @@ -548,6 +548,7 @@ protected: String *get_str_from_item(Item *i, String *tmp); String *get_str_from_field(Item *i, Field *f, String *tmp, const uchar *key, size_t offset); + String *get_str_from_field(Item *i, Field *f, String *tmp); void cut_max_length(String *result, uint old_length, uint max_length) const; public: diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 5d0578df4ac..19a5c1ab82c 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -34,6 +34,7 @@ #include "sp.h" #include "sql_parse.h" #include "sp_head.h" +#include "sql_sort.h" /** Calculate the affordable RAM limit for structures like TREE or Unique @@ -668,37 +669,28 @@ C_MODE_END /** - Correctly compare composite keys. - - Used by the Unique class to compare keys. Will do correct comparisons - for composite keys with various field types. + @brief + Compare composite variable size keys. + + @notes + Used by the Unique class to compare packed keys. + Will do correct comparisons for composite keys with various field types. @param arg Pointer to the relevant Aggregator_distinct instance @param key1 left key image @param key2 right key image + @return comparison result @retval <0 if key1 < key2 @retval =0 if key1 = key2 @retval >0 if key1 > key2 */ -int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2) +int Aggregator_distinct::key_cmp(void* arg, uchar* key1, uchar* key2) { Aggregator_distinct *aggr= (Aggregator_distinct *) arg; - Field **field = aggr->table->field; - Field **field_end= field + aggr->table->s->fields; - uint32 *lengths=aggr->field_lengths; - for (; field < field_end; ++field) - { - Field* f = *field; - int len = *lengths++; - int res = f->cmp(key1, key2); - if (res) - return res; - key1 += len; - key2 += len; - } - return 0; + DBUG_ASSERT(aggr->tree); + return aggr->tree->get_descriptor()->compare_keys(key1, key2); } @@ -757,8 +749,8 @@ bool Aggregator_distinct::setup(THD *thd) if (item_sum->setup(thd)) return TRUE; - if (item_sum->sum_func() == Item_sum::COUNT_FUNC || - item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + + if (item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) { List<Item> list; SELECT_LEX *select_lex= thd->lex->current_select; @@ -767,13 +759,19 @@ bool Aggregator_distinct::setup(THD *thd) return TRUE; /* Create a table with an unique key over all parameters */ + uint non_const_items= 0; for (uint i=0; i < item_sum->get_arg_count() ; i++) { Item *item=item_sum->get_arg(i); if (list.push_back(item, thd->mem_root)) return TRUE; // End of memory - if (item->const_item() && item->is_null()) - always_null= true; + if (item->const_item()) + { + if (item->is_null()) + always_null= true; + } + else + non_const_items++; } if (always_null) return FALSE; @@ -844,14 +842,14 @@ bool Aggregator_distinct::setup(THD *thd) compare method that can take advantage of not having to worry about other fields. */ - compare_key= (qsort_cmp2) simple_str_key_cmp; + compare_key= (qsort_cmp2) key_cmp; cmp_arg= (void*) table->field[0]; /* tree_key_length has been set already */ } else { uint32 *length; - compare_key= (qsort_cmp2) composite_key_cmp; + compare_key= (qsort_cmp2) key_cmp; cmp_arg= (void*) this; field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32)); for (tree_key_length= 0, length= field_lengths, field= table->field; @@ -862,16 +860,29 @@ bool Aggregator_distinct::setup(THD *thd) } } } + + bool allow_packing= item_sum->is_packing_allowed(table, &tree_key_length); + + if (allow_packing) + { + compare_key= (qsort_cmp2) key_cmp; + cmp_arg= (void*)this; + } DBUG_ASSERT(tree == 0); - tree= new Unique(compare_key, cmp_arg, tree_key_length, - item_sum->ram_limitation(thd)); + + tree= item_sum->get_unique(compare_key, cmp_arg, tree_key_length, + item_sum->ram_limitation(thd), 0, + allow_packing, non_const_items); /* The only time tree_key_length could be 0 is if someone does count(distinct) on a char(0) field - stupid thing to do, but this has to be handled - otherwise someone can crash the server with a DoS attack */ - if (! tree) + if (!tree || + (tree->get_descriptor()->setup_for_item(thd, item_sum, + non_const_items, + item_sum->get_arg_count()))) return TRUE; } return FALSE; @@ -920,9 +931,9 @@ bool Aggregator_distinct::setup(THD *thd) simple_raw_key_cmp because the table contains numbers only; decimals are converted to binary representation as well. */ - tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length, - item_sum->ram_limitation(thd)); - + tree= item_sum->get_unique(simple_raw_key_cmp, &tree_key_length, tree_key_length, + item_sum->ram_limitation(thd), 0, + false, 1); DBUG_RETURN(tree == 0); } } @@ -959,6 +970,46 @@ void Aggregator_distinct::clear() } +/* + @brief + Insert a record inside the Unique tree + + @retval + -1 NULL value, record rejected + 0 record successfully inserted into the tree + 1 error +*/ +int Aggregator_distinct::insert_record_to_unique() +{ + if (tree->is_variable_sized()) + { + uchar *rec_ptr; + Descriptor *descriptor= tree->get_descriptor(); + if ((rec_ptr= descriptor->make_record(true)) == NULL) + return -1; // NULL value + DBUG_ASSERT(descriptor->get_length_of_key(rec_ptr) <= tree->get_size()); + return tree->unique_add(rec_ptr); + } + + copy_fields(tmp_table_param); + if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) + return 1; + + for (Field **field=table->field ; *field ; field++) + if ((*field)->is_real_null(0)) + return -1; // Don't count NULL + + /* + The first few bytes of record (at least one) are just markers + for deleted and NULLs. We want to skip them since they will + bloat the tree without providing any valuable info. Besides, + key_length used to initialize the tree didn't include space for them. + */ + + return tree->unique_add(table->record[0] + table->s->null_bytes); +} + + /** Process incoming row. @@ -980,9 +1031,16 @@ bool Aggregator_distinct::add() if (always_null) return 0; - if (item_sum->sum_func() == Item_sum::COUNT_FUNC || - item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + if (item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) { + if (tree) + { + int retval= insert_record_to_unique(); + if (retval == -1) + return false; + return retval != 0; + } + int error; copy_fields(tmp_table_param); if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) @@ -990,18 +1048,8 @@ bool Aggregator_distinct::add() for (Field **field=table->field ; *field ; field++) if ((*field)->is_real_null(0)) - return 0; // Don't count NULL + return 0; // Don't count NULL - if (tree) - { - /* - The first few bytes of record (at least one) are just markers - for deleted and NULLs. We want to skip them since they will - bloat the tree without providing any valuable info. Besides, - key_length used to initialize the tree didn't include space for them. - */ - return tree->unique_add(table->record[0] + table->s->null_bytes); - } if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) && table->file->is_fatal_error(error, HA_CHECK_DUP)) return TRUE; @@ -1014,6 +1062,8 @@ bool Aggregator_distinct::add() return 0; DBUG_ASSERT(tree); item_sum->null_value= 0; + + DBUG_ASSERT(tree->is_variable_sized() == false); /* '0' values are also stored in the tree. This doesn't matter for SUM(DISTINCT), but is important for AVG(DISTINCT) @@ -1052,7 +1102,7 @@ void Aggregator_distinct::endup() { DBUG_ASSERT(item_sum->fixed == 1); Item_sum_count *sum= (Item_sum_count *)item_sum; - if (tree && tree->elements == 0) + if (tree && tree->is_in_memory()) { /* everything fits in memory */ sum->count= (longlong) tree->elements_in_tree(); @@ -3566,51 +3616,34 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1, */ int group_concat_key_cmp_with_distinct_with_nulls(void* arg, - const void* key1_arg, - const void* key2_arg) + const void* key1, + const void* key2) { Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + return item_func->unique_filter->get_descriptor() + ->compare_keys((uchar *)key1, (uchar *)key2); +} - uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes; - uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes; - - /* - JSON_ARRAYAGG function only accepts one argument. - */ - - Item *item= item_func->args[0]; - /* - If item is a const item then either get_tmp_table_field returns 0 - or it is an item over a const table. - */ - if (item->const_item()) - return 0; - /* - We have to use get_tmp_table_field() instead of - real_item()->get_tmp_table_field() because we want the field in - the temporary table, not the original field - */ - Field *field= item->get_tmp_table_field(); - - if (!field) - return 0; - if (field->is_null_in_record((uchar*)key1_arg) && - field->is_null_in_record((uchar*)key2_arg)) - return 0; +/** + Compares the packed values for fields in expr list of GROUP_CONCAT. - if (field->is_null_in_record((uchar*)key1_arg)) - return -1; + @return + @retval -1 : key1 < key2 + @retval 0 : key1 = key2 + @retval 1 : key1 > key2 +*/ - if (field->is_null_in_record((uchar*)key2_arg)) - return 1; +int group_concat_packed_key_cmp_with_distinct(void *arg, + const void *a_ptr, + const void *b_ptr) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; - uint offset= (field->offset(field->table->record[0]) - - field->table->s->null_bytes); - int res= field->cmp(key1 + offset, key2 + offset); - if (res) - return res; - return 0; + DBUG_ASSERT(item_func->unique_filter); + uchar *a= (uchar*)a_ptr; + uchar *b= (uchar*)b_ptr; + return item_func->unique_filter->get_descriptor()->compare_keys(a, b); } @@ -3774,9 +3807,9 @@ void Item_func_group_concat::cut_max_length(String *result, Append data from current leaf to item->result. */ -extern "C" -int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), - void* item_arg) +int Item_func_group_concat::dump_leaf_key(void* key_arg, + element_count count __attribute__((unused)), + void* item_arg) { Item_func_group_concat *item= (Item_func_group_concat *) item_arg; TABLE *table= item->table; @@ -3784,7 +3817,7 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), String tmp((char *)table->record[1], table->s->reclength, default_charset_info); String tmp2; - uchar *key= (uchar *) key_arg; + const uchar *key= (const uchar *) key_arg; String *result= &item->result; Item **arg= item->args, **arg_end= item->args + item->arg_count_field; uint old_length= result->length(); @@ -3866,6 +3899,128 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), /** + Append data from current leaf of variable size to item->result. +*/ +int +Item_func_group_concat::dump_leaf_variable_sized_key(void *key_arg, + element_count __attribute__((unused)), + void *item_arg) +{ + Item_func_group_concat *item= (Item_func_group_concat *) item_arg; + TABLE *table= item->table; + uint max_length= (uint)table->in_use->variables.group_concat_max_len; + String tmp((char *)table->record[1], table->s->reclength, + default_charset_info); + String tmp2; + + const uchar *key= (const uchar *) key_arg; + const uchar *key_end= NULL; + String *result= &item->result; + + Item **arg= item->args, **arg_end= item->args + item->arg_count_field; + + uint old_length= result->length(); + SORT_FIELD *pos; + + pos= item->unique_filter->get_descriptor()->get_sortorder(); + key_end= key + item->unique_filter->get_full_size(); + key+= Variable_size_keys_descriptor::size_of_length_field; + + ulonglong *offset_limit= &item->copy_offset_limit; + ulonglong *row_limit = &item->copy_row_limit; + if (item->limit_clause && !(*row_limit)) + { + item->result_finalized= true; + return 1; + } + + tmp.length(0); + + if (item->limit_clause && (*offset_limit)) + { + item->row_count++; + (*offset_limit)--; + return 0; + } + + if (!item->result_finalized) + item->result_finalized= true; + else + result->append(*item->separator); + + for (; arg < arg_end; arg++) + { + String *res; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + We also can't use table->field array to access the fields + because it contains both order and arg list fields. + */ + if ((*arg)->const_item()) + res= item->get_str_from_item(*arg, &tmp); + else + { + Field *field= (*arg)->get_tmp_table_field(); + if (field) + { + if (field->maybe_null()) + { + if (*key == 0) + { + // Case with NULL value + field->set_null(); + res= item->get_str_from_field(*arg, field, &tmp); + key++; + } + else + { + // Case with NOT NULL value + field->set_notnull(); + const uchar *end= field->unpack(field->ptr, key+1, key_end, 0); + res= item->get_str_from_field(*arg, field, &tmp); + key= end; + } + } + else + { + const uchar *end= field->unpack(field->ptr, key, key_end, 0); + res= item->get_str_from_field(*arg, field, &tmp); + key= end; + } + pos++; + } + } + + if (res) + result->append(*res); + } + + if (item->limit_clause) + (*row_limit)--; + item->row_count++; + + /* stop if length of result more than max_length */ + if (result->length() > max_length) + { + THD *thd= current_thd; + item->cut_max_length(result, old_length, max_length); + item->warning_for_row= TRUE; + report_cut_value_error(thd, item->row_count, item->func_name()); + + /** + To avoid duplicated warnings in Item_func_group_concat::val_str() + */ + if (table && table->blob_storage) + table->blob_storage->set_truncated_value(false); + return 1; + } + return 0; +} + + +/** Constructor of Item_func_group_concat. @param distinct_arg distinct @@ -4137,49 +4292,57 @@ bool Item_func_group_concat::add(bool exclude_nulls) { if (always_null && exclude_nulls) return 0; - copy_fields(tmp_table_param); - if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) - return TRUE; size_t row_str_len= 0; StringBuffer<MAX_FIELD_WIDTH> buf; String *res; - for (uint i= 0; i < arg_count_field; i++) - { - Item *show_item= args[i]; - if (show_item->const_item()) - continue; - - Field *field= show_item->get_tmp_table_field(); - if (field) - { - if (field->is_null_in_record((const uchar*) table->record[0]) && - exclude_nulls) - return 0; // Skip row if it contains null - if (tree && (res= field->val_str(&buf))) - row_str_len+= res->length(); - } - else - { - /* - should not reach here, we create temp table for all the arguments of - the group_concat function - */ - DBUG_ASSERT(0); - } - } - - null_value= FALSE; bool row_eligible= TRUE; - if (distinct) + if (distinct) { - /* Filter out duplicate rows. */ uint count= unique_filter->elements_in_tree(); - unique_filter->unique_add(get_record_pointer()); + int retval= insert_record_to_unique(); + if (retval == -1) + return false; if (count == unique_filter->elements_in_tree()) row_eligible= FALSE; } + else + { + copy_fields(tmp_table_param); + if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) + return TRUE; + } + + if (!distinct || tree) + { + for (uint i= 0; i < arg_count_field; i++) + { + Item *show_item= args[i]; + if (show_item->const_item()) + continue; + + Field *field= show_item->get_tmp_table_field(); + if (field) + { + if (field->is_null_in_record((const uchar*) table->record[0]) && + exclude_nulls) + return 0; // Skip row if it contains null + if (tree && (res= field->val_str(&buf))) + row_str_len+= res->length(); + } + else + { + /* + should not reach here, we create temp table for all the arguments of + the group_concat function + */ + DBUG_ASSERT(0); + } + } + } + + null_value= FALSE; TREE_ELEMENT *el= 0; // Only for safety if (row_eligible && tree) @@ -4292,16 +4455,22 @@ bool Item_func_group_concat::setup(THD *thd) /* Push all not constant fields to the list and create a temp table */ always_null= 0; + uint non_const_items= 0; for (uint i= 0; i < arg_count_field; i++) { Item *item= args[i]; if (list.push_back(item, thd->mem_root)) DBUG_RETURN(TRUE); - if (item->const_item() && item->is_null() && skip_nulls()) + if (item->const_item()) { - always_null= 1; - DBUG_RETURN(FALSE); + if (item->is_null() && skip_nulls()) + { + always_null= 1; + DBUG_RETURN(FALSE); + } } + else + non_const_items++; } List<Item> all_fields(list); @@ -4384,6 +4553,7 @@ bool Item_func_group_concat::setup(THD *thd) the row is not added to the result. */ uint tree_key_length= table->s->reclength - table->s->null_bytes; + bool allow_packing= is_packing_allowed(&tree_key_length); if (arg_count_order) { @@ -4402,10 +4572,19 @@ bool Item_func_group_concat::setup(THD *thd) } if (distinct) - unique_filter= new Unique(get_comparator_function_for_distinct(), - (void*)this, - tree_key_length + get_null_bytes(), - ram_limitation(thd)); + { + unique_filter= get_unique(get_comparator_function_for_distinct(allow_packing), + (void*)this, + tree_key_length + get_null_bytes(), + ram_limitation(thd), 0, allow_packing, + non_const_items); + + if (!unique_filter || + (unique_filter->get_descriptor()->setup_for_item(thd, this, + non_const_items, + arg_count_field))) + DBUG_RETURN(TRUE); + } if ((row_limit && row_limit->cmp_type() != INT_RESULT) || (offset_limit && offset_limit->cmp_type() != INT_RESULT)) { @@ -4440,7 +4619,10 @@ String* Item_func_group_concat::val_str(String* str) if (tree != NULL) // order by tree_walk(tree, &dump_leaf_key, this, left_root_right); else if (distinct) // distinct (and no order by). - unique_filter->walk(table, &dump_leaf_key, this); + unique_filter->walk(table, + unique_filter->is_variable_sized() ? + dump_leaf_variable_sized_key : + dump_leaf_key, this); else if (row_limit && copy_row_limit == (ulonglong)row_limit->val_int()) return &result; else @@ -4461,13 +4643,18 @@ String* Item_func_group_concat::val_str(String* str) /* @brief Get the comparator function for DISTINT clause + + @param packed TRUE if the record is stored in a packed format */ -qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct() +qsort_cmp2 +Item_func_group_concat::get_comparator_function_for_distinct(bool packed) { - return skip_nulls() ? - group_concat_key_cmp_with_distinct : - group_concat_key_cmp_with_distinct_with_nulls; + return packed ? + group_concat_packed_key_cmp_with_distinct : + (skip_nulls() ? + group_concat_key_cmp_with_distinct : + group_concat_key_cmp_with_distinct_with_nulls); } @@ -4495,9 +4682,9 @@ qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by() uchar* Item_func_group_concat::get_record_pointer() { - return skip_nulls() ? - table->record[0] + table->s->null_bytes : - table->record[0]; + return skip_nulls() ? + table->record[0] + table->s->null_bytes : + table->record[0]; } @@ -4518,6 +4705,147 @@ uint Item_func_group_concat::get_null_bytes() } +/* + @brief + Checks whether the unique tree is packed or not + + @retval + TRUE tree stored packed values + FALSE otherwise +*/ + +bool Item_func_group_concat::is_distinct_packed() +{ + return unique_filter && unique_filter->is_variable_sized(); +} + + +/* + @brief + Checks if one can store packed values in the Unique tree + + @param + total_length [OUT] length of the key in the tree + + @retval + TRUE packing allowed + FALSE packing not allowed +*/ + +bool Item_func_group_concat::is_packing_allowed(uint* total_length) +{ + /* + Currently Unique is not packed if ORDER BY clause is used + This is a limitation as of now. + */ + if (!distinct || arg_count_order) + return false; + + return Item_sum::is_packing_allowed(table, total_length); +} + + +/* + @brief + Check if storing packed values inside the Unique tree is allowed + + @param table Table structure + @total_length [OUT] max length of the packed key(takes into account + the length bytes also) + + @retval + TRUE packing is allowed + FALSE otherwise +*/ + +bool Item_sum::is_packing_allowed(TABLE *table, uint* total_length) +{ + uint size_of_packable_fields= 0; + uint tot_length= 0; + for (uint i= 0; i < get_arg_count(); i++) + { + Item *item= args[i]; + if (item->const_item()) + continue; + Field *field= item->get_tmp_table_field(); + if (field) + { + /* + For blob columns packing is not allowed + */ + if (field->flags & BLOB_FLAG) + return false; + + // 1 byte for nullability; + tot_length+= MY_TEST(field->maybe_null()); + + tot_length+= field->sort_length_without_suffix(); + if (field->is_packable()) + { + size_of_packable_fields+= + number_storage_requirement(field->sort_length_without_suffix()); + } + } + } + + // All fields are of fixed size + if (size_of_packable_fields == 0) + return false; + + /* + TODO varun: we can introduce a heuristic here, no need to do packing if we + have small VARCHAR or CHAR + */ + *total_length= tot_length; + /* + Unique::size_of_lengt_field is the length bytes to store the packed length + for each record inserted in the Unique tree + */ + (*total_length)+= Variable_size_keys_descriptor::size_of_length_field + + size_of_packable_fields; + return true; +} + + +/* + @brief + Get unique instance to filter out duplicates + + @param comp_func compare function + @param comp_func_fixed_arg arg passed to the comparison function + @param size_arg max length of the key + @param max_in_memory_size_arg max memory available for Unique + @param min_dupl_count_arg > 0 , the count for each value needs + to be stored also + @param allow_packing TRUE: Variable size keys are allowed + @param number_of_args Number of args involved in DISTINCT + + + @retval + NOT NULL instance of Unique class returned + NULL ERROR + +*/ +Unique_impl* +Item_sum::get_unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg, bool allow_packing, + uint number_of_args) +{ + Descriptor *desc; + + if (allow_packing) + desc= get_descriptor_for_variable_size_keys(number_of_args, size_arg); + else + desc= get_descriptor_for_fixed_size_keys(number_of_args, size_arg); + + if (!desc) + return NULL; + return new Unique_impl(comp_func, comp_func_fixed_arg, size_arg, + max_in_memory_size_arg, min_dupl_count_arg, desc); +} + + void Item_func_group_concat::print(String *str, enum_query_type query_type) { str->append(func_name()); @@ -4570,3 +4898,104 @@ Item_func_group_concat::~Item_func_group_concat() if (!original && unique_filter) delete unique_filter; } + + +/* + @brief + Insert a record inside the Unique tree + + @retval + -1 NULL value, record rejected + 0 record successfully inserted into the tree + 1 error +*/ + +int Item_func_group_concat::insert_record_to_unique() +{ + if (unique_filter->is_variable_sized() && unique_filter->is_single_arg()) + return insert_packed_record_to_unique(); + + copy_fields(tmp_table_param); + if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) + return 1; + + for (Field **field=table->field ; *field ; field++) + if (skip_nulls() && (*field)->is_real_null(0)) + return -1; // Don't count NULL + + /* + The first few bytes of record (at least one) are just markers + for deleted and NULLs. We want to skip them since they will + bloat the tree without providing any valuable info. Besides, + key_length used to initialize the tree didn't include space for them. + */ + + if (unique_filter->is_variable_sized()) + return insert_packed_record_to_unique(); + + return unique_filter->unique_add(get_record_pointer()); +} + + +/* + @brief + Insert a packed record inside the Unique tree + + @retval + -1 NULL value, record rejected + 0 record successfully inserted into the tree + 1 error +*/ + +int Item_func_group_concat::insert_packed_record_to_unique() +{ + Descriptor *descriptor= unique_filter->get_descriptor(); + uchar *rec_ptr; + if ((rec_ptr= descriptor->make_record(skip_nulls())) == NULL) + return -1; // NULL value + DBUG_ASSERT(descriptor->get_length_of_key(rec_ptr) + <= unique_filter->get_size()); + return unique_filter->unique_add(rec_ptr); +} + + +Descriptor *Item_sum::get_descriptor_for_fixed_size_keys(uint args_count, + uint size_arg) +{ + if (args_count == 1) + return new Fixed_size_keys_descriptor(size_arg); + else + return new Fixed_size_composite_keys_descriptor(size_arg); +} + + +Descriptor *Item_sum::get_descriptor_for_variable_size_keys(uint args_count, + uint size_arg) +{ + if (args_count == 1) + return new Variable_size_keys_simple(size_arg); + else + return new Variable_size_composite_key_desc(size_arg); +} + + +Descriptor* +Item_func_group_concat::get_descriptor_for_fixed_size_keys(uint args_count, + uint size_arg) +{ + if (args_count == 1 && !skip_nulls()) + return new Fixed_size_keys_descriptor_with_nulls(size_arg); + else + return new Fixed_size_keys_for_group_concat(size_arg); +} + + +Descriptor* +Item_func_group_concat::get_descriptor_for_variable_size_keys(uint args_count, + uint size_arg) +{ + if (args_count == 1) + return new Variable_size_keys_simple(size_arg); + else + return new Variable_size_composite_key_desc_for_gconcat(size_arg); +} diff --git a/sql/item_sum.h b/sql/item_sum.h index 118f78ec5c1..a0ff16bec1b 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -29,6 +29,7 @@ class Item_sum; class Aggregator_distinct; class Aggregator_simple; +class Descriptor; /** The abstract base class for the Aggregator_* classes. @@ -311,6 +312,7 @@ class Window_spec; any particular table (like COUNT(*)), returm 0 from Item_sum::used_tables(), but still return false from Item_sum::const_item(). */ +class Unique_impl; class Item_sum :public Item_func_or_sum { @@ -590,12 +592,19 @@ public: bool with_sum_func() const { return true; } virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); } + bool is_packing_allowed(TABLE* table, uint* total_length); + virtual Unique_impl *get_unique(qsort_cmp2 comp_func, + void *comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg, bool allow_packing, + uint number_of_args); + virtual Descriptor *get_descriptor_for_fixed_size_keys(uint args_count, + uint size_arg); + virtual Descriptor *get_descriptor_for_variable_size_keys(uint args_count, + uint size_arg); }; -class Unique; - - /** The distinct aggregator. Implements AGGFN (DISTINCT ..) @@ -650,7 +659,7 @@ class Aggregator_distinct : public Aggregator For AVG/SUM(DISTINCT) we always use this tree (as it takes a single argument) to get the distinct rows. */ - Unique *tree; + Unique_impl *tree; /* The length of the temp table row. Must be a member of the class as it @@ -696,7 +705,8 @@ public: bool unique_walk_function(void *element); bool unique_walk_function_for_count(void *element); - static int composite_key_cmp(void* arg, uchar* key1, uchar* key2); + int insert_record_to_unique(); + static int key_cmp(void* arg, uchar* key1, uchar* key2); }; @@ -1856,9 +1866,6 @@ int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); int group_concat_key_cmp_with_order_with_nulls(void *arg, const void *key1, const void *key2); -int dump_leaf_key(void* key_arg, - element_count count __attribute__((unused)), - void* item_arg); C_MODE_END class Item_func_group_concat : public Item_sum @@ -1879,7 +1886,7 @@ protected: @see Item_func_group_concat::add @see Item_func_group_concat::clear */ - Unique *unique_filter; + Unique_impl *unique_filter; TABLE *table; ORDER **order; Name_resolution_context *context; @@ -1921,13 +1928,13 @@ protected: friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1, const void* key2); + friend int group_concat_packed_key_cmp_with_distinct(void *arg, + const void *key1, + const void *key2); friend int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); friend int group_concat_key_cmp_with_order_with_nulls(void *arg, const void *key1, const void *key2); - friend int dump_leaf_key(void* key_arg, - element_count count __attribute__((unused)), - void* item_arg); bool repack_tree(THD *thd); @@ -1942,6 +1949,9 @@ protected: virtual String *get_str_from_field(Item *i, Field *f, String *tmp, const uchar *key, size_t offset) { return f->val_str(tmp, key + offset); } + virtual String *get_str_from_field(Item *i, Field *f, String *tmp) + { return f->val_str(tmp); } + virtual void cut_max_length(String *result, uint old_length, uint max_length) const; public: @@ -2016,11 +2026,24 @@ public: { context= (Name_resolution_context *)cntx; return FALSE; } Item *get_copy(THD *thd) { return get_item_copy<Item_func_group_concat>(thd, this); } - qsort_cmp2 get_comparator_function_for_distinct(); + qsort_cmp2 get_comparator_function_for_distinct(bool packed); qsort_cmp2 get_comparator_function_for_order_by(); uchar* get_record_pointer(); uint get_null_bytes(); - + bool is_distinct_packed(); + bool is_packing_allowed(uint* total_length); + static int dump_leaf_key(void* key_arg, + element_count count __attribute__((unused)), + void* item_arg); + static int dump_leaf_variable_sized_key(void *key_arg, + element_count __attribute__((unused)), + void *item_arg); + int insert_record_to_unique(); + int insert_packed_record_to_unique(); + Descriptor *get_descriptor_for_fixed_size_keys(uint args_count, + uint size_arg) override; + Descriptor *get_descriptor_for_variable_size_keys(uint args_count, + uint size_arg) override; }; #endif /* ITEM_SUM_INCLUDED */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index cb5a0604733..8c69f7f5b0b 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -5227,7 +5227,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, /* Add Unique operations cost */ unique_calc_buff_size= - Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records, + Unique_impl::get_cost_calc_buff_size((ulong)non_cpk_scan_records, param->table->file->ref_length, (size_t)param->thd->variables.sortbuff_size); if (param->imerge_cost_buff_size < unique_calc_buff_size) @@ -5239,7 +5239,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, } { - const double dup_removal_cost= Unique::get_use_cost( + const double dup_removal_cost= Unique_impl::get_use_cost( param->imerge_cost_buff, (uint)non_cpk_scan_records, param->table->file->ref_length, (size_t)param->thd->variables.sortbuff_size, @@ -5849,7 +5849,7 @@ bool prepare_search_best_index_intersect(PARAM *param, return TRUE; size_t calc_cost_buff_size= - Unique::get_cost_calc_buff_size((size_t)records_in_scans, + Unique_impl::get_cost_calc_buff_size((size_t)records_in_scans, common->key_size, common->max_memory_size); if (!(common->buff_elems= (uint *) alloc_root(param->mem_root, @@ -6198,7 +6198,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, */ ha_rows elems_in_tree= common_info->search_scans[0]->records- common_info->search_scans[0]->filtered_out ; - next->in_memory_cost+= Unique::get_search_cost(elems_in_tree, + next->in_memory_cost+= Unique_impl::get_search_cost(elems_in_tree, common_info->compare_factor)* ext_index_scan_records; cost= next->in_memory_cost; @@ -6211,7 +6211,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, size_t max_memory_size= common_info->max_memory_size; records_sent_to_unique+= ext_index_scan_records; - cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size, + cost= Unique_impl::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size, max_memory_size, compare_factor, TRUE, &next->in_memory); if (records_filtered_out_by_cpk) @@ -6221,7 +6221,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, double cost2; bool in_memory2; ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk; - cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size, + cost2= Unique_impl::get_use_cost(buff_elems, (size_t) records2, key_size, max_memory_size, compare_factor, TRUE, &in_memory2); cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan, @@ -11746,12 +11746,12 @@ int read_keys_and_merge_scans(THD *thd, READ_RECORD *read_record, bool intersection, key_map *filtered_scans, - Unique **unique_ptr) + Unique_impl **unique_ptr) { List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects); QUICK_RANGE_SELECT* cur_quick; int result; - Unique *unique= *unique_ptr; + Unique_impl *unique= *unique_ptr; handler *file= head->file; bool with_cpk_filter= pk_quick_select != NULL; DBUG_ENTER("read_keys_and_merge"); @@ -11778,10 +11778,14 @@ int read_keys_and_merge_scans(THD *thd, DBUG_EXECUTE_IF("only_one_Unique_may_be_created", DBUG_SET("+d,index_merge_may_not_create_a_Unique"); ); - unique= new Unique(refpos_order_cmp, (void *)file, - file->ref_length, - (size_t)thd->variables.sortbuff_size, - intersection ? quick_selects.elements : 0); + Descriptor *desc= new Fixed_size_keys_for_rowids(file); + + if (!desc) + goto err; + unique= new Unique_impl(refpos_cmp, (void *)desc, + file->ref_length, + (size_t)thd->variables.sortbuff_size, + intersection ? quick_selects.elements : 0, desc); if (!unique) goto err; *unique_ptr= unique; @@ -11792,7 +11796,7 @@ int read_keys_and_merge_scans(THD *thd, } DBUG_ASSERT(file->ref_length == unique->get_size()); - DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size()); + DBUG_ASSERT(thd->variables.sortbuff_size <= unique->get_max_in_memory_size()); for (;;) { @@ -11850,7 +11854,7 @@ int read_keys_and_merge_scans(THD *thd, */ head->file->ha_end_keyread(); if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, - &unique->sort, 1 , 1, TRUE)) + unique->get_sort(), 1 , 1, TRUE)) result= 1; DBUG_RETURN(result); @@ -11893,7 +11897,7 @@ int QUICK_INDEX_MERGE_SELECT::get_next() result= HA_ERR_END_OF_FILE; end_read_record(&read_record); // Free things used by sort early. Shouldn't be strictly necessary - unique->sort.reset(); + unique->get_sort()->reset(); /* All rows from Unique have been retrieved, do a clustered PK scan */ if (pk_quick_select) { @@ -11928,7 +11932,7 @@ int QUICK_INDEX_INTERSECT_SELECT::get_next() { result= HA_ERR_END_OF_FILE; end_read_record(&read_record); - unique->sort.reset(); // Free things early + unique->get_sort()->reset(); // Free things early } DBUG_RETURN(result); diff --git a/sql/opt_range.h b/sql/opt_range.h index 664e821f8d1..6cd3a95ba0a 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1155,7 +1155,7 @@ private: READ_RECORD *read_record, bool intersection, key_map *filtered_scans, - Unique **unique_ptr); + Unique_impl **unique_ptr); }; @@ -1246,7 +1246,7 @@ public: class QUICK_INDEX_SORT_SELECT : public QUICK_SELECT_I { protected: - Unique *unique; + Unique_impl *unique; public: QUICK_INDEX_SORT_SELECT(THD *thd, TABLE *table); ~QUICK_INDEX_SORT_SELECT(); diff --git a/sql/sql_class.h b/sql/sql_class.h index 6bd08343c9e..c60827aa207 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6532,9 +6532,13 @@ struct SORT_FIELD_ATTR uchar *b, size_t *b_len); int compare_packed_varstrings(uchar *a, size_t *a_len, uchar *b, size_t *b_len); + int compare_packed_varstrings(uchar *a, uchar *b); bool check_if_packing_possible(THD *thd) const; bool is_variable_sized() { return type == VARIABLE_SIZE; } void set_length_and_original_length(THD *thd, uint length_arg); + void setup_key_part_for_variable_size_key(Field *fld); + void setup_key_part_for_fixed_size_key(Field *fld); + int compare_nullability(uchar *a, uchar *b); }; @@ -6543,6 +6547,11 @@ struct SORT_FIELD: public SORT_FIELD_ATTR Field *field; /* Field to sort */ Item *item; /* Item if not sorting fields */ bool reverse; /* if descending sort */ + void setup_key_part_for_variable_size_key(Field *fld); + void setup_key_part_for_variable_size_key(Item *item); + void setup_key_part_for_fixed_size_key(Field *fld); + int compare_fixed_size_vals(uchar *a, size_t *a_len, + uchar *b, size_t *b_len); }; @@ -6654,7 +6663,7 @@ class SORT_INFO; class multi_delete :public select_result_interceptor { TABLE_LIST *delete_tables, *table_being_deleted; - Unique **tempfiles; + Unique_impl **tempfiles; ha_rows deleted, found; uint num_of_tables; int error; diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index f3472cac330..48359706a0f 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -319,7 +319,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, bool binlog_is_row; Explain_delete *explain; Delete_plan query_plan(thd->mem_root); - Unique * deltempfile= NULL; + Unique_impl *deltempfile= NULL; bool delete_record= false; bool delete_while_scanning; bool portion_of_time_through_update; @@ -715,9 +715,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, clause. Instead of deleting the rows, first mark them deleted. */ ha_rows tmplimit=limit; - deltempfile= new (thd->mem_root) Unique (refpos_order_cmp, table->file, - table->file->ref_length, - MEM_STRIP_BUF_SIZE); + Descriptor *desc= new Fixed_size_keys_for_rowids(table->file); + if (!desc) + goto terminate_delete; // OOM + + deltempfile= new (thd->mem_root) Unique_impl(refpos_cmp, desc, + table->file->ref_length, + MEM_STRIP_BUF_SIZE, 0, desc); + + if (!deltempfile) + goto terminate_delete; // OOM THD_STAGE_INFO(thd, stage_searching_rows_for_update); while (!(error=info.read_record()) && !thd->killed && @@ -726,8 +733,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (record_should_be_deleted(thd, table, select, explain, delete_history)) { table->file->position(table->record[0]); - if (unlikely((error= - deltempfile->unique_add((char*) table->file->ref)))) + if (unlikely((error= deltempfile->unique_add(table->file->ref)))) { error= 1; goto terminate_delete; @@ -739,7 +745,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, end_read_record(&info); if (unlikely(deltempfile->get(table)) || unlikely(table->file->ha_index_or_rnd_end()) || - unlikely(init_read_record(&info, thd, table, 0, &deltempfile->sort, 0, + unlikely(init_read_record(&info, thd, table, 0, deltempfile->get_sort(), 0, 1, false))) { error= 1; @@ -1071,6 +1077,13 @@ extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) return file->cmp_ref((const uchar*)a, (const uchar*)b); } + +extern "C" int refpos_cmp(void* arg, const void *a, const void *b) +{ + Fixed_size_keys_for_rowids *desc= (Fixed_size_keys_for_rowids *) arg; + return desc->compare_keys((uchar*)a, (uchar *)b); +} + /* make delete specific preparation and checks after opening tables @@ -1174,7 +1187,8 @@ multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, uint num_of_tables_arg) num_of_tables(num_of_tables_arg), error(0), do_delete(0), transactional_tables(0), normal_tables(0), error_handled(0) { - tempfiles= (Unique **) thd_arg->calloc(sizeof(Unique *) * num_of_tables); + tempfiles= + (Unique_impl **) thd_arg->calloc(sizeof(Unique_impl *) * num_of_tables); } @@ -1208,7 +1222,7 @@ bool multi_delete::initialize_tables(JOIN *join) { TABLE_LIST *walk; - Unique **tempfiles_ptr; + Unique_impl **tempfiles_ptr; DBUG_ENTER("initialize_tables"); if (unlikely((thd->variables.option_bits & OPTION_SAFE_UPDATES) && @@ -1278,12 +1292,23 @@ multi_delete::initialize_tables(JOIN *join) table_being_deleted= delete_tables; walk= walk->next_local; } + Unique_impl *unique; + Descriptor *desc; for (;walk ;walk= walk->next_local) { TABLE *table=walk->table; - *tempfiles_ptr++= new (thd->mem_root) Unique (refpos_order_cmp, table->file, - table->file->ref_length, - MEM_STRIP_BUF_SIZE); + desc= new Fixed_size_keys_for_rowids(table->file); + if (!desc) + DBUG_RETURN(TRUE); // OOM + + unique= new (thd->mem_root) Unique_impl(refpos_cmp, + desc, + table->file->ref_length, + MEM_STRIP_BUF_SIZE, + 0, desc); + if (!unique) + DBUG_RETURN(TRUE); // OOM + *tempfiles_ptr++= unique; } init_ftfuncs(thd, thd->lex->current_select, 1); DBUG_RETURN(thd->is_fatal_error); @@ -1363,7 +1388,7 @@ int multi_delete::send_data(List<Item> &values) } else { - error=tempfiles[secure_counter]->unique_add((char*) table->file->ref); + error=tempfiles[secure_counter]->unique_add(table->file->ref); if (unlikely(error)) { error= 1; // Fatal error @@ -1463,7 +1488,7 @@ int multi_delete::do_deletes() if (unlikely(tempfiles[counter]->get(table))) DBUG_RETURN(1); - local_error= do_table_deletes(table, &tempfiles[counter]->sort, + local_error= do_table_deletes(table, tempfiles[counter]->get_sort(), thd->lex->ignore); if (unlikely(thd->killed) && likely(!local_error)) diff --git a/sql/sql_select.h b/sql/sql_select.h index f39a70cc3e1..b71679ec43e 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1868,6 +1868,8 @@ int opt_sum_query(THD* thd, /* from sql_delete.cc, used by opt_range.cc */ extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); +extern "C" int refpos_cmp(void* arg, const void *a, const void *b); + /** class to copying an field/item to a key struct */ class store_key :public Sql_alloc diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 3b23328183c..5a36f3b6f6a 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -324,6 +324,8 @@ public: bool is_parameters_computed() { return parameters_computed; } void set_parameters_computed(bool val) { parameters_computed= val; } + int compare_keys(uchar *a, uchar *b); + int compare_keys_for_single_arg(uchar *a, uchar *b); static const uint size_of_length_field= 4; @@ -596,8 +598,8 @@ public: bool using_packed_sortkeys() const { - DBUG_ASSERT(m_using_packed_sortkeys == - (sort_keys != NULL && sort_keys->using_packed_sortkeys())); + DBUG_ASSERT(sort_keys == NULL || + (m_using_packed_sortkeys == sort_keys->using_packed_sortkeys())); return m_using_packed_sortkeys; } @@ -607,6 +609,11 @@ public: return addon_fields != NULL; } + void set_using_packed_keys(bool val) + { + m_using_packed_sortkeys= val; + } + uint32 get_result_length(uchar *plen) { if (!m_using_packed_addons) @@ -688,6 +695,12 @@ public: { return m_packed_format; } + void set_packed_format(bool val) + { + m_packed_format= val; + } + + uint32 get_key_length_for_unique(uchar *to, uint size_of_dupl_count); private: uint m_packable_length; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 7b600bd45c4..85eb5630418 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1517,6 +1517,41 @@ public: } }; + +/* + @brief + Get the offset to the value for the field in the buffer + + @param + field Field structure + + @retval + offset of the value from the start of the buffer +*/ + +uint get_offset_to_value(Field *field) +{ + return Variable_size_keys_descriptor::size_of_length_field + + MY_TEST(field->maybe_null()); +} + + +/* + @brief + Get the end of the buffer storing the value for the field + @param + buffer buffer storing the value + + @retval + return end of the buffer +*/ + +uchar* get_buffer_end(uchar *buffer) +{ + return buffer + Variable_size_keys_descriptor::read_packed_length(buffer); +} + + /* Histogram_builder is a helper class that is used to build histograms for columns @@ -1570,7 +1605,16 @@ public: return 0; if (count > bucket_capacity * (curr_bucket + 1)) { - column->store_field_value((uchar *) elem, col_length); + uchar *to= (uchar* )elem; + if (column->is_packable()) + { + column->unpack(column->ptr, + to + get_offset_to_value(column), + get_buffer_end(to), 0); + } + else + column->store_field_value(to, col_length); + histogram->set_value(curr_bucket, column->pos_in_interval(min_value, max_value)); curr_bucket++; @@ -1621,7 +1665,7 @@ protected: /* Field for which the number of distinct values is to be find out */ Field *table_field; - Unique *tree; /* The helper object to contain distinct values */ + Unique_impl *tree; /* The helper object to contain distinct values */ uint tree_key_length; /* The length of the keys for the elements of 'tree */ ulonglong distincts; @@ -1646,13 +1690,11 @@ public: of the parameters to be passed to the constructor of the Unique object. */ - Count_distinct_field(Field *field, size_t max_heap_table_size) + Count_distinct_field(Field *field) { table_field= field; - tree_key_length= field->pack_length(); - - tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, - tree_key_length, max_heap_table_size, 1); + tree_key_length= 0; + tree= NULL; } virtual ~Count_distinct_field() @@ -1670,6 +1712,63 @@ public: return (tree != NULL); } + + /* + @brief + Calculate the max length to store the length of a packable field + + @param + field Field structure + + @retval + Return the max length for a packable field + */ + + uint compute_packable_length(Field *field) + { + return table_field->max_packed_col_length(table_field->pack_length()) + + Variable_size_keys_descriptor::size_of_length_field + + MY_TEST(table_field->maybe_null()); + } + + + /* + @brief + Create and setup the Unique object for the column + + @param + thd Thread structure + max_heap_table_size max allowed size of the unique tree + + @retval + TRUE ERROR + FALSE SUCCESS + */ + + virtual bool setup(THD *thd, size_t max_heap_table_size) + { + Descriptor *desc; + if (table_field->is_packable()) + { + tree_key_length= compute_packable_length(table_field); + desc= new Variable_size_keys_simple(tree_key_length); + } + else + { + tree_key_length= table_field->pack_length(); + desc= new Fixed_size_keys_descriptor(tree_key_length); + } + if (!desc) + return true; // OOM + tree= new Unique_impl((qsort_cmp2) key_cmp, + (void*) this, tree_key_length, + max_heap_table_size, 1, desc); + if (!tree) + return true; // OOM + return tree->get_descriptor()->setup_for_field(thd, table_field); + } + + /* @brief Add the value of 'field' to the container of the Unique object 'tree' @@ -1677,6 +1776,14 @@ public: virtual bool add() { table_field->mark_unused_memory_as_defined(); + DBUG_ASSERT(tree); + if (tree->is_variable_sized()) + { + Descriptor *descriptor= tree->get_descriptor(); + uchar *rec_ptr= descriptor->make_record(true); + DBUG_ASSERT(descriptor->get_length_of_key(rec_ptr) <= tree->get_size()); + return tree->unique_add(rec_ptr); + } return tree->unique_add(table_field->ptr); } @@ -1733,17 +1840,31 @@ public: return table_field->collected_stats->histogram.get_values(); } + static int key_cmp(void* arg, uchar* key1, uchar* key2); }; +/* + @brief + Compare function for packed keys + @param arg Pointer to the relevant class instance + @param key1 left key image + @param key2 right key image -static -int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2) + + @return comparison result + @retval < 0 if key1 < key2 + @retval = 0 if key1 = key2 + @retval > 0 if key1 > key2 +*/ +int Count_distinct_field::key_cmp(void* arg, + uchar* key1, + uchar* key2) { - ulonglong *val1= (ulonglong *) key1; - ulonglong *val2= (ulonglong *) key2; - return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1; + Count_distinct_field *compare_arg= (Count_distinct_field*)arg; + DBUG_ASSERT(compare_arg->tree->get_descriptor()); + return compare_arg->tree->get_descriptor()->compare_keys(key1, key2); } - + /* The class Count_distinct_field_bit is derived from the class @@ -1755,24 +1876,52 @@ class Count_distinct_field_bit: public Count_distinct_field { public: - Count_distinct_field_bit(Field *field, size_t max_heap_table_size) - { - table_field= field; - tree_key_length= sizeof(ulonglong); - - tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, - (void*) &tree_key_length, - tree_key_length, max_heap_table_size, 1); - } + Count_distinct_field_bit(Field *field): Count_distinct_field(field){} bool add() { - longlong val= table_field->val_int(); + longlong val= table_field->val_int(); return tree->unique_add(&val); } + bool setup(THD *thd, size_t max_heap_table_size) override + { + tree_key_length= sizeof(ulonglong); + Descriptor *desc= new Fixed_size_keys_mem_comparable(tree_key_length); + if (!desc) + return true; + tree= new Unique_impl((qsort_cmp2) simple_ulonglong_key_cmp, + (void*) this, + tree_key_length, max_heap_table_size, 1, desc); + return tree == NULL; + } + static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2); }; +/* + @brief + Compare function for Bit fields + + @param arg Pointer to the relevant class instance + @param key1 left key image + @param key2 right key image + + + @return comparison result + @retval < 0 if key1 < key2 + @retval = 0 if key1 = key2 + @retval > 0 if key1 > key2 +*/ +int Count_distinct_field_bit::simple_ulonglong_key_cmp(void* arg, + uchar* key1, + uchar* key2) +{ + Count_distinct_field_bit *compare_arg= (Count_distinct_field_bit*)arg; + DBUG_ASSERT(compare_arg->tree->get_descriptor()); + return compare_arg->tree->get_descriptor()->compare_keys(key1, key2); +} + + /* The class Index_prefix_calc is a helper class used to calculate the values for the column 'avg_frequency' of the statistical table index_stats. @@ -2344,8 +2493,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field) { count_distinct= table_field->type() == MYSQL_TYPE_BIT ? - new Count_distinct_field_bit(table_field, max_heap_table_size) : - new Count_distinct_field(table_field, max_heap_table_size); + new Count_distinct_field_bit(table_field) : + new Count_distinct_field(table_field); + + if (count_distinct && count_distinct->setup(thd, max_heap_table_size)) + count_distinct= NULL; } if (count_distinct && !count_distinct->exists()) count_distinct= NULL; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 20ecf06bfee..bcb9671643b 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -139,6 +139,8 @@ double get_column_range_cardinality(Field *field, uint range_flag); bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); bool is_eits_usable(Field* field); +uint get_offset_to_value(Field *field); +uchar *get_buffer_end(uchar *to); class Histogram { diff --git a/sql/sql_type.h b/sql/sql_type.h index 271e74a9762..4e8f6d3d1e4 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -3977,10 +3977,16 @@ public: /* create a compact size key part for a sort key + + @param to buffer to store value of keypart + @param item item corresponding to the keypart + @param sort_field sort field structure + @param tmp_buffer temporary buffer to store the packed value + if needed */ virtual uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const=0; + String *tmp_buffer) const=0; virtual void sort_length(THD *thd, const Type_std_attributes *item, @@ -4397,7 +4403,7 @@ public: } uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override + String *tmp_buffer) const override { DBUG_ASSERT(0); return 0; @@ -4734,7 +4740,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; @@ -4846,7 +4852,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void Column_definition_attributes_frm_pack(const Column_definition_attributes *at, uchar *buff) const override; @@ -5102,7 +5108,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void Column_definition_attributes_frm_pack(const Column_definition_attributes *at, uchar *buff) const override; @@ -5213,7 +5219,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; @@ -5306,7 +5312,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; @@ -6521,7 +6527,7 @@ public: Sort_param *param) const override; uint make_packed_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + String *tmp_buffer) const override; void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; diff --git a/sql/uniques.cc b/sql/uniques.cc index a0cebe3e4dd..c2c72e9f289 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -40,31 +40,25 @@ #include "uniques.h" // Unique #include "sql_sort.h" -int unique_write_to_file(uchar* key, element_count count, Unique *unique) +int unique_write_to_file(uchar* key, element_count count, Unique_impl *unique) { - /* - Use unique->size (size of element stored in the tree) and not - unique->tree.size_of_element. The latter is different from unique->size - when tree implementation chooses to store pointer to key in TREE_ELEMENT - (instead of storing the element itself there) - */ - return my_b_write(&unique->file, key, unique->size) ? 1 : 0; + return unique->write_record_to_file(key) ? 1 : 0; } -int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) +int unique_write_to_file_with_count(uchar* key, element_count count, Unique_impl *unique) { - return my_b_write(&unique->file, key, unique->size) || + return unique_write_to_file(key, count, unique) || my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0; } -int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) +int unique_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique) { memcpy(unique->sort.record_pointers, key, unique->size); unique->sort.record_pointers+=unique->size; return 0; } -int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique) +int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique) { if (count >= unique->min_dupl_count) { @@ -77,11 +71,12 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq } -Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, - uint size_arg, size_t max_in_memory_size_arg, - uint min_dupl_count_arg) +Unique_impl::Unique_impl(qsort_cmp2 comp_func, void * comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg, Descriptor *desc) :max_in_memory_size(max_in_memory_size_arg), size(size_arg), + memory_used(0), elements(0) { my_b_clear(&file); @@ -90,7 +85,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, if (min_dupl_count_arg) full_size+= sizeof(element_count); with_counters= MY_TEST(min_dupl_count_arg); - init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func, + init_tree(&tree, (max_in_memory_size / 16), 0, 0, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); /* If the following fail's the next add will also fail */ my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16, @@ -101,10 +96,18 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, max_elements= (ulong) (max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size)); if (!max_elements) + { max_elements= 1; + /* + Need to ensure that we have memory to store atleast one record + in the Unique tree + */ + max_in_memory_size= sizeof(TREE_ELEMENT) + size; + } (void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME)); + m_descriptor= desc; } @@ -304,7 +307,7 @@ static double get_merge_many_buffs_cost(uint *buffer, these will be random seeks. */ -double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, +double Unique_impl::get_use_cost(uint *buffer, size_t nkeys, uint key_size, size_t max_in_memory_size, double compare_factor, bool intersect_fl, bool *in_memory) @@ -366,16 +369,17 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, return result; } -Unique::~Unique() +Unique_impl::~Unique_impl() { close_cached_file(&file); delete_tree(&tree, 0); delete_dynamic(&file_ptrs); + delete m_descriptor; } /* Write tree to disk; clear tree */ -bool Unique::flush() +bool Unique_impl::flush() { Merge_chunk file_ptr; elements+= tree.elements_in_tree; @@ -389,6 +393,10 @@ bool Unique::flush() (void*) this, left_root_right) || insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) return 1; + /** + the tree gets reset so make sure the memory used is reset too + */ + memory_used= 0; delete_tree(&tree, 0); return 0; } @@ -400,7 +408,7 @@ bool Unique::flush() */ void -Unique::reset() +Unique_impl::reset() { reset_tree(&tree); /* @@ -495,7 +503,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, uint key_length, Merge_chunk *begin, Merge_chunk *end, tree_walk_action walk_action, void *walk_action_arg, qsort_cmp2 compare, void *compare_arg, - IO_CACHE *file, bool with_counters) + IO_CACHE *file, bool with_counters, + uint min_dupl_count, bool packed) { BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; QUEUE queue; @@ -521,7 +530,12 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, // read_to_buffer() needs only rec_length. Sort_param sort_param; sort_param.rec_length= key_length; + sort_param.sort_length= key_length; + sort_param.min_dupl_count= min_dupl_count; + DBUG_ASSERT(sort_param.res_length == 0); DBUG_ASSERT(!sort_param.using_addon_fields()); + sort_param.set_using_packed_keys(packed); + uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0; /* Invariant: queue must contain top element from each tree, until a tree @@ -534,7 +548,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, top->set_buffer(merge_buffer + (top - begin) * piece_size, merge_buffer + (top - begin) * piece_size + piece_size); top->set_max_keys(max_key_count_per_piece); - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; DBUG_ASSERT(bytes_read); @@ -555,6 +569,10 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, read next key from the cache or from the file and push it to the queue; this gives new top. */ + key_length= sort_param.get_key_length_for_unique((uchar*)old_key, + size_of_dupl_count); + + cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); top->advance_current_key(key_length); top->decrement_mem_count(); if (top->mem_count()) @@ -564,7 +582,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, /* save old_key not to overwrite it in read_to_buffer */ memcpy(save_key_buff, old_key, key_length); old_key= save_key_buff; - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; else if (bytes_read) /* top->key, top->mem_count are reset */ @@ -604,7 +622,9 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, { do { - + key_length= sort_param.get_key_length_for_unique(top->current_key(), + size_of_dupl_count); + cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); cnt= with_counters ? get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1; if (walk_action(top->current_key(), cnt, walk_action_arg)) @@ -612,7 +632,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, top->advance_current_key(key_length); } while (top->decrement_mem_count()); - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; } @@ -645,7 +665,7 @@ end: <> 0 error */ -bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) +bool Unique_impl::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) { int res= 0; uchar *merge_buffer; @@ -678,7 +698,8 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) (Merge_chunk *) file_ptrs.buffer, (Merge_chunk *) file_ptrs.buffer + file_ptrs.elements, action, walk_action_arg, - tree.compare, tree.custom_arg, &file, with_counters); + tree.compare, tree.custom_arg, &file, with_counters, + min_dupl_count, is_variable_sized()); } my_free(merge_buffer); return res; @@ -693,7 +714,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) TRUE. SYNOPSIS - Unique:merge() + Unique_impl::merge() All params are 'IN': table the parameter to access sort context buff merge buffer @@ -704,7 +725,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) <> 0 error */ -bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, +bool Unique_impl::merge(TABLE *table, uchar *buff, size_t buff_size, bool without_last_merge) { IO_CACHE *outfile= &sort.io_cache; @@ -730,9 +751,11 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, sort_param.max_keys_per_buffer= (uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2); sort_param.not_killable= 1; + sort_param.set_using_packed_keys(is_variable_sized()); + sort_param.set_packed_format(is_variable_sized()); sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer * - sort_param.sort_length); + sort_param.sort_length); sort_param.compare= (qsort2_cmp) buffpek_compare; sort_param.cmp_context.key_compare= tree.compare; @@ -760,7 +783,8 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, file_ptrs.elements= maxbuffer+1; return 0; } - if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size), + if (merge_index(&sort_param, + Bounds_checked_array<uchar>(buff, buff_size), file_ptr, maxbuffer, &file, outfile)) goto err; error= 0; @@ -782,12 +806,14 @@ err: rows will be read in priority order. */ -bool Unique::get(TABLE *table) +bool Unique_impl::get(TABLE *table) { bool rc= 1; uchar *sort_buffer= NULL; sort.return_rows= elements+tree.elements_in_tree; - DBUG_ENTER("Unique::get"); + DBUG_ENTER("Unique_impl::get"); + + DBUG_ASSERT(is_variable_sized() == FALSE); if (my_b_tell(&file) == 0) { @@ -831,3 +857,509 @@ err: my_free(sort_buffer); DBUG_RETURN(rc); } + + +/* + @brief + Write an intermediate unique record to the file + + @param key key to be written + + @retval + >0 Error + =0 Record successfully written +*/ + +int Unique_impl::write_record_to_file(uchar *key) +{ + return my_b_write(get_file(), key, m_descriptor->get_length_of_key(key)); +} + + +/* VARIABLE SIZE KEYS DESCRIPTOR */ + + +Variable_size_keys_descriptor::Variable_size_keys_descriptor(uint length) +{ + max_length= length; + flags= (1 << VARIABLE_SIZED_KEYS); + sort_keys= NULL; + sortorder= NULL; +} + + +/* + @brief + Setup the structures that are used when Unique stores packed values + + @param thd thread structure + @param item item of aggregate function + @param non_const_args number of non constant arguments + @param arg_count total number of arguments + + @note + This implementation is used by GROUP_CONCAT and COUNT_DISTINCT + as it can have more than one arguments in the argument list. + + @retval + TRUE error + FALSE setup successful +*/ + +bool +Variable_size_keys_descriptor::setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, + uint arg_count) +{ + SORT_FIELD *pos; + if (init(thd, non_const_args)) + return true; + pos= sortorder; + + for (uint i= 0; i < arg_count; i++) + { + Item *arg= item->get_arg(i); + if (arg->const_item()) + continue; + + if (arg->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field*)arg)->field; + pos->setup_key_part_for_variable_size_key(field); + } + else + pos->setup_key_part_for_variable_size_key(arg); + pos++; + } + return false; +} + + +/* + @brief + Setup the structures that are used when Unique stores packed values + + @param thd thread structure + @param field field structure + + @retval + TRUE error + FALSE setup successful +*/ + +bool Variable_size_keys_descriptor::setup_for_field(THD *thd, Field *field) +{ + SORT_FIELD *pos; + if (init(thd, 1)) + return true; + pos= sortorder; + pos->setup_key_part_for_variable_size_key(field); + return false; +} + + +/* + @brief + Compare two packed keys inside the Unique tree + + @param a_ptr packed sort key + @param b_ptr packed sort key + + @retval + >0 key a_ptr greater than b_ptr + =0 key a_ptr equal to b_ptr + <0 key a_ptr less than b_ptr + +*/ + +int Variable_size_composite_key_desc::compare_keys(uchar *a_ptr, + uchar *b_ptr) +{ + uchar *a= a_ptr + Variable_size_keys_descriptor::size_of_length_field; + uchar *b= b_ptr + Variable_size_keys_descriptor::size_of_length_field; + int retval= 0; + size_t a_len, b_len; + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + retval= sort_field->is_variable_sized() ? + sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : + sort_field->compare_packed_fixed_size_vals(a, &a_len, b, &b_len); + + if (retval) + return sort_field->reverse ? -retval : retval; + + a+= a_len; + b+= b_len; + } + return retval; +} + + +int Variable_size_composite_key_desc_for_gconcat::compare_keys(uchar *a_ptr, + uchar *b_ptr) +{ + uchar *a= a_ptr + Variable_size_keys_descriptor::size_of_length_field; + uchar *b= b_ptr + Variable_size_keys_descriptor::size_of_length_field; + int retval= 0; + size_t a_len, b_len; + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + retval= sort_field->is_variable_sized() ? + sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : + sort_field->compare_fixed_size_vals(a, &a_len, b, &b_len); + + if (retval) + return sort_field->reverse ? -retval : retval; + + a+= a_len; + b+= b_len; + } + return retval; +} + + +int Variable_size_keys_simple::compare_keys(uchar *a, uchar *b) +{ + return sort_keys->compare_keys_for_single_arg(a + size_of_length_field, + b + size_of_length_field); +} + + +uchar* Variable_size_composite_key_desc::make_record(bool exclude_nulls) +{ + return make_encoded_record(sort_keys, exclude_nulls); +} + +uchar* +Variable_size_composite_key_desc_for_gconcat::make_record(bool exclude_nulls) +{ + return make_encoded_record(sort_keys, exclude_nulls); +} + + +uchar* Variable_size_keys_simple::make_record(bool exclude_nulls) +{ + return make_encoded_record(sort_keys, exclude_nulls); +} + + +/* + @brief + Create the sortorder and Sort keys structures for a descriptor + + @param thd THD structure + @param count Number of key parts to be allocated + + @retval + TRUE ERROR + FALSE structures successfully created +*/ + +bool Descriptor::init(THD *thd, uint count) +{ + if (sortorder) + return false; + DBUG_ASSERT(sort_keys == NULL); + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * count); + if (!sortorder) + return true; // OOM + sort_keys= new Sort_keys(sortorder, count); + if (!sort_keys) + return true; // OOM + return false; +} + + +bool Variable_size_composite_key_desc::init(THD *thd, uint count) +{ + return Descriptor::init(thd, count) || + Encode_variable_size_key::init(max_length); +} + + +bool Variable_size_composite_key_desc_for_gconcat::init(THD *thd, uint count) +{ + return Descriptor::init(thd, count) || + Encode_key_for_group_concat::init(max_length); +} + + +bool Variable_size_keys_simple::init(THD *thd, uint count) +{ + return Descriptor::init(thd, count) || + Encode_variable_size_key::init(max_length); +} + + +bool +Variable_size_composite_key_desc_for_gconcat::setup_for_item(THD *thd, + Item_sum *item, + uint non_const_args, + uint arg_count) +{ + if (init(thd, non_const_args)) + return true; + SORT_FIELD *pos; + pos= sortorder; + + for (uint i= 0; i < arg_count; i++) + { + Item *arg= item->get_arg(i); + if (arg->const_item()) + continue; + + Field *field= arg->get_tmp_table_field(); + pos->setup_key_part_for_variable_size_key(field); + pos++; + } + return false; +} + + +/* FIXED SIZE KEYS DESCRIPTOR */ + + +Fixed_size_keys_descriptor::Fixed_size_keys_descriptor(uint length) +{ + max_length= length; + flags= (1 << FIXED_SIZED_KEYS); + sort_keys= NULL; + sortorder= NULL; +} + + +int Fixed_size_keys_descriptor::compare_keys(uchar *a, uchar *b) +{ + DBUG_ASSERT(sort_keys); + SORT_FIELD *sort_field= sort_keys->begin(); + DBUG_ASSERT(sort_field->field); + return sort_field->field->cmp(a, b); +} + + +bool +Fixed_size_keys_descriptor::setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, + uint arg_count) +{ + SORT_FIELD *pos; + if (Descriptor::init(thd, non_const_args)) + return true; + pos= sortorder; + + for (uint i= 0; i < arg_count; i++) + { + Item *arg= item->get_arg(i); + if (arg->const_item()) + continue; + + Field *field= arg->get_tmp_table_field(); + + DBUG_ASSERT(field); + pos->setup_key_part_for_fixed_size_key(field); + pos++; + } + return false; +} + + +bool +Fixed_size_keys_descriptor::setup_for_field(THD *thd, Field *field) +{ + SORT_FIELD *pos; + if (Descriptor::init(thd, 1)) + return true; + pos= sortorder; + pos->setup_key_part_for_fixed_size_key(field); + + return false; +} + + +int Fixed_size_keys_mem_comparable::compare_keys(uchar *key1, uchar *key2) +{ + return memcmp(key1, key2, max_length); +} + + +int +Fixed_size_composite_keys_descriptor::compare_keys(uchar *key1, uchar *key2) +{ + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + Field *field= sort_field->field; + int res = field->cmp(key1, key2); + if (res) + return res; + key1 += sort_field->length; + key2 += sort_field->length; + } + return 0; +} + + +int Fixed_size_keys_for_rowids::compare_keys(uchar *key1, uchar *key2) +{ + return file->cmp_ref(key1, key2); +} + + +int +Fixed_size_keys_descriptor_with_nulls::compare_keys(uchar *key1_arg, + uchar *key2_arg) +{ + + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + Field *field= sort_field->field; + if (field->is_null_in_record(key1_arg) && + field->is_null_in_record(key2_arg)) + return 0; + + if (field->is_null_in_record(key1_arg)) + return -1; + + if (field->is_null_in_record(key2_arg)) + return 1; + + uchar *key1= (uchar*)key1_arg + field->table->s->null_bytes; + uchar *key2= (uchar*)key2_arg + field->table->s->null_bytes; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + } + return 0; +} + + +int Fixed_size_keys_for_group_concat::compare_keys(uchar *key1, uchar *key2) +{ + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + Field *field= sort_field->field; + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + } + return 0; +} + + +bool Encode_key::init(uint length) +{ + if (tmp_buffer.alloc(length)) + return true; + rec_ptr= (uchar *)my_malloc(PSI_INSTRUMENT_ME, + length, + MYF(MY_WME | MY_THREAD_SPECIFIC)); + return rec_ptr == NULL; +} + + +Encode_key::~Encode_key() +{ + my_free(rec_ptr); +} + + +/* + @brief + Make a record with packed values for a key + + @retval + 0 NULL value + >0 length of the packed record +*/ +uchar* Encode_variable_size_key::make_encoded_record(Sort_keys *sort_keys, + bool exclude_nulls) +{ + Field *field; + SORT_FIELD *sort_field; + uint length; + uchar *orig_to, *to; + + orig_to= to= rec_ptr; + to+= Variable_size_keys_descriptor::size_of_length_field; + + for (sort_field=sort_keys->begin() ; + sort_field != sort_keys->end() ; + sort_field++) + { + bool maybe_null=0; + if ((field=sort_field->field)) + { + // Field + length= field->make_packed_sort_key_part(to, sort_field); + } + else + { // Item + Item *item= sort_field->item; + length= item->type_handler()->make_packed_sort_key_part(to, item, + sort_field, + &tmp_buffer); + } + + if ((maybe_null= sort_field->maybe_null)) + { + if (exclude_nulls && length == 0) // rejecting NULLS + return NULL; + to++; + } + to+= length; + } + + length= static_cast<uint>(to - orig_to); + Variable_size_keys_descriptor::store_packed_length(orig_to, length); + return rec_ptr; +} + + +uchar* +Encode_key_for_group_concat::make_encoded_record(Sort_keys *sort_keys, + bool exclude_nulls) +{ + Field *field; + SORT_FIELD *sort_field; + uint length; + uchar *orig_to, *to; + + orig_to= to= rec_ptr; + to+= Variable_size_keys_descriptor::size_of_length_field; + + for (sort_field=sort_keys->begin() ; + sort_field != sort_keys->end() ; + sort_field++) + { + bool maybe_null=0; + DBUG_ASSERT(sort_field->field); + field=sort_field->field; + length= field->make_packed_key_part(to, sort_field); + + if ((maybe_null= sort_field->maybe_null)) + { + if (exclude_nulls && length == 0) // rejecting NULLS + return NULL; + to++; + } + to+= length; + } + + length= static_cast<uint>(to - orig_to); + Variable_size_keys_descriptor::store_packed_length(orig_to, length); + return rec_ptr; +} diff --git a/sql/uniques.h b/sql/uniques.h index 7e12a391fbd..c664c1351f3 100644 --- a/sql/uniques.h +++ b/sql/uniques.h @@ -18,18 +18,352 @@ #include "filesort.h" + +/* + Encode a key into a particular format. The format depends whether + the key is of fixed size or variable size. + + @notes + Currently this encoding is only done for variable size keys +*/ + +class Encode_key +{ +protected: + /* + Packed record ptr for a record of the table, the packed value in this + record is added to the unique tree + */ + uchar* rec_ptr; + + String tmp_buffer; +public: + virtual ~Encode_key(); + virtual uchar* make_encoded_record(Sort_keys *keys, bool exclude_nulls) = 0; + bool init(uint length); + uchar *get_rec_ptr() { return rec_ptr; } +}; + + +class Encode_variable_size_key : public Encode_key +{ +public: + Encode_variable_size_key() + { + rec_ptr= NULL; + } + virtual ~Encode_variable_size_key() {} + uchar* make_encoded_record(Sort_keys *keys, bool exclude_nulls) override; +}; + + +class Encode_key_for_group_concat : public Encode_variable_size_key +{ +public: + Encode_key_for_group_concat() : Encode_variable_size_key(){} + ~Encode_key_for_group_concat() {} + uchar* make_encoded_record(Sort_keys *keys, bool exclude_nulls) override; +}; + + +/* + + Descriptor class storing information about the keys that would be + inserted in the Unique tree. This is an abstract class which is + extended by other class to support descriptors for keys with fixed and + variable size. +*/ + +class Descriptor : public Sql_alloc +{ +protected: + + /* maximum possible size of any key, in bytes */ + uint max_length; + enum attributes + { + FIXED_SIZED_KEYS= 0, + VARIABLE_SIZED_KEYS + }; + + /* + Storing information about the attributes for the keys + Each bit for the flag points to an attribute + Currently only 2 bits are used, so the remaining bit can be used + in the future if some extension is required for descriptors + */ + uint flags; + /* + Array of SORT_FIELD structure storing the information about the key parts + in the sort key of the Unique tree + @see Unique::setup() + */ + SORT_FIELD *sortorder; + + /* + Structure storing information about usage of keys + */ + Sort_keys *sort_keys; + +public: + virtual ~Descriptor() {}; + virtual uint get_length_of_key(uchar *ptr) = 0; + bool is_variable_sized() + { + return flags & (1 << VARIABLE_SIZED_KEYS); + } + virtual int compare_keys(uchar *a, uchar *b) = 0; + + // Fill structures like sort_keys, sortorder + virtual bool setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, uint arg_count) + { return false; } + virtual bool setup_for_field(THD *thd, Field *field) { return false; } + + virtual Sort_keys *get_keys() { return sort_keys; } + SORT_FIELD *get_sortorder() { return sortorder; } + + virtual uchar* make_record(bool exclude_nulls) { return NULL; } + virtual bool is_single_arg() = 0; + virtual bool init(THD *thd, uint count); +}; + + +/* + Descriptor for fixed size keys with single key part +*/ + +class Fixed_size_keys_descriptor : public Descriptor +{ +public: + Fixed_size_keys_descriptor(uint length); + virtual ~Fixed_size_keys_descriptor() {} + uint get_length_of_key(uchar *ptr) override { return max_length; } + bool setup_for_field(THD *thd, Field *field); + bool setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, uint arg_count); + virtual int compare_keys(uchar *a, uchar *b) override; + virtual bool is_single_arg() override { return true; } +}; + + +/* + Descriptor for fixed size mem-comparable keys with single key part +*/ +class Fixed_size_keys_mem_comparable: public Fixed_size_keys_descriptor +{ +public: + Fixed_size_keys_mem_comparable(uint length) + :Fixed_size_keys_descriptor(length){} + ~Fixed_size_keys_mem_comparable() {} + int compare_keys(uchar *a, uchar *b) override; +}; + + +/* + Descriptor for fixed size keys for rowid comparison +*/ +class Fixed_size_keys_for_rowids: public Fixed_size_keys_descriptor +{ +private: + handler *file; + +public: + Fixed_size_keys_for_rowids(handler *file_arg) + :Fixed_size_keys_descriptor(file_arg->ref_length) + { + file= file_arg; + } + ~Fixed_size_keys_for_rowids() {} + int compare_keys(uchar *a, uchar *b) override; +}; + + +/* + Descriptor for fixed size keys where a key part can be NULL + Used currently in JSON_ARRAYAGG +*/ + +class Fixed_size_keys_descriptor_with_nulls : public Fixed_size_keys_descriptor +{ +public: + Fixed_size_keys_descriptor_with_nulls(uint length) + : Fixed_size_keys_descriptor(length) {} + ~Fixed_size_keys_descriptor_with_nulls() {} + int compare_keys(uchar *a, uchar *b) override; +}; + + +/* + Descriptor for fixed size keys in group_concat +*/ +class Fixed_size_keys_for_group_concat : public Fixed_size_keys_descriptor +{ +public: + Fixed_size_keys_for_group_concat(uint length) + : Fixed_size_keys_descriptor(length) {} + ~Fixed_size_keys_for_group_concat() {} + int compare_keys(uchar *a, uchar *b) override; +}; + + +/* + Descriptor for fixed size keys with multiple key parts +*/ + +class Fixed_size_composite_keys_descriptor : public Fixed_size_keys_descriptor +{ +public: + Fixed_size_composite_keys_descriptor(uint length) + : Fixed_size_keys_descriptor(length) {} + ~Fixed_size_composite_keys_descriptor() {} + int compare_keys(uchar *a, uchar *b) override; + bool is_single_arg() override { return false; } +}; + + /* - Unique -- class for unique (removing of duplicates). + Base class for the descriptor for variable size keys +*/ + +class Variable_size_keys_descriptor : public Descriptor +{ +public: + Variable_size_keys_descriptor(uint length); + virtual ~Variable_size_keys_descriptor() {} + uint get_length_of_key(uchar *ptr) override + { + return read_packed_length(ptr); + } + virtual int compare_keys(uchar *a, uchar *b) override { return 0; } + virtual bool is_single_arg() override { return false; } + + virtual bool setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, uint arg_count) override; + virtual bool setup_for_field(THD *thd, Field *field) override; + + // All need to be moved to some new class + // returns the length of the key along with the length bytes for the key + static uint read_packed_length(uchar *p) + { + return size_of_length_field + uint4korr(p); + } + static void store_packed_length(uchar *p, uint sz) + { + int4store(p, sz - size_of_length_field); + } + static const uint size_of_length_field= 4; +}; + + +/* + Descriptor for variable size keys with only one component + + Used by EITS, JSON_ARRAYAGG. + COUNT(DISTINCT col) AND GROUP_CONCAT(DISTINCT col) are also allowed + that the number of arguments with DISTINCT is 1. +*/ + +class Variable_size_keys_simple : public Variable_size_keys_descriptor, + public Encode_variable_size_key +{ +public: + Variable_size_keys_simple(uint length) + :Variable_size_keys_descriptor(length), Encode_variable_size_key() {} + ~Variable_size_keys_simple() {} + int compare_keys(uchar *a, uchar *b) override; + uchar* make_record(bool exclude_nulls) override; + uchar* get_rec_ptr() { return rec_ptr; } + bool is_single_arg() override { return true; } + bool init(THD *thd, uint count) override; +}; + + +/* + Descriptor for variable sized keys with multiple key parts +*/ +class Variable_size_composite_key_desc : public Variable_size_keys_descriptor, + public Encode_variable_size_key +{ +public: + Variable_size_composite_key_desc(uint length) + : Variable_size_keys_descriptor(length), Encode_variable_size_key() {} + ~Variable_size_composite_key_desc() {} + int compare_keys(uchar *a, uchar *b) override; + uchar* make_record(bool exclude_nulls) override; + bool init(THD *thd, uint count) override; +}; + + + +/* + Descriptor for variable sized keys with multiple key parts for GROUP_CONCAT +*/ + +class Variable_size_composite_key_desc_for_gconcat : + public Variable_size_keys_descriptor, + public Encode_key_for_group_concat +{ +public: + Variable_size_composite_key_desc_for_gconcat(uint length) + : Variable_size_keys_descriptor(length), Encode_key_for_group_concat() {} + ~Variable_size_composite_key_desc_for_gconcat() {} + int compare_keys(uchar *a, uchar *b) override; + uchar* make_record(bool exclude_nulls) override; + bool setup_for_item(THD *thd, Item_sum *item, + uint non_const_args, uint arg_count) override; + bool init(THD *thd, uint count) override; +}; + + +/* + Unique -- An abstract class for unique (removing duplicates). +*/ + +class Unique : public Sql_alloc { + +protected: + + /* + Storing all meta-data information of the expressions whose value are + being added to the Unique tree + */ + Descriptor *m_descriptor; +public: + + virtual void reset() = 0; + virtual bool unique_add(void *ptr) = 0; + virtual ~Unique() {}; + + virtual void close_for_expansion() = 0; + + virtual bool get(TABLE *table) = 0; + virtual bool walk(TABLE *table, tree_walk_action action, + void *walk_action_arg)= 0; + + virtual SORT_INFO *get_sort() = 0; + + virtual ulong get_n_elements() = 0; + virtual size_t get_max_in_memory_size() const = 0; + virtual bool is_in_memory() = 0; + + virtual ulong elements_in_tree() = 0; + Descriptor *get_descriptor() { return m_descriptor; } +}; + + +/* + Unique_impl -- class for unique (removing of duplicates). Puts all values to the TREE. If the tree becomes too big, it's dumped to the file. User can request sorted values, or just iterate through them. In the last case tree merging is performed in memory simultaneously with iteration, so it should be ~2-3x faster. - */ +*/ -class Unique :public Sql_alloc -{ +class Unique_impl : public Unique { DYNAMIC_ARRAY file_ptrs; - ulong max_elements; /* Total number of elements that will be stored in-memory */ + /* Total number of elements that will be stored in-memory */ + ulong max_elements; size_t max_in_memory_size; IO_CACHE file; TREE tree; @@ -40,30 +374,93 @@ class Unique :public Sql_alloc uint full_size; /* Size of element + space needed to store the number of duplicates found for the element. */ - uint min_dupl_count; /* Minimum number of occurences of element required for + uint min_dupl_count; /* Minimum number of occurrences of element required for it to be written to record_pointers. always 0 for unions, > 0 for intersections */ bool with_counters; + // size in bytes used for storing keys in the Unique tree + size_t memory_used; + ulong elements; + SORT_INFO sort; + bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge); bool flush(); -public: - ulong elements; - SORT_INFO sort; - Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, - uint size_arg, size_t max_in_memory_size_arg, - uint min_dupl_count_arg= 0); - ~Unique(); - ulong elements_in_tree() { return tree.elements_in_tree; } - inline bool unique_add(void *ptr) + // return the amount of unused memory in the Unique tree + size_t space_left() + { + DBUG_ASSERT(max_in_memory_size >= memory_used); + return max_in_memory_size - memory_used; + } + + // Check if the Unique tree is full or not + bool is_full(size_t record_size) + { + if (!tree.elements_in_tree) // Atleast insert one element in the tree + return false; + return record_size > space_left(); + } + + /* + @brief + Add a record to the Unique tree + @param + ptr key value + size length of the key + @retval + TRUE ERROE + FALSE key successfully inserted in the Unique tree + */ + + bool unique_add(void *ptr, uint size_arg) { DBUG_ENTER("unique_add"); DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); - if (!(tree.flag & TREE_ONLY_DUPS) && - tree.elements_in_tree >= max_elements && flush()) + TREE_ELEMENT *res; + size_t rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element; + + if (!(tree.flag & TREE_ONLY_DUPS) && is_full(rec_size) && flush()) DBUG_RETURN(1); - DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); + uint count= tree.elements_in_tree; + res= tree_insert(&tree, ptr, size_arg, tree.custom_arg); + if (tree.elements_in_tree != count) + { + /* + increment memory used only when a unique element is inserted + in the tree + */ + memory_used+= rec_size; + } + DBUG_RETURN(!res); + } + +public: + + /* + @brief + Returns the number of elements in the unique instance + + @details + If all the elements fit in the memory, then this returns all the + distinct elements. + */ + ulong get_n_elements() override + { + return is_in_memory() ? elements_in_tree() : elements; + } + + SORT_INFO *get_sort() override { return &sort; } + + Unique_impl(qsort_cmp2 comp_func, void *comp_func_fixed_arg, + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg, Descriptor *desc); + ~Unique_impl(); + ulong elements_in_tree() { return tree.elements_in_tree; } + + bool unique_add(void *ptr) override + { + return unique_add(ptr, m_descriptor->get_length_of_key((uchar*)ptr)); } bool is_in_memory() { return (my_b_tell(&file) == 0); } @@ -92,19 +489,30 @@ public: return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); } - void reset(); + void reset() override; bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); uint get_size() const { return size; } + uint get_full_size() const { return full_size; } size_t get_max_in_memory_size() const { return max_in_memory_size; } + bool is_count_stored() { return with_counters; } + IO_CACHE *get_file () { return &file; } + int write_record_to_file(uchar *key); + + // returns TRUE if the unique tree stores packed values + bool is_variable_sized() { return m_descriptor->is_variable_sized(); } + + // returns TRUE if the key to be inserted has only one component + bool is_single_arg() { return m_descriptor->is_single_arg(); } + Descriptor* get_descriptor() { return m_descriptor; } - friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); - friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); + friend int unique_write_to_file(uchar* key, element_count count, Unique_impl *unique); + friend int unique_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique); friend int unique_write_to_file_with_count(uchar* key, element_count count, - Unique *unique); - friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, - Unique *unique); + Unique_impl *unique); + friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, + Unique_impl *unique); }; #endif /* UNIQUE_INCLUDED */ |