diff options
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | mysql-test/main/analyze_format_json.result | 2 | ||||
-rw-r--r-- | mysql-test/main/order_by.result | 305 | ||||
-rw-r--r-- | mysql-test/main/order_by.test | 81 | ||||
-rw-r--r-- | mysql-test/main/order_by_pack_big.result | 98 | ||||
-rw-r--r-- | mysql-test/main/order_by_pack_big.test | 48 | ||||
-rw-r--r-- | plugin/type_inet/sql_type_inet.cc | 35 | ||||
-rw-r--r-- | plugin/type_inet/sql_type_inet.h | 10 | ||||
-rw-r--r-- | sql/bounded_queue.h | 8 | ||||
-rw-r--r-- | sql/field.cc | 99 | ||||
-rw-r--r-- | sql/field.h | 58 | ||||
-rw-r--r-- | sql/filesort.cc | 1004 | ||||
-rw-r--r-- | sql/filesort.h | 15 | ||||
-rw-r--r-- | sql/filesort_utils.cc | 9 | ||||
-rw-r--r-- | sql/filesort_utils.h | 4 | ||||
-rw-r--r-- | sql/records.cc | 38 | ||||
-rw-r--r-- | sql/sql_analyze_stmt.cc | 5 | ||||
-rw-r--r-- | sql/sql_analyze_stmt.h | 8 | ||||
-rw-r--r-- | sql/sql_class.h | 48 | ||||
-rw-r--r-- | sql/sql_select.cc | 2 | ||||
-rw-r--r-- | sql/sql_sort.h | 348 | ||||
-rw-r--r-- | sql/sql_type.h | 132 | ||||
-rw-r--r-- | sql/uniques.cc | 6 |
23 files changed, 2072 insertions, 296 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index cf8b42d0430..b1adbce5565 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,6 +260,11 @@ IF (ENABLE_GCOV) MY_CHECK_AND_SET_COMPILER_FLAG("-fprofile-arcs -ftest-coverage -lgcov" DEBUG) ENDIF() +OPTION(WITHOUT_PACKED_SORT_KEYS "disable packed sort keys" OFF) +IF(WITHOUT_PACKED_SORT_KEYS) + ADD_DEFINITIONS(-DWITHOUT_PACKED_SORT_KEYS) +ENDIF() + MY_CHECK_AND_SET_COMPILER_FLAG(-ggdb3 DEBUG) SET(ENABLED_LOCAL_INFILE "AUTO" CACHE STRING "If we should should enable LOAD DATA LOCAL by default (OFF/ON/AUTO)") diff --git a/mysql-test/main/analyze_format_json.result b/mysql-test/main/analyze_format_json.result index c505aae563b..f45433a1572 100644 --- a/mysql-test/main/analyze_format_json.result +++ b/mysql-test/main/analyze_format_json.result @@ -704,7 +704,7 @@ ANALYZE "r_used_priority_queue": false, "r_output_rows": 0, "r_buffer_size": "REPLACED", - "r_sort_mode": "sort_key,rowid", + "r_sort_mode": "packed_sort_key,rowid", "temporary_table": { "filesort": { "sort_key": "(subquery#2)", diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result index a1b167d6189..4c8c640e03b 100644 --- a/mysql-test/main/order_by.result +++ b/mysql-test/main/order_by.result @@ -3471,14 +3471,13 @@ drop table t1,t2,t3,t4; # set @save_sql_mode= @@sql_mode; set sql_mode= 'PAD_CHAR_TO_FULL_LENGTH'; -CREATE TABLE t1 ( a CHAR(255) charset utf8, -b CHAR(255) charset utf8, c TEXT); +CREATE TABLE t1 ( a CHAR(255), b CHAR(255), c TEXT); INSERT INTO t1 VALUES ('1','a', 'a'), ('2','b', 'b'), ('3','c', 'c'), ('4','d','d'), ('5','e', 'e'), ('6','f', 'f'), ('7','g','g'), ('8','h','h'), ('9','i', 'i'), ('10','j','j'), ('11','k','k'), ('12','l','l'), ('13','m','m'), ('14','n','n'), ('15','o','o'); -set sort_buffer_size=517*30; +set sort_buffer_size=524*15; select c from t1 order by a,b; c a @@ -3496,5 +3495,305 @@ f g h i +set sort_buffer_size= default; set sql_mode= @save_sql_mode; drop table t1; +# +# MDEV-21580: Allow packed sort keys in sort buffer +# +# +# This example should not pack sort keys +# all fields are fixed-size fields in the ORDER BY clause +# +create table t1 (a bigint, b bigint, c bigint); +insert into t1 select seq, seq, seq from seq_1_to_100; +# in r_sort_mode it should show sort_key and not packed_sort_key +ANALYZE FORMAT=JSON select * from t1 order by a,b,c; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 100, + "filesort": { + "sort_key": "t1.a, t1.b, t1.c", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 100, + "r_buffer_size": "REPLACED", + "r_sort_mode": "sort_key,packed_addon_fields", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 100, + "r_rows": 100, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +select * from t1 order by a,b,c; +a b c +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 +9 9 9 +10 10 10 +11 11 11 +12 12 12 +13 13 13 +14 14 14 +15 15 15 +16 16 16 +17 17 17 +18 18 18 +19 19 19 +20 20 20 +21 21 21 +22 22 22 +23 23 23 +24 24 24 +25 25 25 +26 26 26 +27 27 27 +28 28 28 +29 29 29 +30 30 30 +31 31 31 +32 32 32 +33 33 33 +34 34 34 +35 35 35 +36 36 36 +37 37 37 +38 38 38 +39 39 39 +40 40 40 +41 41 41 +42 42 42 +43 43 43 +44 44 44 +45 45 45 +46 46 46 +47 47 47 +48 48 48 +49 49 49 +50 50 50 +51 51 51 +52 52 52 +53 53 53 +54 54 54 +55 55 55 +56 56 56 +57 57 57 +58 58 58 +59 59 59 +60 60 60 +61 61 61 +62 62 62 +63 63 63 +64 64 64 +65 65 65 +66 66 66 +67 67 67 +68 68 68 +69 69 69 +70 70 70 +71 71 71 +72 72 72 +73 73 73 +74 74 74 +75 75 75 +76 76 76 +77 77 77 +78 78 78 +79 79 79 +80 80 80 +81 81 81 +82 82 82 +83 83 83 +84 84 84 +85 85 85 +86 86 86 +87 87 87 +88 88 88 +89 89 89 +90 90 90 +91 91 91 +92 92 92 +93 93 93 +94 94 94 +95 95 95 +96 96 96 +97 97 97 +98 98 98 +99 99 99 +100 100 100 +drop table t1; +# +# Test with Binary columns (using suffix length to determine ordering) +# Should show packed_sortkey in the r_sort_mode +# +create table t1 (a int, b blob); +set @save_max_sort_length= @@max_sort_length; +insert into t1 select 1, CONCAT(repeat('a', @save_max_sort_length), 'A'); +insert into t1 select 2, CONCAT(repeat('a', @save_max_sort_length), 'AB'); +insert into t1 select 3, CONCAT(repeat('a', @save_max_sort_length), 'ABE'); +insert into t1 select 4, CONCAT(repeat('a', @save_max_sort_length), 'APBX'); +insert into t1 select 5, CONCAT(repeat('a', @save_max_sort_length), 'ABAAX'); +show variables like '%sort_buffer_size'; +Variable_name Value +aria_sort_buffer_size 268434432 +myisam_sort_buffer_size 134216704 +sort_buffer_size 262144 +select a, substr(b, @save_max_sort_length+1) from t1 order by b desc; +a substr(b, @save_max_sort_length+1) +5 ABAAX +4 APBX +3 ABE +2 AB +1 A +analyze format=json +select a, substr(b, @save_max_sort_length+1) from t1 order by b desc; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 5, + "filesort": { + "sort_key": "t1.b desc", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 5, + "r_buffer_size": "REPLACED", + "r_sort_mode": "packed_sort_key,rowid", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 5, + "r_rows": 5, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +drop table t1; +# +# Packing sort keys with complex collations +# +create table t1(a varchar(255) charset utf8, b int, c decimal); +insert into t1 values ('abc', 1, 1) , ('bcd', 2, 2), ('cde',3, 3); +insert into t1 values ('def', 4, 4) , ('efg', 5, 5), ('fgh', 6, 6); +# +# Should show packed_sortkey in the r_sort_mode +# +ANALYZE FORMAT=JSON select a, b, c from t1 order by a, b; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 6, + "filesort": { + "sort_key": "t1.a, t1.b", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 6, + "r_buffer_size": "REPLACED", + "r_sort_mode": "packed_sort_key,rowid", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 6, + "r_rows": 6, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +select a, b, c from t1 order by a, b; +a b c +abc 1 1 +bcd 2 2 +cde 3 3 +def 4 4 +efg 5 5 +fgh 6 6 +set @save_max_sort_length= @@max_sort_length; +set max_sort_length=5; +# +# should show sortkey in r_sort_mode as the collation is complex and +# truncation is not possible +# +ANALYZE FORMAT=JSON select a, b, c from t1 order by a, b; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 6, + "filesort": { + "sort_key": "t1.a, t1.b", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 6, + "r_buffer_size": "REPLACED", + "r_sort_mode": "sort_key,packed_addon_fields", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 6, + "r_rows": 6, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +select a, b, c from t1 order by a, b; +a b c +abc 1 1 +bcd 2 2 +cde 3 3 +def 4 4 +efg 5 5 +fgh 6 6 +set max_sort_length= @save_max_sort_length; +drop table t1; diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test index c283b805bee..d479529d51b 100644 --- a/mysql-test/main/order_by.test +++ b/mysql-test/main/order_by.test @@ -9,6 +9,7 @@ drop table if exists t1,t2,t3; --enable_warnings call mtr.add_suppression("Out of sort memory; increase server sort buffer size"); +--source include/have_sequence.inc # # Test old ORDER BY bug @@ -2299,17 +2300,89 @@ drop table t1,t2,t3,t4; set @save_sql_mode= @@sql_mode; set sql_mode= 'PAD_CHAR_TO_FULL_LENGTH'; -CREATE TABLE t1 ( a CHAR(255) charset utf8, - b CHAR(255) charset utf8, c TEXT); +CREATE TABLE t1 ( a CHAR(255), b CHAR(255), c TEXT); INSERT INTO t1 VALUES ('1','a', 'a'), ('2','b', 'b'), ('3','c', 'c'), ('4','d','d'), ('5','e', 'e'), ('6','f', 'f'), ('7','g','g'), ('8','h','h'), ('9','i', 'i'), ('10','j','j'), ('11','k','k'), ('12','l','l'), ('13','m','m'), ('14','n','n'), ('15','o','o'); -set sort_buffer_size=517*30; +set sort_buffer_size=524*15; select c from t1 order by a,b; - +set sort_buffer_size= default; set sql_mode= @save_sql_mode; +drop table t1; + +--echo # +--echo # MDEV-21580: Allow packed sort keys in sort buffer +--echo # + +--echo # +--echo # This example should not pack sort keys +--echo # all fields are fixed-size fields in the ORDER BY clause +--echo # + +create table t1 (a bigint, b bigint, c bigint); +insert into t1 select seq, seq, seq from seq_1_to_100; + +--echo # in r_sort_mode it should show sort_key and not packed_sort_key +--source include/analyze-format.inc +ANALYZE FORMAT=JSON select * from t1 order by a,b,c; +select * from t1 order by a,b,c; + +drop table t1; + +--echo # +--echo # Test with Binary columns (using suffix length to determine ordering) +--echo # Should show packed_sortkey in the r_sort_mode +--echo # + +create table t1 (a int, b blob); + +set @save_max_sort_length= @@max_sort_length; +insert into t1 select 1, CONCAT(repeat('a', @save_max_sort_length), 'A'); +insert into t1 select 2, CONCAT(repeat('a', @save_max_sort_length), 'AB'); +insert into t1 select 3, CONCAT(repeat('a', @save_max_sort_length), 'ABE'); +insert into t1 select 4, CONCAT(repeat('a', @save_max_sort_length), 'APBX'); +insert into t1 select 5, CONCAT(repeat('a', @save_max_sort_length), 'ABAAX'); + +show variables like '%sort_buffer_size'; + +select a, substr(b, @save_max_sort_length+1) from t1 order by b desc; +--source include/analyze-format.inc +analyze format=json +select a, substr(b, @save_max_sort_length+1) from t1 order by b desc; + +drop table t1; + +--echo # +--echo # Packing sort keys with complex collations +--echo # + +create table t1(a varchar(255) charset utf8, b int, c decimal); +insert into t1 values ('abc', 1, 1) , ('bcd', 2, 2), ('cde',3, 3); +insert into t1 values ('def', 4, 4) , ('efg', 5, 5), ('fgh', 6, 6); + +--echo # +--echo # Should show packed_sortkey in the r_sort_mode +--echo # + +--source include/analyze-format.inc +ANALYZE FORMAT=JSON select a, b, c from t1 order by a, b; +select a, b, c from t1 order by a, b; + +set @save_max_sort_length= @@max_sort_length; +set max_sort_length=5; + +--echo # +--echo # should show sortkey in r_sort_mode as the collation is complex and +--echo # truncation is not possible +--echo # + +--source include/analyze-format.inc +ANALYZE FORMAT=JSON select a, b, c from t1 order by a, b; +select a, b, c from t1 order by a, b; + +set max_sort_length= @save_max_sort_length; drop table t1; diff --git a/mysql-test/main/order_by_pack_big.result b/mysql-test/main/order_by_pack_big.result index 50c71420818..a7cf2436bcc 100644 --- a/mysql-test/main/order_by_pack_big.result +++ b/mysql-test/main/order_by_pack_big.result @@ -393,6 +393,104 @@ Sort_range 0 Sort_rows 10000 Sort_scan 1 set sort_buffer_size=default; +# +# CASE #1 Packed sort keys with addon fields +# +ALTER TABLE t3 ADD INDEX idx(names, address); +set sort_buffer_size= 2097152; +ANALYZE FORMAT=JSON SELECT id, names, address FROM t3 ORDER BY names, address; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 10000, + "filesort": { + "sort_key": "t3.`names`, t3.address", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 10000, + "r_buffer_size": "REPLACED", + "r_sort_mode": "packed_sort_key,packed_addon_fields", + "table": { + "table_name": "t3", + "access_type": "ALL", + "r_loops": 1, + "rows": 10000, + "r_rows": 10000, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +flush status; +SELECT id, names, address INTO OUTFILE '$file1' FROM t3 ORDER BY names, address; +# Sort_merge_passes should be 0 +show status like '%sort%'; +Variable_name Value +Sort_merge_passes 0 +Sort_priority_queue_sorts 0 +Sort_range 0 +Sort_rows 10000 +Sort_scan 1 +SELECT id, names, address INTO OUTFILE '$file2' FROM t3 FORCE INDEX(idx) ORDER BY names, address; +# +# CASE #2 Packed sort keys and ROW_ID +# +set @save_max_length_for_sort_data=@@max_length_for_sort_data; +set max_length_for_sort_data= 300; +set sort_buffer_size= 1097152; +ANALYZE FORMAT=JSON SELECT id, names, address FROM t3 ORDER BY names, address; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "read_sorted_file": { + "r_rows": 10000, + "filesort": { + "sort_key": "t3.`names`, t3.address", + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "r_used_priority_queue": false, + "r_output_rows": 10000, + "r_buffer_size": "REPLACED", + "r_sort_mode": "packed_sort_key,rowid", + "table": { + "table_name": "t3", + "access_type": "ALL", + "r_loops": 1, + "rows": 10000, + "r_rows": 10000, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } + } + } +} +flush status; +SELECT id, names, address INTO OUTFILE '$file1' FROM t3 ORDER BY names, address; +# Sort_merge_passes should be 0 +show status like '%sort%'; +Variable_name Value +Sort_merge_passes 0 +Sort_priority_queue_sorts 0 +Sort_range 0 +Sort_rows 10000 +Sort_scan 1 +set @@max_length_for_sort_data=@save_max_length_for_sort_data; +set @@sort_buffer_size=default; set @@RAND_SEED1= @save_rand_seed1; set @@RAND_SEED2= @save_rand_seed2; drop function generate_normal_distribution_sample; diff --git a/mysql-test/main/order_by_pack_big.test b/mysql-test/main/order_by_pack_big.test index 14cd22acde4..dce7bcb905c 100644 --- a/mysql-test/main/order_by_pack_big.test +++ b/mysql-test/main/order_by_pack_big.test @@ -128,6 +128,54 @@ eval $query; show status like '%sort%'; set sort_buffer_size=default; +--echo # +--echo # CASE #1 Packed sort keys with addon fields +--echo # + +ALTER TABLE t3 ADD INDEX idx(names, address); + +let $file1 = `SELECT CONCAT(@@datadir, "t1.txt")`; +let $file2 = `SELECT CONCAT(@@datadir, "t2.txt")`; + +set sort_buffer_size= 2097152; +--source include/analyze-format.inc +eval ANALYZE FORMAT=JSON SELECT id, names, address FROM t3 ORDER BY names, address; +flush status; +evalp SELECT id, names, address INTO OUTFILE '$file1' FROM t3 ORDER BY names, address; + +--echo # Sort_merge_passes should be 0 +show status like '%sort%'; + +evalp SELECT id, names, address INTO OUTFILE '$file2' FROM t3 FORCE INDEX(idx) ORDER BY names, address; + +diff_files $file1 $file2; + +--remove_file $file1 + +--echo # +--echo # CASE #2 Packed sort keys and ROW_ID +--echo # + +set @save_max_length_for_sort_data=@@max_length_for_sort_data; +set max_length_for_sort_data= 300; + +set sort_buffer_size= 1097152; +--source include/analyze-format.inc +eval ANALYZE FORMAT=JSON SELECT id, names, address FROM t3 ORDER BY names, address; +flush status; +evalp SELECT id, names, address INTO OUTFILE '$file1' FROM t3 ORDER BY names, address; + +--echo # Sort_merge_passes should be 0 +show status like '%sort%'; + +diff_files $file1 $file2; + +--remove_file $file1 +--remove_file $file2 + +set @@max_length_for_sort_data=@save_max_length_for_sort_data; +set @@sort_buffer_size=default; + set @@RAND_SEED1= @save_rand_seed1; set @@RAND_SEED2= @save_rand_seed2; diff --git a/plugin/type_inet/sql_type_inet.cc b/plugin/type_inet/sql_type_inet.cc index 6803bdba434..e9c1c27269b 100644 --- a/plugin/type_inet/sql_type_inet.cc +++ b/plugin/type_inet/sql_type_inet.cc @@ -1355,9 +1355,9 @@ void Type_handler_inet6::Item_param_setup_conversion(THD *thd, } -void Type_handler_inet6::make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const +void Type_handler_inet6::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const { DBUG_ASSERT(item->type_handler() == this); NativeBufferInet6 tmp; @@ -1377,10 +1377,33 @@ void Type_handler_inet6::make_sort_key(uchar *to, Item *item, memcpy(to, tmp.ptr(), tmp.length()); } +uint +Type_handler_inet6::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + DBUG_ASSERT(item->type_handler() == this); + NativeBufferInet6 tmp; + item->val_native_result(current_thd, &tmp); + if (item->maybe_null) + { + if (item->null_value) + { + *to++=0; + return 0; + } + *to++= 1; + } + DBUG_ASSERT(!item->null_value); + DBUG_ASSERT(Inet6::binary_length() == tmp.length()); + DBUG_ASSERT(Inet6::binary_length() == sort_field->length); + memcpy(to, tmp.ptr(), tmp.length()); + return tmp.length(); +} -void Type_handler_inet6::sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const +void Type_handler_inet6::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const { attr->length= Inet6::binary_length(); attr->suffix_length= 0; diff --git a/plugin/type_inet/sql_type_inet.h b/plugin/type_inet/sql_type_inet.h index b483ff94e8d..a13931a83a0 100644 --- a/plugin/type_inet/sql_type_inet.h +++ b/plugin/type_inet/sql_type_inet.h @@ -513,10 +513,14 @@ public: def->frm_unpack_basic(buffer); return def->frm_unpack_charset(share, buffer); } - void make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, Sort_param *param) + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; - void sortlength(THD *thd, + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const override; uint32 max_display_length(const Item *item) const override diff --git a/sql/bounded_queue.h b/sql/bounded_queue.h index cd710d835aa..07ab6dbaab9 100644 --- a/sql/bounded_queue.h +++ b/sql/bounded_queue.h @@ -59,7 +59,8 @@ public: */ typedef uint (*keymaker_function)(Sort_param *param, Key_type *to, - Element_type *from); + Element_type *from, + bool packing_keys); /** Function for comparing two keys. @@ -181,11 +182,12 @@ void Bounded_queue<Element_type, Key_type>::push(Element_type *element) { // Replace top element with new key, and re-order the queue. Key_type **pq_top= reinterpret_cast<Key_type **>(queue_top(&m_queue)); - (void)(*m_keymaker)(m_sort_param, *pq_top, element); + (void)(*m_keymaker)(m_sort_param, *pq_top, element, false); queue_replace_top(&m_queue); } else { // Insert new key into the queue. - (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], element); + (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], + element, false); queue_insert(&m_queue, reinterpret_cast<uchar*>(&m_sort_keys[m_queue.elements])); } diff --git a/sql/field.cc b/sql/field.cc index 1ce49b0bdfa..c9b29af1155 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1013,8 +1013,15 @@ CPP_UNNAMED_NS_END Static help functions *****************************************************************************/ +/* + @brief + Create a fixed size sort key part + + @param buff buffer where values are written + @param length fixed size of the sort column +*/ -void Field::make_sort_key(uchar *buff,uint length) +void Field::make_sort_key_part(uchar *buff,uint length) { if (maybe_null()) { @@ -1029,6 +1036,62 @@ void Field::make_sort_key(uchar *buff,uint length) } +/* + @brief + Create a packed sort key part + + @param buff buffer where values are written + @param sort_field sort column structure + + @retval + length of the bytes written, does not include the NULL bytes +*/ +uint +Field::make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) +{ + if (maybe_null()) + { + if (is_null()) + { + *buff++= 0; + return 0; // For NULL values don't write any data + } + *buff++=1; + } + sort_string(buff, sort_field->original_length); + return sort_field->original_length; +} + + +uint +Field_longstr::make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field) +{ + if (maybe_null()) + { + if (is_null()) + { + *buff++= 0; + return 0; // For NULL values don't write any data + } + *buff++=1; + } + uchar *end= pack_sort_string(buff, sort_field); + return static_cast<int>(end-buff); +} + + +uchar* +Field_longstr::pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field) +{ + String buf; + val_str(&buf, &buf); + return to + sort_field->pack_sort_string(to, buf.lex_cstring(), + field_charset()); +} + + /** @brief Determine the relative position of the field value in a numeric interval @@ -5395,29 +5458,6 @@ static longlong read_native(const uchar *from, uint bytes) } #endif -static void store_lowendian(ulonglong num, uchar *to, uint bytes) -{ - switch(bytes) { - case 1: *to= (uchar)num; break; - case 2: int2store(to, num); break; - case 3: int3store(to, num); break; - case 4: int4store(to, num); break; - case 8: int8store(to, num); break; - default: DBUG_ASSERT(0); - } -} - -static longlong read_lowendian(const uchar *from, uint bytes) -{ - switch(bytes) { - case 1: return from[0]; - case 2: return uint2korr(from); - case 3: return uint3korr(from); - case 4: return uint4korr(from); - case 8: return sint8korr(from); - default: DBUG_ASSERT(0); return 0; - } -} void Field_timestamp_hires::store_TIMEVAL(const timeval &tv) { @@ -8526,8 +8566,15 @@ Binlog_type_info Field_blob::binlog_type_info() const uint32 Field_blob::sort_length() const { - return (uint32) (get_thd()->variables.max_sort_length + - (field_charset() == &my_charset_bin ? 0 : packlength)); + return packlength == 4 ? + UINT_MAX32 : + (uint32) field_length + sort_suffix_length(); +} + + +uint32 Field_blob::sort_suffix_length() const +{ + return field_charset() == &my_charset_bin ? packlength : 0; } diff --git a/sql/field.h b/sql/field.h index 4a8eec35b05..71e9b3a2128 100644 --- a/sql/field.h +++ b/sql/field.h @@ -51,6 +51,8 @@ class Table_ident; class SEL_ARG; class RANGE_OPT_PARAM; struct KEY_PART; +struct SORT_FIELD; +struct SORT_FIELD_ATTR; enum enum_check_fields { @@ -1075,6 +1077,12 @@ public: virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } + /* + sort_suffix_length() return the length bytes needed to store the length + for binary charset + */ + virtual uint32 sort_suffix_length() const { return 0; } + /* Get the number bytes occupied by the value in the field. CHAR values are stripped of trailing spaces. @@ -1410,7 +1418,18 @@ public: return bytes; } - void make_sort_key(uchar *buff, uint length); + /* + Create mem-comparable sort key part for a sort key + */ + void make_sort_key_part(uchar *buff, uint length); + + /* + create a compact sort key which can be compared with a comparison + function. They are called packed sort keys + */ + virtual uint make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field); + virtual void make_send_field(Send_field *); virtual void sort_string(uchar *buff,uint length)=0; virtual bool optimize_range(uint idx, uint part) const; @@ -2140,7 +2159,10 @@ public: bool can_optimize_range(const Item_bool_func *cond, const Item *item, bool is_eq_func) const; - bool is_packable() { return true; } + bool is_packable() override { return true; } + uint make_packed_sort_key_part(uchar *buff, + const SORT_FIELD_ATTR *sort_field)override; + uchar* pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field); }; /* base class for float and double and decimal (old one) */ @@ -4042,8 +4064,11 @@ public: uint32 key_length() const override { return (uint32) field_length; } uint32 sort_length() const override { - return (uint32) field_length + (field_charset() == &my_charset_bin ? - length_bytes : 0); + return (uint32) field_length + sort_suffix_length(); + } + virtual uint32 sort_suffix_length() const override + { + return (field_charset() == &my_charset_bin ? length_bytes : 0); } Copy_func *get_copy_func(const Field *from) const override; bool memcpy_field_possible(const Field *from) const override; @@ -4187,6 +4212,30 @@ static inline longlong read_bigendian(const uchar *from, uint bytes) } } +static inline void store_lowendian(ulonglong num, uchar *to, uint bytes) +{ + switch(bytes) { + case 1: *to= (uchar)num; break; + case 2: int2store(to, num); break; + case 3: int3store(to, num); break; + case 4: int4store(to, num); break; + case 8: int8store(to, num); break; + default: DBUG_ASSERT(0); + } +} + +static inline longlong read_lowendian(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return from[0]; + case 2: return uint2korr(from); + case 3: return uint3korr(from); + case 4: return uint4korr(from); + case 8: return sint8korr(from); + default: DBUG_ASSERT(0); return 0; + } +} + extern LEX_CSTRING temp_lex_str; @@ -4353,6 +4402,7 @@ public: { return (uint32) (packlength); } uint row_pack_length() const override { return pack_length_no_ptr(); } uint32 sort_length() const override; + uint32 sort_suffix_length() const override; uint32 value_length() override { return get_length(); } virtual uint32 max_data_length() const override { diff --git a/sql/filesort.cc b/sql/filesort.cc index 34fecd516b4..56f060b8c26 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -48,13 +48,18 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, ha_rows *found_rows); static bool write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); -static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos); +static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos, + bool using_packed_sortkeys= false); +static uint make_sortkey(Sort_param *param, uchar *to); +static uint make_packed_sortkey(Sort_param *param, uchar *to); + static void register_used_fields(Sort_param *param); static bool save_index(Sort_param *param, uint count, SORT_INFO *table_sort); static uint suffix_length(ulong string_length); -static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, - bool *multi_byte_charset); +static uint sortlength(THD *thd, Sort_keys *sortorder, + bool *multi_byte_charset, + bool *allow_packing_for_sortkeys); static Addon_fields *get_addon_fields(TABLE *table, uint sortlength, uint *addon_length, uint *m_packable_length); @@ -62,7 +67,7 @@ static Addon_fields *get_addon_fields(TABLE *table, uint sortlength, static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, TABLE *table, ha_rows records, size_t memory_available); - +// @param sortlen [Maximum] length of the sort key void Sort_param::init_for_filesort(uint sortlen, TABLE *table, ha_rows maxrows, bool sort_positions) { @@ -108,7 +113,7 @@ void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data) if (!Addon_fields::can_pack_addon_fields(res_length)) return; - const uint sz= Addon_fields::size_of_length_field;; + const uint sz= Addon_fields::size_of_length_field; if (rec_length + sz > max_length_for_sort_data) return; @@ -125,6 +130,7 @@ void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data) addon_fields->set_using_packed_addons(true); m_using_packed_addons= true; + m_packed_format= true; addon_length+= sz; res_length+= sz; @@ -171,18 +177,21 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, ha_rows num_rows= HA_POS_ERROR; IO_CACHE tempfile, buffpek_pointers, *outfile; Sort_param param; - bool multi_byte_charset; + bool multi_byte_charset, allow_packing_for_sortkeys; Bounded_queue<uchar, uchar> pq; SQL_SELECT *const select= filesort->select; ha_rows max_rows= filesort->limit; uint s_length= 0; + Sort_keys *sort_keys; DBUG_ENTER("filesort"); - if (!(s_length= filesort->make_sortorder(thd, join, first_table_bit))) + if (!(sort_keys= filesort->make_sortorder(thd, join, first_table_bit))) DBUG_RETURN(NULL); /* purecov: inspected */ - DBUG_EXECUTE("info",TEST_filesort(filesort->sortorder,s_length);); + s_length= static_cast<uint>(sort_keys->size()); + + DBUG_EXECUTE("info",TEST_filesort(filesort->sortorder, s_length);); #ifdef SKIP_DBUG_IN_FILESORT DBUG_PUSH(""); /* No DBUG here */ #endif @@ -215,16 +224,14 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, error= 1; sort->found_rows= HA_POS_ERROR; - param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length, - &multi_byte_charset), - table, max_rows, filesort->sort_positions); + param.sort_keys= sort_keys; + uint sort_len= sortlength(thd, sort_keys, &multi_byte_charset, + &allow_packing_for_sortkeys); - sort->addon_fields= param.addon_fields; + param.init_for_filesort(sort_len, table, max_rows, filesort->sort_positions); - if (multi_byte_charset && - !(param.tmp_buffer= (char*) my_malloc(param.sort_length, - MYF(MY_WME | MY_THREAD_SPECIFIC)))) - goto err; + sort->addon_fields= param.addon_fields; + sort->sort_keys= param.sort_keys; if (select && select->quick) thd->inc_status_sort_range(); @@ -245,6 +252,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, tracker->incr_pq_used(); param.using_pq= true; const size_t compare_length= param.sort_length; + DBUG_ASSERT(param.using_packed_sortkeys() == false); /* For PQ queries (with limit) we know exactly how many pointers/records we have in the buffer, so to simplify things, we initialize @@ -271,9 +279,19 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, { DBUG_PRINT("info", ("filesort PQ is not applicable")); + if (allow_packing_for_sortkeys) + param.try_to_pack_sortkeys(); + param.try_to_pack_addons(thd->variables.max_length_for_sort_data); + tracker->report_sort_keys_format(param.using_packed_sortkeys()); param.using_pq= false; + if ((multi_byte_charset || param.using_packed_sortkeys()) && + !(param.tmp_buffer= (char*) my_malloc(param.sort_length, + MYF(MY_WME | MY_THREAD_SPECIFIC)))) + goto err; + + size_t min_sort_memory= MY_MAX(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2); set_if_bigger(min_sort_memory, sizeof(Merge_chunk*)*MERGEBUFF2); @@ -485,24 +503,42 @@ void Filesort::cleanup() } -uint Filesort::make_sortorder(THD *thd, JOIN *join, table_map first_table_bit) +/* + Create the Sort_keys array and fill the sort_keys[i]->{item|field}. + + This indicates which field/item values will be used as sort keys. + Attributes like lengths are not filled yet. +*/ + +Sort_keys* +Filesort::make_sortorder(THD *thd, JOIN *join, table_map first_table_bit) { uint count; SORT_FIELD *sort,*pos; ORDER *ord; DBUG_ENTER("make_sortorder"); - count=0; for (ord = order; ord; ord= ord->next) count++; - if (!sortorder) - sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * (count + 1)); + + if (sortorder) + DBUG_RETURN(sort_keys); + + DBUG_ASSERT(sort_keys == NULL); + + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * count); pos= sort= sortorder; if (!pos) DBUG_RETURN(0); + sort_keys= new Sort_keys(sortorder, count); + + if (!sort_keys) + DBUG_RETURN(0); + + pos= sort_keys->begin(); for (ord= order; ord; ord= ord->next, pos++) { Item *first= ord->item[0]; @@ -543,7 +579,7 @@ uint Filesort::make_sortorder(THD *thd, JOIN *join, table_map first_table_bit) pos->reverse= (ord->direction == ORDER::ORDER_DESC); DBUG_ASSERT(pos->field != NULL || pos->item != NULL); } - DBUG_RETURN(count); + DBUG_RETURN(sort_keys); } @@ -752,7 +788,7 @@ static void dbug_print_record(TABLE *table, bool print_rowid) static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, SORT_INFO *fs_info, - IO_CACHE *buffpek_pointers, + IO_CACHE *buffpek_pointers, IO_CACHE *tempfile, Bounded_queue<uchar, uchar> *pq, ha_rows *found_rows) @@ -765,7 +801,9 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, MY_BITMAP *save_read_set, *save_write_set; Item *sort_cond; ha_rows num_records= 0; - const bool packed_addon_fields= param->using_packed_addons(); + const bool packed_format= param->is_packed_format(); + const bool using_packed_sortkeys= param->using_packed_sortkeys(); + DBUG_ENTER("find_all_keys"); DBUG_PRINT("info",("using: %s", (select ? select->quick ? "ranges" : "where": @@ -821,6 +859,7 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, } DEBUG_SYNC(thd, "after_index_merge_phase1"); + for (;;) { if (quick_select) @@ -888,8 +927,9 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, fs_info->init_next_record_pointer(); uchar *start_of_rec= fs_info->get_next_record_pointer(); - const uint rec_sz= make_sortkey(param, start_of_rec, ref_pos); - if (packed_addon_fields && rec_sz != param->rec_length) + const uint rec_sz= make_sortkey(param, start_of_rec, + ref_pos, using_packed_sortkeys); + if (packed_format && rec_sz != param->rec_length) fs_info->adjust_next_record_pointer(rec_sz); idx++; } @@ -970,12 +1010,9 @@ static bool write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, IO_CACHE *buffpek_pointers, IO_CACHE *tempfile) { - size_t rec_length; Merge_chunk buffpek; DBUG_ENTER("write_keys"); - rec_length= param->rec_length; - fs_info->sort_buffer(param, count); if (!my_b_inited(tempfile) && @@ -991,19 +1028,12 @@ write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, count=(uint) param->max_rows; /* purecov: inspected */ buffpek.set_rowcount(static_cast<ha_rows>(count)); - const bool packed_addon_fields= param->using_packed_addons(); for (uint ix= 0; ix < count; ++ix) { uchar *record= fs_info->get_sorted_record(ix); - if (packed_addon_fields) - { - rec_length= param->sort_length + - Addon_fields::read_addon_length(record + param->sort_length); - } - else - rec_length= param->rec_length; - if (my_b_write(tempfile, record, rec_length)) + + if (my_b_write(tempfile, record, param->get_record_length(record))) DBUG_RETURN(1); /* purecov: inspected */ } @@ -1016,10 +1046,9 @@ write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, /** - Store length as suffix in high-byte-first order. + Store length in high-byte-first order. */ - -static inline void store_length(uchar *to, uint length, uint pack_length) +void store_length(uchar *to, uint length, uint pack_length) { switch (pack_length) { case 1: @@ -1039,9 +1068,9 @@ static inline void store_length(uchar *to, uint length, uint pack_length) void -Type_handler_string_result::make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const +Type_handler_string_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const { CHARSET_INFO *cs= item->collation.collation; bool maybe_null= item->maybe_null; @@ -1111,9 +1140,9 @@ Type_handler_string_result::make_sort_key(uchar *to, Item *item, void -Type_handler_int_result::make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const +Type_handler_int_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const { longlong value= item->val_int_result(); make_sort_key_longlong(to, item->maybe_null, item->null_value, @@ -1122,7 +1151,7 @@ Type_handler_int_result::make_sort_key(uchar *to, Item *item, void -Type_handler_temporal_result::make_sort_key(uchar *to, Item *item, +Type_handler_temporal_result::make_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, Sort_param *param) const { @@ -1144,7 +1173,7 @@ Type_handler_temporal_result::make_sort_key(uchar *to, Item *item, void -Type_handler_timestamp_common::make_sort_key(uchar *to, Item *item, +Type_handler_timestamp_common::make_sort_key_part(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, Sort_param *param) const { @@ -1177,6 +1206,24 @@ Type_handler_timestamp_common::make_sort_key(uchar *to, Item *item, void +Type_handler::store_sort_key_longlong(uchar *to, bool unsigned_flag, + longlong value) const +{ + to[7]= (uchar) value; + to[6]= (uchar) (value >> 8); + to[5]= (uchar) (value >> 16); + to[4]= (uchar) (value >> 24); + to[3]= (uchar) (value >> 32); + to[2]= (uchar) (value >> 40); + to[1]= (uchar) (value >> 48); + if (unsigned_flag) /* Fix sign */ + to[0]= (uchar) (value >> 56); + else + to[0]= (uchar) (value >> 56) ^ 128; /* Reverse signbit */ +} + + +void Type_handler::make_sort_key_longlong(uchar *to, bool maybe_null, bool null_value, @@ -1193,24 +1240,35 @@ Type_handler::make_sort_key_longlong(uchar *to, } *to++= 1; } - to[7]= (uchar) value; - to[6]= (uchar) (value >> 8); - to[5]= (uchar) (value >> 16); - to[4]= (uchar) (value >> 24); - to[3]= (uchar) (value >> 32); - to[2]= (uchar) (value >> 40); - to[1]= (uchar) (value >> 48); - if (unsigned_flag) /* Fix sign */ - to[0]= (uchar) (value >> 56); - else - to[0]= (uchar) (value >> 56) ^ 128; /* Reverse signbit */ + store_sort_key_longlong(to, unsigned_flag, value); +} + + +uint +Type_handler::make_packed_sort_key_longlong(uchar *to, bool maybe_null, + bool null_value, bool unsigned_flag, + longlong value, + const SORT_FIELD_ATTR *sort_field) const +{ + if (maybe_null) + { + if (null_value) + { + *to++= 0; + return 0; + } + *to++= 1; + } + store_sort_key_longlong(to, unsigned_flag, value); + DBUG_ASSERT(sort_field->original_length == sort_field->length); + return sort_field->original_length; } void -Type_handler_decimal_result::make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const +Type_handler_decimal_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const { my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf); if (item->maybe_null) @@ -1228,9 +1286,9 @@ Type_handler_decimal_result::make_sort_key(uchar *to, Item *item, void -Type_handler_real_result::make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const +Type_handler_real_result::make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const { double value= item->val_result(); if (item->maybe_null) @@ -1248,48 +1306,14 @@ Type_handler_real_result::make_sort_key(uchar *to, Item *item, /** Make a sort-key from record. */ -static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos) +static uint make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos, + bool using_packed_sortkeys) { - Field *field; - SORT_FIELD *sort_field; - uint length; uchar *orig_to= to; - for (sort_field=param->local_sortorder.begin() ; - sort_field != param->local_sortorder.end() ; - sort_field++) - { - bool maybe_null=0; - if ((field=sort_field->field)) - { // Field - field->make_sort_key(to, sort_field->length); - if ((maybe_null = field->maybe_null())) - to++; - } - else - { // Item - sort_field->item->type_handler()->make_sort_key(to, sort_field->item, - sort_field, param); - if ((maybe_null= sort_field->item->maybe_null)) - to++; - } - if (sort_field->reverse) - { /* Revers key */ - if (maybe_null && (to[-1]= !to[-1])) - { - to+= sort_field->length; // don't waste the time reversing all 0's - continue; - } - length=sort_field->length; - while (length--) - { - *to = (uchar) (~ *to); - to++; - } - } - else - to+= sort_field->length; - } + to+= using_packed_sortkeys ? + make_packed_sortkey(param, to) : + make_sortkey(param, to); if (param->using_addon_fields()) { @@ -1385,7 +1409,7 @@ static void register_used_fields(Sort_param *param) static bool save_index(Sort_param *param, uint count, SORT_INFO *table_sort) { - uint offset,res_length; + uint offset,res_length, length; uchar *to; DBUG_ENTER("save_index"); DBUG_ASSERT(table_sort->record_pointers == 0); @@ -1399,6 +1423,7 @@ static bool save_index(Sort_param *param, uint count, DBUG_RETURN(0); } + bool using_packed_sortkeys= param->using_packed_sortkeys(); res_length= param->res_length; offset= param->rec_length-res_length; if (!(to= table_sort->record_pointers= @@ -1408,7 +1433,11 @@ static bool save_index(Sort_param *param, uint count, for (uint ix= 0; ix < count; ++ix) { uchar *record= table_sort->get_sorted_record(ix); - memcpy(to, record + offset, res_length); + + length= using_packed_sortkeys ? + Sort_keys::read_sortkey_length(record) : offset; + + memcpy(to, record + length, res_length); to+= res_length; } DBUG_RETURN(0); @@ -1611,7 +1640,7 @@ cleanup: */ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, - Sort_param *param) + Sort_param *param, bool packed_format) { ha_rows count; uint rec_length= param->rec_length; @@ -1619,7 +1648,7 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, if ((count= MY_MIN(buffpek->max_keys(),buffpek->rowcount()))) { size_t bytes_to_read; - if (param->using_packed_addons()) + if (packed_format) { count= buffpek->rowcount(); bytes_to_read= MY_MIN(buffpek->buffer_size(), @@ -1634,26 +1663,43 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, return ((ulong) -1); size_t num_bytes_read; - if (param->using_packed_addons()) + + if (packed_format) { /* The last record read is most likely not complete here. We need to loop through all the records, reading the length fields, and then "chop off" the final incomplete record. - */ + */ uchar *record= buffpek->buffer_start(); uint ix= 0; + uint size_of_addon_length= param->using_packed_addons() ? + Addon_fields::size_of_length_field : 0; + + uint size_of_sort_length= param->using_packed_sortkeys() ? + Sort_keys::size_of_length_field : 0; + for (; ix < count; ++ix) { - if (record + param->sort_length + Addon_fields::size_of_length_field > + if (record + size_of_sort_length > buffpek->buffer_end()) + break; + uint sort_length= param->using_packed_sortkeys() ? + Sort_keys::read_sortkey_length(record) : + param->sort_length; + + DBUG_ASSERT(sort_length <= param->sort_length); + + if (record + sort_length + size_of_addon_length > buffpek->buffer_end()) break; // Incomplete record. - uchar *plen= record + param->sort_length; - uint res_length= Addon_fields::read_addon_length(plen); + + uchar *plen= record + sort_length; + uint res_length= param->get_result_length(plen); if (plen + res_length > buffpek->buffer_end()) break; // Incomplete record. DBUG_ASSERT(res_length > 0); - record+= param->sort_length; + DBUG_ASSERT(sort_length + res_length <= param->rec_length); + record+= sort_length; record+= res_length; } DBUG_ASSERT(ix > 0); @@ -1710,7 +1756,10 @@ void reuse_freed_buff(QUEUE *queue, Merge_chunk *reuse, uint key_length) @param lastbuff OUT Store here BUFFPEK describing data written to to_file @param Fb First element in source BUFFPEKs array @param Tb Last element in source BUFFPEKs array - @param flag + @param flag 0 <=> write {sort_key, addon_fields} pairs as further + sorting will be performed + 1 <=> write just addon_fields as this is the final + merge pass @retval 0 OK @@ -1754,6 +1803,11 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length); uint wr_len= flag ? res_length : rec_length; uint wr_offset= flag ? offset : 0; + + const bool using_packed_sortkeys= param->using_packed_sortkeys(); + bool offset_for_packing= (flag == 1 && using_packed_sortkeys); + const bool packed_format= param->is_packed_format(); + maxcount= (ulong) (param->max_keys_per_buffer/((uint) (Tb-Fb) +1)); to_start_filepos= my_b_tell(to_file); strpos= sort_buffer.array(); @@ -1768,8 +1822,8 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } else { - cmp= get_ptr_compare(sort_length); - first_cmp_arg= (void*) &sort_length; + cmp= param->get_compare_function(); + first_cmp_arg= param->get_compare_argument(&sort_length); } if (unlikely(init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(Merge_chunk,m_current_key), 0, @@ -1781,7 +1835,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, strpos + (sort_buffer.size()/((uint) (Tb-Fb) +1))); buffpek->set_max_keys(maxcount); - bytes_read= read_to_buffer(from_file, buffpek, param); + bytes_read= read_to_buffer(from_file, buffpek, param, packed_format); if (unlikely(bytes_read == (ulong) -1)) goto err; /* purecov: inspected */ strpos+= bytes_read; @@ -1808,7 +1862,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, if (buffpek->mem_count() == 0) { if (unlikely(!(bytes_read= read_to_buffer(from_file, buffpek, - param)))) + param, packed_format)))) { (void) queue_remove_top(&queue); reuse_freed_buff(&queue, buffpek, rec_length); @@ -1866,7 +1920,11 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, */ if (!check_dupl_count || dupl_count >= min_dupl_count) { - if (my_b_write(to_file, src + wr_offset, bytes_to_write)) + if(my_b_write(to_file, + src + (offset_for_packing ? + rec_length - res_length : // sort length + wr_offset), + bytes_to_write)) goto err; /* purecov: inspected */ } if (cmp) @@ -1889,7 +1947,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, if (buffpek->mem_count() == 0) { if (unlikely(!(bytes_read= read_to_buffer(from_file, buffpek, - param)))) + param, packed_format)))) { (void) queue_remove_top(&queue); reuse_freed_buff(&queue, buffpek, rec_length); @@ -1949,7 +2007,8 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, max_rows-= buffpek->mem_count(); for (uint ix= 0; ix < buffpek->mem_count(); ++ix) { - param->get_rec_and_res_len(buffpek->current_key(), + uchar *src= buffpek->current_key(); + param->get_rec_and_res_len(src, &rec_length, &res_length); const uint bytes_to_write= (flag == 0) ? rec_length : res_length; if (check_dupl_count) @@ -1960,15 +2019,18 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, if (dupl_count < min_dupl_count) continue; } - if (my_b_write(to_file, buffpek->current_key() + wr_offset, - bytes_to_write)) + if(my_b_write(to_file, + src + (offset_for_packing ? + rec_length - res_length : // sort length + wr_offset), + bytes_to_write)) goto err; buffpek->advance_current_key(rec_length); } } while (likely(!(error= - (bytes_read= read_to_buffer(from_file, buffpek, - param)) == (ulong) -1)) && + (bytes_read= read_to_buffer(from_file, buffpek, param, + packed_format)) == (ulong) -1)) && bytes_read != 0); end: @@ -2013,13 +2075,14 @@ static uint suffix_length(ulong string_length) void -Type_handler_string_result::sortlength(THD *thd, +Type_handler_string_result::sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *sortorder) const { CHARSET_INFO *cs; sortorder->length= item->max_length; - set_if_smaller(sortorder->length, thd->variables.max_sort_length); + sortorder->original_length= item->max_length; + if (use_strnxfrm((cs= item->collation.collation))) { sortorder->length= (uint) cs->strnxfrmlen(sortorder->length); @@ -2029,54 +2092,57 @@ Type_handler_string_result::sortlength(THD *thd, /* Store length last to be able to sort blob/varbinary */ sortorder->suffix_length= suffix_length(sortorder->length); sortorder->length+= sortorder->suffix_length; + sortorder->original_length+= sortorder->suffix_length; } } void -Type_handler_temporal_result::sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *sortorder) const +Type_handler_temporal_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const { - sortorder->length= 8; // Sizof intern longlong + sortorder->original_length= sortorder->length= 8; // Sizof intern longlong } void -Type_handler_timestamp_common::sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *sortorder) const +Type_handler_timestamp_common::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const { sortorder->length= my_timestamp_binary_length(item->decimals); + sortorder->original_length= sortorder->length; } void -Type_handler_int_result::sortlength(THD *thd, +Type_handler_int_result::sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *sortorder) const { - sortorder->length= 8; // Sizof intern longlong + sortorder->original_length= sortorder->length= 8; // Sizof intern longlong } void -Type_handler_real_result::sortlength(THD *thd, +Type_handler_real_result::sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *sortorder) const { - sortorder->length= sizeof(double); + sortorder->original_length= sortorder->length= sizeof(double); } void -Type_handler_decimal_result::sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *sortorder) const +Type_handler_decimal_result::sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *sortorder) const { sortorder->length= my_decimal_get_binary_size(item->max_length - (item->decimals ? 1 : 0), item->decimals); + sortorder->original_length= sortorder->length; } @@ -2088,54 +2154,100 @@ Type_handler_decimal_result::sortlength(THD *thd, @param s_length Number of items to sort @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset (In which case we have to use strxnfrm()) + @param allow_packing_for_sortkeys [out] set to false if packing sort keys is not + allowed @note - sortorder->length is updated for each sort item. + * sortorder->length and other members are updated for each sort item. + * TODO what is the meaning of this value if some fields are using packing while + others are not? @return Total length of sort buffer in bytes */ static uint -sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, - bool *multi_byte_charset) +sortlength(THD *thd, Sort_keys *sort_keys, bool *multi_byte_charset, + bool *allow_packing_for_sortkeys) { uint length; *multi_byte_charset= 0; + *allow_packing_for_sortkeys= true; + bool allow_packing_for_keys= true; length=0; - for (; s_length-- ; sortorder++) + uint nullable_cols=0; + + for (SORT_FIELD *sortorder= sort_keys->begin(); + sortorder != sort_keys->end(); + sortorder++) { sortorder->suffix_length= 0; + sortorder->length_bytes= 0; if (sortorder->field) { + Field *field= sortorder->field; CHARSET_INFO *cs= sortorder->field->sort_charset(); sortorder->length= sortorder->field->sort_length(); + sortorder->suffix_length= sortorder->field->sort_suffix_length(); + sortorder->original_length= sortorder->length; + sortorder->type= field->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + sortorder->cs= cs; + if (use_strnxfrm((cs=sortorder->field->sort_charset()))) { *multi_byte_charset= true; sortorder->length= (uint) cs->strnxfrmlen(sortorder->length); } - if (sortorder->field->maybe_null()) - length++; // Place for NULL marker + if (sortorder->is_variable_sized() && allow_packing_for_keys) + { + allow_packing_for_keys= sortorder->check_if_packing_possible(thd); + sortorder->length_bytes= + number_storage_requirement(MY_MIN(sortorder->original_length, + thd->variables.max_sort_length)); + } + + if ((sortorder->maybe_null= sortorder->field->maybe_null())) + nullable_cols++; // Place for NULL marker } else { - sortorder->item->type_handler()->sortlength(thd, sortorder->item, - sortorder); - if (use_strnxfrm(sortorder->item->collation.collation)) + CHARSET_INFO *cs; + sortorder->item->type_handler()->sort_length(thd, sortorder->item, + sortorder); + sortorder->type= sortorder->item->type_handler()->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + if (use_strnxfrm((cs=sortorder->item->collation.collation))) { *multi_byte_charset= true; } - if (sortorder->item->maybe_null) - length++; // Place for NULL marker + sortorder->cs= cs; + if (sortorder->is_variable_sized() && allow_packing_for_keys) + { + allow_packing_for_keys= sortorder->check_if_packing_possible(thd); + sortorder->length_bytes= + number_storage_requirement(MY_MIN(sortorder->original_length, + thd->variables.max_sort_length)); + } + + if ((sortorder->maybe_null= sortorder->item->maybe_null)) + nullable_cols++; // Place for NULL marker } set_if_smaller(sortorder->length, thd->variables.max_sort_length); + set_if_smaller(sortorder->original_length, thd->variables.max_sort_length); length+=sortorder->length; + + sort_keys->increment_size_of_packable_fields(sortorder->length_bytes); + sort_keys->increment_original_sort_length(sortorder->original_length); } - sortorder->field= NULL; // end marker + // add bytes for nullable_cols + sort_keys->increment_original_sort_length(nullable_cols); + *allow_packing_for_sortkeys= allow_packing_for_keys; DBUG_PRINT("info",("sort_length: %d",length)); - return length; + return length + nullable_cols; } @@ -2289,23 +2401,6 @@ get_addon_fields(TABLE *table, uint sortlength, } -/** - Copy (unpack) values appended to sorted fields from a buffer back to - their regular positions specified by the Field::ptr pointers. - - @param addon_field Array of descriptors for appended fields - @param buff Buffer which to unpack the value from - - @note - The function is supposed to be used only as a callback function - when getting field values for the sorted result set. - - @return - void. -*/ - - - /* ** functions to change a double or float to a sortable string ** The following should work for IEEE @@ -2365,6 +2460,14 @@ void SORT_INFO::free_addon_buff() addon_fields->free_addon_buff(); } +/* + Check if packed sortkeys are used or not +*/ +bool SORT_INFO::using_packed_sortkeys() +{ + return sort_keys != NULL && sort_keys->using_packed_sortkeys(); +} + /** Free SORT_INFO */ @@ -2375,3 +2478,560 @@ SORT_INFO::~SORT_INFO() free_data(); DBUG_VOID_RETURN; } + + +void Sort_param::try_to_pack_sortkeys() +{ + #ifdef WITHOUT_PACKED_SORT_KEYS + return; + #endif + + uint size_of_packable_fields= sort_keys->get_size_of_packable_fields(); + + /* + Disable packing when all fields are fixed-size fields. + */ + if (size_of_packable_fields == 0) + return; + + const uint sz= Sort_keys::size_of_length_field; + uint sort_len= sort_keys->get_sort_length(); + + /* + Heuristic introduced, skip packing sort keys if saving less than 128 bytes + */ + + if (sort_len < 128 + sz + size_of_packable_fields) + return; + + sort_keys->set_using_packed_sortkeys(true); + m_packed_format= true; + m_using_packed_sortkeys= true; + sort_length= sort_len + sz + size_of_packable_fields + + (using_addon_fields() ? 0 : res_length); + /* Only the record length needs to be updated, the res_length does not need + to be updated + */ + rec_length= sort_length + addon_length; +} + + +uint +Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + CHARSET_INFO *cs= item->collation.collation; + bool maybe_null= item->maybe_null; + + if (maybe_null) + *to++= 1; + + String tmp(param->tmp_buffer, param->sort_length, cs); + String *res= item->str_result(&tmp); + if (!res) + { + if (maybe_null) + { + *(to-1)= 0; + return 0; + } + else + { + /* purecov: begin deadcode */ + /* + This should only happen during extreme conditions if we run out + of memory or have an item marked not null when it can be null. + This code is here mainly to avoid a hard crash in this case. + */ + DBUG_ASSERT(0); + DBUG_PRINT("warning", + ("Got null on something that shouldn't be null")); + memset(to, 0, sort_field->length); // Avoid crash + /* purecov: end */ + return sort_field->original_length; + } + } + return sort_field->pack_sort_string(to, res->lex_cstring(), cs); +} + + +uint +Type_handler_int_result::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + longlong value= item->val_int_result(); + return make_packed_sort_key_longlong(to, item->maybe_null, + item->null_value, item->unsigned_flag, + value, sort_field); +} + + +uint +Type_handler_decimal_result::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf); + if (item->maybe_null) + { + if (item->null_value) + { + *to++=0; + return 0; + } + *to++= 1; + } + dec_val->to_binary(to, item->max_length - (item->decimals ? 1 : 0), + item->decimals); + DBUG_ASSERT(sort_field->original_length == sort_field->length); + return sort_field->original_length; +} + + +uint +Type_handler_real_result::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + double value= item->val_result(); + if (item->maybe_null) + { + if (item->null_value) + { + *to++=0; + return 0; + } + *to++= 1; + } + change_double_for_sort(value, to); + DBUG_ASSERT(sort_field->original_length == sort_field->length); + return sort_field->original_length; +} + + +uint +Type_handler_temporal_result::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + MYSQL_TIME buf; + // This is a temporal type. No nanoseconds. Rounding mode is not important. + DBUG_ASSERT(item->cmp_type() == TIME_RESULT); + static const Temporal::Options opt(TIME_INVALID_DATES, TIME_FRAC_NONE); + if (item->get_date_result(current_thd, &buf, opt)) + { + DBUG_ASSERT(item->maybe_null); + DBUG_ASSERT(item->null_value); + return make_packed_sort_key_longlong(to, item->maybe_null, true, + item->unsigned_flag, 0, sort_field); + } + return make_packed_sort_key_longlong(to, item->maybe_null, false, + item->unsigned_flag, pack_time(&buf), + sort_field); +} + + +uint +Type_handler_timestamp_common::make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const +{ + THD *thd= current_thd; + uint binlen= my_timestamp_binary_length(item->decimals); + Timestamp_or_zero_datetime_native_null native(thd, item); + if (native.is_null() || native.is_zero_datetime()) + { + // NULL or '0000-00-00 00:00:00' + if (item->maybe_null) + { + *to++=0; + return 0; + } + else + { + bzero(to, binlen); + return binlen; + } + } + else + { + if (item->maybe_null) + *to++= 1; + if (native.length() != binlen) + { + /* + Some items can return native representation with a different + number of fractional digits, e.g.: GREATEST(ts_3, ts_4) can + return a value with 3 fractional digits, although its fractional + precision is 4. Re-pack with a proper precision now. + */ + Timestamp(native).to_native(&native, item->datetime_precision(thd)); + } + DBUG_ASSERT(native.length() == binlen); + memcpy((char *) to, native.ptr(), binlen); + return binlen; + } +} + + +/* + @brief + Reverse the key for DESC clause + @param to buffer where values are written + @param maybe_null nullability of a column + @param sort_field Sort field structure + @details + used for mem-comparable sort keys +*/ + +void reverse_key(uchar *to, const SORT_FIELD_ATTR *sort_field) +{ + uint length; + if (sort_field->maybe_null && (to[-1]= !to[-1])) + { + to+= sort_field->length; // don't waste the time reversing all 0's + return; + } + length=sort_field->length; + while (length--) + { + *to = (uchar) (~ *to); + to++; + } +} + + +/* + @brief + Check if packing sort keys is allowed + @param THD thread structure + @retval + TRUE packing allowed + FALSE packing not allowed +*/ +bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const +{ + /* + Packing not allowed when original length is greater than max_sort_length + and we have a complex collation because cutting a prefix is not safe in + such a case + */ + if (original_length > thd->variables.max_sort_length && + cs->state & MY_CS_NON1TO1) + return false; + return true; +} + + +/* + Compare function used for packing sort keys +*/ + +qsort2_cmp get_packed_keys_compare_ptr() +{ + return (qsort2_cmp) compare_packed_sort_keys; +} + + +/* + Compare two varstrings. + + The strings are in this data format: + + [null_byte] [length of string + suffix_bytes] [the string] [suffix_bytes] + + suffix_bytes are used only for binary columns. +*/ + +int SORT_FIELD_ATTR::compare_packed_varstrings(uchar *a, size_t *a_len, + uchar *b, size_t *b_len) +{ + int retval; + size_t a_length, b_length; + if (maybe_null) + { + *a_len= *b_len= 1; // NULL bytes are always stored + if (*a != *b) + { + // Note we don't return a proper value in *{a|b}_len for the non-NULL + // value but that's ok + if (*a == 0) + return -1; + else + return 1; + } + else + { + if (*a == 0) + return 0; + } + a++; + b++; + } + else + *a_len= *b_len= 0; + + a_length= read_lowendian(a, length_bytes); + b_length= read_lowendian(b, length_bytes); + + *a_len+= length_bytes + a_length; + *b_len+= length_bytes + b_length; + + retval= cs->strnncollsp(a + length_bytes, + a_length - suffix_length, + b + length_bytes, + b_length - suffix_length); + + if (!retval && suffix_length) + { + DBUG_ASSERT(cs == &my_charset_bin); + // comparing the length stored in suffix bytes for binary strings + a= a + length_bytes + a_length - suffix_length; + b= b + length_bytes + b_length - suffix_length; + retval= memcmp(a, b, suffix_length); + } + + return retval; +} + + +/* + A value comparison function that has a signature that's suitable for + comparing packed values, but actually compares fixed-size values with memcmp. + + This is used for ordering fixed-size columns when the sorting procedure used + packed-value format. +*/ + +int SORT_FIELD_ATTR::compare_packed_fixed_size_vals(uchar *a, size_t *a_len, + uchar *b, size_t *b_len) +{ + if (maybe_null) + { + *a_len=1; + *b_len=1; + if (*a != *b) + { + if (*a == 0) + return -1; + else + return 1; + } + else + { + if (*a == 0) + return 0; + } + a++; + b++; + } + else + *a_len= *b_len= 0; + + *a_len+= length; + *b_len+= length; + return memcmp(a,b, length); +} + + +/* + @brief + Comparison function to compare two packed sort keys + + @param sort_param cmp argument + @param a_ptr packed sort key + @param b_ptr packed sort key + + @retval + >0 key a_ptr greater than b_ptr + =0 key a_ptr equal to b_ptr + <0 key a_ptr less than b_ptr + +*/ + +int compare_packed_sort_keys(void *sort_param, + unsigned char **a_ptr, unsigned char **b_ptr) +{ + int retval= 0; + size_t a_len, b_len; + Sort_param *param= (Sort_param*)sort_param; + Sort_keys *sort_keys= param->sort_keys; + uchar *a= *a_ptr; + uchar *b= *b_ptr; + + a+= Sort_keys::size_of_length_field; + b+= Sort_keys::size_of_length_field; + for (SORT_FIELD *sort_field= sort_keys->begin(); + sort_field != sort_keys->end(); sort_field++) + { + retval= sort_field->is_variable_sized() ? + sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : + sort_field->compare_packed_fixed_size_vals(a, &a_len, b, &b_len); + + if (retval) + return sort_field->reverse ? -retval : retval; + + a+= a_len; + b+= b_len; + + } + /* + this comparison is done for the case when the sort keys is appended with + the ROW_ID pointer. For such cases we don't have addon fields + so we can make a memcmp check over both the sort keys + */ + if (!param->using_addon_fields()) + retval= memcmp(a, b, param->res_length); + return retval; +} + + +/* + @brief + Store a packed string in the buffer + + @param to buffer + @param str packed string value + @param cs character set + + @details + This function writes to the buffer the packed value of a key_part + of the sort key. + + The values written to the buffer are in this order + - value for null byte + - length of the string + - value of the string + - suffix length (for binary character set) +*/ + +uint +SORT_FIELD_ATTR::pack_sort_string(uchar *to, const LEX_CSTRING &str, + CHARSET_INFO *cs) const +{ + uchar *orig_to= to; + size_t length, data_length; + length= str.length; + + if (length + suffix_length <= original_length) + data_length= length; + else + data_length= original_length - suffix_length; + + // length stored in lowendian form + store_lowendian(data_length + suffix_length, to, length_bytes); + to+= length_bytes; + // copying data length bytes to the buffer + memcpy(to, (uchar*)str.str, data_length); + to+= data_length; + + if (cs == &my_charset_bin && suffix_length) + { + // suffix length stored in bigendian form + store_bigendian(str.length, to, suffix_length); + to+= suffix_length; + } + return static_cast<uint>(to - orig_to); +} + + +/* + @brief + Create a mem-comparable sort key + + @param param sort param structure + @param to buffer where values are written + + @retval + length of the bytes written including the NULL bytes +*/ + +static uint make_sortkey(Sort_param *param, uchar *to) +{ + Field *field; + SORT_FIELD *sort_field; + uchar *orig_to= to; + + for (sort_field=param->local_sortorder.begin() ; + sort_field != param->local_sortorder.end() ; + sort_field++) + { + bool maybe_null=0; + if ((field=sort_field->field)) + { + // Field + field->make_sort_key_part(to, sort_field->length); + if ((maybe_null= field->maybe_null())) + to++; + } + else + { // Item + sort_field->item->type_handler()->make_sort_key_part(to, + sort_field->item, + sort_field, param); + if ((maybe_null= sort_field->item->maybe_null)) + to++; + } + + if (sort_field->reverse) + reverse_key(to, sort_field); + to+= sort_field->length; + } + + DBUG_ASSERT(static_cast<uint>(to - orig_to) <= param->sort_length); + return static_cast<uint>(to - orig_to); +} + + +/* + @brief + create a compact sort key which can be compared with a comparison + function. They are called packed sort keys + + @param param sort param structure + @param to buffer where values are written + + @retval + length of the bytes written including the NULL bytes +*/ + +static uint make_packed_sortkey(Sort_param *param, uchar *to) +{ + Field *field; + SORT_FIELD *sort_field; + uint length; + uchar *orig_to= to; + + to+= Sort_keys::size_of_length_field; + + for (sort_field=param->local_sortorder.begin() ; + sort_field != param->local_sortorder.end() ; + sort_field++) + { + bool maybe_null=0; + if ((field=sort_field->field)) + { + // Field + length= field->make_packed_sort_key_part(to, sort_field); + if ((maybe_null= field->maybe_null())) + to++; + } + else + { // Item + Item *item= sort_field->item; + length= item->type_handler()->make_packed_sort_key_part(to, item, + sort_field, + param); + if ((maybe_null= sort_field->item->maybe_null)) + to++; + } + to+= length; + } + + length= static_cast<int>(to - orig_to); + DBUG_ASSERT(length <= param->sort_length); + Sort_keys::store_sortkey_length(orig_to, length); + return length; +} diff --git a/sql/filesort.h b/sql/filesort.h index 5102ee2326f..9f71da02c96 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -25,9 +25,12 @@ class THD; struct TABLE; class Filesort_tracker; struct SORT_FIELD; +struct SORT_FIELD_ATTR; typedef struct st_order ORDER; class JOIN; class Addon_fields; +class Sort_keys; + /** Sorting related info. @@ -57,6 +60,7 @@ public: bool sort_positions; Filesort_tracker *tracker; + Sort_keys *sort_keys; Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, SQL_SELECT *select_arg): @@ -66,14 +70,15 @@ public: select(select_arg), own_select(false), using_pq(false), - sort_positions(sort_positions_arg) + sort_positions(sort_positions_arg), + sort_keys(NULL) { DBUG_ASSERT(order); }; ~Filesort() { cleanup(); } /* Prepare ORDER BY list for sorting. */ - uint make_sortorder(THD *thd, JOIN *join, table_map first_table_bit); + Sort_keys* make_sortorder(THD *thd, JOIN *join, table_map first_table_bit); private: void cleanup(); @@ -88,6 +93,7 @@ class SORT_INFO public: SORT_INFO() :addon_fields(NULL), record_pointers(0), + sort_keys(NULL), sorted_result_in_fsbuf(FALSE) { buffpek.str= 0; @@ -121,6 +127,7 @@ public: LEX_STRING buffpek; /* Buffer for buffpek structures */ Addon_fields *addon_fields; /* Addon field descriptors */ uchar *record_pointers; /* If sorted in memory */ + Sort_keys *sort_keys; /* Sort key descriptors*/ /** If the entire result of filesort fits in memory, we skip the merge phase. @@ -201,6 +208,7 @@ public: template<bool Packed_addon_fields> inline void unpack_addon_fields(uchar *buff); + bool using_packed_sortkeys(); friend SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, Filesort_tracker* tracker, JOIN *join, @@ -216,5 +224,8 @@ bool filesort_use_addons(TABLE *table, uint sortlength, uint *m_packable_length); void change_double_for_sort(double nr,uchar *to); +void store_length(uchar *to, uint length, uint pack_length); +void +reverse_key(uchar *to, const SORT_FIELD_ATTR *sort_field); #endif /* FILESORT_INCLUDED */ diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc index 95b9dc51cf8..a3b493b4c7a 100644 --- a/sql/filesort_utils.cc +++ b/sql/filesort_utils.cc @@ -175,7 +175,8 @@ void Filesort_buffer::sort_buffer(const Sort_param *param, uint count) reverse_record_pointers(); uchar **buffer= NULL; - if (radixsort_is_appliccable(count, param->sort_length) && + if (!param->using_packed_sortkeys() && + radixsort_is_appliccable(count, param->sort_length) && (buffer= (uchar**) my_malloc(count*sizeof(char*), MYF(MY_THREAD_SPECIFIC)))) { @@ -183,6 +184,8 @@ void Filesort_buffer::sort_buffer(const Sort_param *param, uint count) my_free(buffer); return; } - - my_qsort2(m_sort_keys, count, sizeof(uchar*), get_ptr_compare(size), &size); + + my_qsort2(m_sort_keys, count, sizeof(uchar*), + param->get_compare_function(), + param->get_compare_argument(&size)); } diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h index e8b93940abf..1962f149893 100644 --- a/sql/filesort_utils.h +++ b/sql/filesort_utils.h @@ -279,4 +279,8 @@ private: longlong m_idx; }; +int compare_packed_sort_keys(void *sort_keys, unsigned char **a, + unsigned char **b); +qsort2_cmp get_packed_keys_compare_ptr(); + #endif // FILESORT_UTILS_INCLUDED diff --git a/sql/records.cc b/sql/records.cc index 2b146abb005..e1766322e2f 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -39,7 +39,7 @@ static int rr_quick(READ_RECORD *info); int rr_sequential(READ_RECORD *info); static int rr_from_tempfile(READ_RECORD *info); template<bool> static int rr_unpack_from_tempfile(READ_RECORD *info); -template<bool> static int rr_unpack_from_buffer(READ_RECORD *info); +template<bool,bool> static int rr_unpack_from_buffer(READ_RECORD *info); int rr_from_pointers(READ_RECORD *info); static int rr_from_cache(READ_RECORD *info); static int init_rr_cache(THD *thd, READ_RECORD *info); @@ -190,6 +190,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, DBUG_ENTER("init_read_record"); const bool using_addon_fields= filesort && filesort->using_addon_fields(); + bool using_packed_sortkeys= filesort && filesort->using_packed_sortkeys(); bzero((char*) info,sizeof(*info)); info->thd=thd; @@ -287,10 +288,19 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, DBUG_PRINT("info",("using rr_unpack_from_buffer")); DBUG_ASSERT(filesort->sorted_result_in_fsbuf); info->unpack_counter= 0; + if (filesort->using_packed_addons()) - info->read_record_func= rr_unpack_from_buffer<true>; + { + info->read_record_func= using_packed_sortkeys ? + rr_unpack_from_buffer<true, true> : + rr_unpack_from_buffer<true, false>; + } else - info->read_record_func= rr_unpack_from_buffer<false>; + { + info->read_record_func= using_packed_sortkeys ? + rr_unpack_from_buffer<false, true> : + rr_unpack_from_buffer<false, false>; + } } else { @@ -626,7 +636,7 @@ int rr_from_pointers(READ_RECORD *info) -1 There is no record to be read anymore. */ -template<bool Packed_addon_fields> +template<bool Packed_addon_fields, bool Packed_sort_keys> static int rr_unpack_from_buffer(READ_RECORD *info) { if (info->unpack_counter == info->sort_info->return_rows) @@ -634,7 +644,12 @@ static int rr_unpack_from_buffer(READ_RECORD *info) uchar *record= info->sort_info->get_sorted_record( static_cast<uint>(info->unpack_counter)); - uchar *plen= record + info->sort_info->get_sort_length(); + + uint sort_length= Packed_sort_keys ? + Sort_keys::read_sortkey_length(record): + info->sort_info->get_sort_length(); + + uchar *plen= record + sort_length; info->sort_info->unpack_addon_fields<Packed_addon_fields>(plen); info->unpack_counter++; return 0; @@ -772,6 +787,19 @@ static int rr_cmp(uchar *a,uchar *b) #endif } + +/** + Copy (unpack) values appended to sorted fields from a buffer back to + their regular positions specified by the Field::ptr pointers. + + @param addon_field Array of descriptors for appended fields + @param buff Buffer which to unpack the value from + + @note + The function is supposed to be used only as a callback function + when getting field values for the sorted result set. + +*/ template<bool Packed_addon_fields> inline void SORT_INFO::unpack_addon_fields(uchar *buff) { diff --git a/sql/sql_analyze_stmt.cc b/sql/sql_analyze_stmt.cc index 2147d6d7ffc..2f87b9b0d40 100644 --- a/sql/sql_analyze_stmt.cc +++ b/sql/sql_analyze_stmt.cc @@ -86,7 +86,10 @@ void Filesort_tracker::print_json_members(Json_writer *writer) void Filesort_tracker::get_data_format(String *str) { - str->append("sort_key"); + if (r_sort_keys_packed) + str->append("packed_sort_key"); + else + str->append("sort_key"); str->append(","); if (r_using_addons) diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h index dfe29517b63..bc7c60a318b 100644 --- a/sql/sql_analyze_stmt.h +++ b/sql/sql_analyze_stmt.h @@ -223,7 +223,8 @@ public: sort_passes(0), sort_buffer_size(0), r_using_addons(false), - r_packed_addon_fields(false) + r_packed_addon_fields(false), + r_sort_keys_packed(false) {} /* Functions that filesort uses to report various things about its execution */ @@ -271,6 +272,10 @@ public: r_using_addons= true; r_packed_addon_fields= addons_packed; } + inline void report_sort_keys_format(bool sort_keys_packed) + { + r_sort_keys_packed= sort_keys_packed; + } void get_data_format(String *str); @@ -334,6 +339,7 @@ private: ulonglong sort_buffer_size; bool r_using_addons; bool r_packed_addon_fields; + bool r_sort_keys_packed; }; diff --git a/sql/sql_class.h b/sql/sql_class.h index 939dc918003..d756b047ffe 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6277,8 +6277,52 @@ public: /* Structs used when sorting */ struct SORT_FIELD_ATTR { - uint length; /* Length of sort field */ - uint suffix_length; /* Length suffix (0-4) */ + /* + If using mem-comparable fixed-size keys: + length of the mem-comparable image of the field, in bytes. + + If using packed keys: still the same? Not clear what is the use of it. + */ + uint length; + + /* + For most datatypes, this is 0. + The exception are the VARBINARY columns. + For those columns, the comparison actually compares + + (value_prefix(N), suffix=length(value)) + + Here value_prefix is either the whole value or its prefix if it was too + long, and the suffix is the length of the original value. + (this way, for values X and Y: if X=prefix(Y) then X compares as less + than Y + */ + uint suffix_length; + + /* + If using packed keys, number of bytes that are used to store the length + of the packed key. + + */ + uint length_bytes; + + /* Max. length of the original value, in bytes */ + uint original_length; + enum Type { FIXED_SIZE, VARIABLE_SIZE } type; + /* + TRUE : if the item or field is NULLABLE + FALSE : otherwise + */ + bool maybe_null; + CHARSET_INFO *cs; + uint pack_sort_string(uchar *to, const LEX_CSTRING &str, + CHARSET_INFO *cs) const; + int compare_packed_fixed_size_vals(uchar *a, size_t *a_len, + uchar *b, size_t *b_len); + int compare_packed_varstrings(uchar *a, size_t *a_len, + uchar *b, size_t *b_len); + bool check_if_packing_possible(THD *thd) const; + bool is_variable_sized() { return type == VARIABLE_SIZE; } }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index afd6ccb3cd3..ccf4a18a91f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -24060,7 +24060,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table, field_length=field_lengths; for (ptr= first_field ; *ptr ; ptr++) { - (*ptr)->make_sort_key(key_pos, *field_length); + (*ptr)->make_sort_key_part(key_pos, *field_length); key_pos+= (*ptr)->maybe_null() + *field_length++; } /* Check if it exists before */ diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 2c01b01b607..3635bc32974 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -20,8 +20,8 @@ #include "my_base.h" /* ha_rows */ #include <my_sys.h> /* qsort2_cmp */ #include "queues.h" +#include "sql_class.h" -struct SORT_FIELD; class Field; struct TABLE; @@ -156,6 +156,7 @@ public: }; typedef Bounds_checked_array<SORT_ADDON_FIELD> Addon_fields_array; +typedef Bounds_checked_array<SORT_FIELD> Sort_keys_array; /** This class wraps information about usage of addon fields. @@ -241,22 +242,263 @@ private: bool m_using_packed_addons; ///< Are we packing the addon fields? }; +/** + This class wraps information about usage of sort keys. + A Sort_keys object is used both during packing of data in the filesort + buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'. + + @see SORT_FIELD struct. +*/ + +class Sort_keys :public Sql_alloc, + public Sort_keys_array +{ +public: + Sort_keys(SORT_FIELD* arr, size_t count): + Sort_keys_array(arr, count), + m_using_packed_sortkeys(false), + size_of_packable_fields(0), + sort_length(0) + { + DBUG_ASSERT(!is_null()); + } + + bool using_packed_sortkeys() const + { return m_using_packed_sortkeys; } + + void set_using_packed_sortkeys(bool val) + { + m_using_packed_sortkeys= val; + } + void set_size_of_packable_fields(uint len) + { + size_of_packable_fields= len; + } + + uint get_size_of_packable_fields() + { + return size_of_packable_fields; + } + + void set_sort_length(uint len) + { + sort_length= len; + } + + uint get_sort_length() + { + return sort_length; + } + + static void store_sortkey_length(uchar *p, uint sz) + { + int4store(p, sz - size_of_length_field); + } + + static uint read_sortkey_length(uchar *p) + { + return size_of_length_field + uint4korr(p); + } + + void increment_size_of_packable_fields(uint len) + { + size_of_packable_fields+= len; + } + + void increment_original_sort_length(uint len) + { + sort_length+= len; + } + + static const uint size_of_length_field= 4; + +private: + bool m_using_packed_sortkeys; // Are we packing sort keys + uint size_of_packable_fields; // Total length bytes for packable columns + + /* + The length that would be needed if we stored non-packed mem-comparable + images of fields? + */ + uint sort_length; +}; + + +/** +PACKED SORT KEYS + +Description + +In this optimization where we would like the pack the values of the sort key +inside the sort buffer for each record. + +Contents: +1. Background +1.1 Implementation details +2. Solution : Packed Sort Keys +2.1 Packed key format +2.2 Which format to use +3. Special cases +3.1 Handling very long strings +3.2 Handling for long binary strings +3.3 Handling very long strings with Packed sort keys +4. Sort key columns in addon_fields + +1. Background +Before this optimization of using packed sort keys, filesort() sorted the +data using mem-comparable keys. + +That is, if we wanted to sort by + + ORDER BY col1, col2, ... colN +then the filesort code would for each row generate one "Sort Key" +and then sort the rows by their Sort Keys. + +The Sort Keys are mem-comparable (that is, are compared by memcmp()) and +they are of FIXED SIZE. The sort key has the same length regardless of +what value it represents. This causes INEFFICIENT MEMORY USAGE. + +1.1 Implementation details + +make_sortkey() is the function that produces a sort key +from a record. + +The function treats Field and Item objects differently. + +class Field has: + +a) void make_sort_key(uchar *buff, uint length); + make_sort_key is a non-virtual function which handles encoding of + SQL null values. + +b) virtual void sort_string(uchar *buff,uint length)=0; + sort_string produces mem-comparable image of the field value + for each datatype. + +For Items, Type_handler has a virtual function: + + virtual void make_sort_key(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const= 0; + which various datatypes overload. + + +2. SOLUTION: PACKED SORT KEYS + +Note that one can have mem-comparable keys are that are not fixed-size. +MyRocks uses such encoding for example. + +However for this optimization it was decided to store the original +(non-mem-comparable) values instead and use a datatype-aware +key comparison function. + +2.1 Packed key format +The keys are stored in a new variable-size data format called "packed". + +The format is as follows: + + <sort_key_length><packed_value_1><packed_value2> ....... <packed_valueN> + + format for a n-part sort key + +<sort_key_length> is the length of the whole key. +Each packed value is encoded as follows: + + <null_byte=0> // This is a an SQL NULL + [<null_byte=1>] <packed_value> // this a non-NULL value +null_byte is present if the field/item is NULLable. +SQL NULL is encoded as just one NULL-indicator byte. The value itself is omitted. + +The format of the packed_value depends on the datatype. +For "non-packable" datatypes it is just their mem-comparable form, as before. + +The "packable" datatypes are currently variable-length strings and the +packed format for them is (for binary blobs, see a note below): + +<length> <string> +2.2 Which format to use + +The advantage of Packed Key Format is potential space savings for +variable-length fields. + +The disadvantages are: + +a) It may actually take more space, because of sort_key_length and + length fields. +b) The comparison function is more expensive. + +Currently the logic is: use Packed Key Format if we would save 128 or more +bytes when constructing a sort key from values that have empty string +for each packable component. + +3. SPECIAL CASES +3.1 HANDLING VERY LONG STRINGS +the size of sort key part was limited by @@max_sort_length variable. +It is defined as: + +The number of bytes to use when sorting data values. The server uses only the +first max_sort_length bytes of each value and ignores the rest. + +3.2 HANDLING VERY LONG BINARY STRINGS +Long binary strings receive special treatment. A sort key for the long +binary string is truncated at max_sort_length bytes like described above, +but then a "suffix" is appended which contains the total length of the +value before the truncation. + +3.3 HANDLING VERY LONG STRINGS WITH PACKED SORT KEY +Truncating multi-byte string at N bytes is not safe because one can cut in the +middle of a character. One is tempted to solve this by discarding the partial +character but that's also not a good idea as in some collations multiple +characters may produce one weight (this is called "contraction"). + +This combination of circumstances: + +The string value is very long, so truncation is necessary +The collation is "complex", so truncation is dangerous +is deemed to be relatively rare so it was decided to just use +the non-packed sort keys in this case. + +4. SORT KEY COLUMNS IN ADDON FIELDS +Currently, each sort key column is actually stored twice +1. as part of the sort key +2. in the addon_fields +This made total sense when sort key stored the mem-comparable image +(from which one cannot restore the original value in general case). +But since we now store the original value, we could also remove it from the +addon_fields and further save space. This is still a limitation and needs +to be fixed later + +@see Sort_keys + +**/ /** - There are two record formats for sorting: - |<key a><key b>...|<rowid>| - / sort_length / ref_l / + The sort record format may use one of two formats for the non-sorted part of + the record: + + 1. Use the rowid + + |<sort_key>| <rowid> | + / / ref_length / - or with "addon fields" - |<key a><key b>...|<null bits>|<field a><field b>...| - / sort_length / addon_length / + 2. Use "addon fields" + + |<sort_key>|<null bits>|<field a><field b>...| + / / addon_length / The packed format for "addon fields" - |<key a><key b>...|<length>|<null bits>|<field a><field b>...| - / sort_length / addon_length / + + |<sort_key>|<length>|<null bits>|<field a><field b>...| + / / addon_length / + + <sort_key> The key may use one of the two formats: + A. fixed-size mem-comparable form. The record is always + sort_length bytes long. + B. "PackedKeyFormat" - the records are variable-size. <key> Fields are fixed-size, specially encoded with Field::make_sort_key() so we can do byte-by-byte compare. + <length> Contains the *actual* packed length (after packing) of everything after the sort keys. The size of the length field is 2 bytes, @@ -289,6 +531,7 @@ public: */ Bounds_checked_array<SORT_FIELD> local_sortorder; Addon_fields *addon_fields; // Descriptors for companion fields. + Sort_keys *sort_keys; bool using_pq; uchar *unique_buff; @@ -316,12 +559,59 @@ public: return m_using_packed_addons; } + bool using_packed_sortkeys() const + { + DBUG_ASSERT(m_using_packed_sortkeys == + (sort_keys != NULL && sort_keys->using_packed_sortkeys())); + return m_using_packed_sortkeys; + } + /// Are we using "addon fields"? bool using_addon_fields() const { return addon_fields != NULL; } + uint32 get_result_length(uchar *plen) + { + if (!m_using_packed_addons) + return res_length; + return Addon_fields::read_addon_length(plen); + } + + uint32 get_addon_length(uchar *plen) + { + if (using_packed_addons()) + return Addon_fields::read_addon_length(plen); + else + return addon_length; + } + + uint32 get_sort_length(uchar *plen) + { + if (using_packed_sortkeys()) + return Sort_keys::read_sortkey_length(plen) + + /* + when addon fields are not present, then the sort_length also + includes the res_length. For packed keys here we add + the res_length + */ + (using_addon_fields() ? 0: res_length); + else + return sort_length; + } + + uint get_record_length(uchar *plen) + { + if (m_packed_format) + { + uint sort_len= get_sort_length(plen); + return sort_len + get_addon_length(plen + sort_len); + } + else + return rec_length; + } + /** Getter for record length and result length. @param record_start Pointer to record. @@ -330,22 +620,46 @@ public: */ void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl) { - if (!using_packed_addons()) + if (m_packed_format) + { + uint sort_len= get_sort_length(record_start); + uint addon_len= get_addon_length(record_start + sort_len); + *recl= sort_len + addon_len; + *resl= using_addon_fields() ? addon_len : res_length; + } + else { *recl= rec_length; *resl= res_length; - return; } - uchar *plen= record_start + sort_length; - *resl= Addon_fields::read_addon_length(plen); - DBUG_ASSERT(*resl <= res_length); - const uchar *record_end= plen + *resl; - *recl= static_cast<uint>(record_end - record_start); + } + + void try_to_pack_sortkeys(); + + qsort2_cmp get_compare_function() const + { + return using_packed_sortkeys() ? + get_packed_keys_compare_ptr() : + get_ptr_compare(sort_length); + } + void* get_compare_argument(size_t *sort_len) const + { + return using_packed_sortkeys() ? + (void*) this : + (void*) sort_len; + } + + bool is_packed_format() const + { + return m_packed_format; } private: uint m_packable_length; bool m_using_packed_addons; ///< caches the value of using_packed_addons() + /* caches the value of using_packed_sortkeys() */ + bool m_using_packed_sortkeys; + bool m_packed_format; }; typedef Bounds_checked_array<uchar> Sort_buffer; @@ -353,7 +667,7 @@ typedef Bounds_checked_array<uchar> Sort_buffer; int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer, Merge_chunk *buffpek, uint *maxbuffer, IO_CACHE *t_file); ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, - Sort_param *param); + Sort_param *param, bool packing_format); bool merge_buffers(Sort_param *param,IO_CACHE *from_file, IO_CACHE *to_file, Sort_buffer sort_buffer, Merge_chunk *lastbuff, Merge_chunk *Fb, diff --git a/sql/sql_type.h b/sql/sql_type.h index ce87c8e9d93..ee56e7146da 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -86,6 +86,7 @@ class handler; struct Schema_specification_st; struct TABLE; struct SORT_FIELD_ATTR; +struct SORT_FIELD; class Vers_history_point; class Virtual_column_info; class Conv_source; @@ -3332,6 +3333,14 @@ protected: bool maybe_null, bool null_value, bool unsigned_flag, longlong value) const; + void store_sort_key_longlong(uchar *to, bool unsigned_flag, + longlong value) const; + + uint make_packed_sort_key_longlong(uchar *to, bool maybe_null, + bool null_value, bool unsigned_flag, + longlong value, + const SORT_FIELD_ATTR *sort_field) const; + bool Item_func_or_sum_illegal_param(const char *name) const; bool Item_func_or_sum_illegal_param(const Item_func_or_sum *) const; bool check_null(const Item *item, st_value *value) const; @@ -3752,12 +3761,25 @@ public: const uchar *buffer, LEX_CUSTRING *gis_options) const; - virtual void make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const= 0; - virtual void sortlength(THD *thd, + /* + Create a fixed size key part for a sort key + */ + virtual void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const= 0; + + /* + create a compact size key part for a sort key + */ + virtual uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const=0; + + virtual void sort_length(THD *thd, const Type_std_attributes *item, SORT_FIELD_ATTR *attr) const= 0; + virtual bool is_packable() const { return false; } + virtual uint32 max_display_length(const Item *item) const= 0; virtual uint32 Item_decimal_notation_int_digits(const Item *item) const { return 0; } @@ -4149,14 +4171,21 @@ public: const Bit_addr &bit, const Column_definition_attributes *attr, uint32 flags) const override; - void make_sort_key(uchar *to, Item *item, - const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override + { + DBUG_ASSERT(0); + } + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override { DBUG_ASSERT(0); + return 0; } - void sortlength(THD *thd, const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override + void sort_length(THD *thd, const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override { DBUG_ASSERT(0); } @@ -4481,11 +4510,15 @@ public: bool subquery_type_allows_materialization(const Item *inner, const Item *outer) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; bool Item_const_eq(const Item_const *a, const Item_const *b, bool binary_cmp) const override; bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, @@ -4588,8 +4621,12 @@ public: bool show_field) const override; Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; void Column_definition_attributes_frm_pack(const Column_definition_attributes *at, uchar *buff) const override; @@ -4599,9 +4636,9 @@ public: const uchar *buffer, LEX_CUSTRING *gis_options) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; uint32 max_display_length(const Item *item) const override; uint32 Item_decimal_notation_int_digits(const Item *item) const override; Item *create_typecast_item(THD *thd, Item *item, @@ -4838,14 +4875,18 @@ public: const Record_addr &addr, const Type_all_attributes &attr, TABLE_SHARE *share) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; void Column_definition_attributes_frm_pack(const Column_definition_attributes *at, uchar *buff) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; bool Item_const_eq(const Item_const *a, const Item_const *b, bool binary_cmp) const override; bool Item_eq_value(THD *thd, const Type_cmp_attributes *attr, @@ -4945,11 +4986,15 @@ public: void Column_definition_attributes_frm_pack(const Column_definition_attributes *at, uchar *buff) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; bool Item_const_eq(const Item_const *a, const Item_const *b, bool binary_cmp) const override; bool Item_param_set_from_value(THD *thd, @@ -5034,11 +5079,16 @@ public: const Type_handler * type_handler_adjusted_to_max_octet_length(uint max_octet_length, CHARSET_INFO *cs) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; + bool is_packable()const override { return true; } bool union_element_finalize(const Item * item) const override; uint calc_key_length(const Column_definition &def) const override; bool Column_definition_prepare_stage1(THD *thd, @@ -6230,11 +6280,15 @@ public: cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const override; in_vector *make_in_vector(THD *thd, const Item_func_in *f, uint nargs) const override; - void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, - Sort_param *param) const override; - void sortlength(THD *thd, - const Type_std_attributes *item, - SORT_FIELD_ATTR *attr) const override; + void make_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + uint make_packed_sort_key_part(uchar *to, Item *item, + const SORT_FIELD_ATTR *sort_field, + Sort_param *param) const override; + void sort_length(THD *thd, + const Type_std_attributes *item, + SORT_FIELD_ATTR *attr) const override; bool Column_definition_fix_attributes(Column_definition *c) const override; uint Item_decimal_scale(const Item *item) const override { diff --git a/sql/uniques.cc b/sql/uniques.cc index 931aa868199..db9891158b0 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -531,7 +531,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, top->set_buffer(merge_buffer + (top - begin) * piece_size, merge_buffer + (top - begin) * piece_size + piece_size); top->set_max_keys(max_key_count_per_piece); - bytes_read= read_to_buffer(file, top, &sort_param); + bytes_read= read_to_buffer(file, top, &sort_param, false); if (unlikely(bytes_read == (ulong) -1)) goto end; DBUG_ASSERT(bytes_read); @@ -561,7 +561,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, /* save old_key not to overwrite it in read_to_buffer */ memcpy(save_key_buff, old_key, key_length); old_key= save_key_buff; - bytes_read= read_to_buffer(file, top, &sort_param); + bytes_read= read_to_buffer(file, top, &sort_param, false); if (unlikely(bytes_read == (ulong) -1)) goto end; else if (bytes_read) /* top->key, top->mem_count are reset */ @@ -609,7 +609,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, top->advance_current_key(key_length); } while (top->decrement_mem_count()); - bytes_read= read_to_buffer(file, top, &sort_param); + bytes_read= read_to_buffer(file, top, &sort_param, false); if (unlikely(bytes_read == (ulong) -1)) goto end; } |