summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mariadb.org>2016-09-28 17:55:28 +0200
committerSergei Golubchik <serg@mariadb.org>2016-09-28 17:55:28 +0200
commit66d9696596edbc20ad36bf3d5bffb5595e8235c3 (patch)
treebbef37c9a90b63d25bee59386cac04298a13846f
parent66a58f46e937cdc3d7e0529b52ad8b658d9b2cd4 (diff)
parent23af6f5942e7235a7c14a36cb8dd0d2796b5ef37 (diff)
downloadmariadb-git-66d9696596edbc20ad36bf3d5bffb5595e8235c3.tar.gz
Merge branch '10.0' into 10.1
-rw-r--r--CREDITS1
-rw-r--r--cmake/cpack_rpm.cmake3
-rw-r--r--include/my_global.h3
-rw-r--r--include/my_sys.h4
-rw-r--r--mysql-test/extra/binlog_tests/database.test2
-rw-r--r--mysql-test/include/index_merge2.inc1
-rw-r--r--mysql-test/lib/My/CoreDump.pm6
-rw-r--r--mysql-test/lib/mtr_cases.pm4
-rw-r--r--mysql-test/lib/mtr_report.pm3
-rwxr-xr-xmysql-test/mysql-test-run.pl10
-rw-r--r--mysql-test/r/contributors.result1
-rw-r--r--mysql-test/r/ctype_utf32.result5
-rw-r--r--mysql-test/r/group_min_max_innodb.result16
-rw-r--r--mysql-test/r/index_merge_innodb.result3
-rw-r--r--mysql-test/r/index_merge_myisam.result3
-rw-r--r--mysql-test/r/merge.result17
-rw-r--r--mysql-test/r/ps.result33
-rw-r--r--mysql-test/r/type_uint.result19
-rw-r--r--mysql-test/suite/innodb/r/innodb_bug54044.result3
-rw-r--r--mysql-test/suite/innodb/r/system_tables.result8
-rw-r--r--mysql-test/suite/innodb/t/innodb_bug54044.test6
-rw-r--r--mysql-test/suite/innodb/t/system_tables.test12
-rw-r--r--mysql-test/suite/perfschema/r/aggregate.result121
-rw-r--r--mysql-test/suite/perfschema/t/aggregate.test197
-rw-r--r--mysql-test/suite/plugins/r/server_audit.result3
-rw-r--r--mysql-test/suite/plugins/r/thread_pool_server_audit.result3
-rw-r--r--mysql-test/suite/rpl/t/rpl_drop_db.test4
-rw-r--r--mysql-test/t/ctype_utf32.test5
-rw-r--r--mysql-test/t/group_min_max_innodb.test13
-rw-r--r--mysql-test/t/merge.test13
-rw-r--r--mysql-test/t/ps.test29
-rw-r--r--mysql-test/t/type_uint.test7
-rw-r--r--mysys/my_redel.c7
-rw-r--r--mysys/my_static.c1
-rw-r--r--plugin/server_audit/server_audit.c5
-rw-r--r--sql/contributors.h1
-rw-r--r--sql/field.cc2
-rw-r--r--sql/item.cc23
-rw-r--r--sql/log.cc2
-rw-r--r--sql/mysqld.cc1
-rw-r--r--sql/net_serv.cc4
-rw-r--r--sql/parse_file.h6
-rw-r--r--sql/signal_handler.cc4
-rw-r--r--sql/sql_admin.cc14
-rw-r--r--sql/sql_base.cc1
-rw-r--r--sql/sql_class.cc2
-rw-r--r--sql/sql_class.h8
-rw-r--r--sql/sql_plugin.cc148
-rw-r--r--sql/sql_select.cc30
-rw-r--r--sql/table_cache.cc2
-rw-r--r--storage/innobase/dict/dict0stats.cc2
-rw-r--r--storage/innobase/fts/fts0fts.cc31
-rw-r--r--storage/innobase/handler/ha_innodb.cc9
-rw-r--r--storage/innobase/handler/handler0alter.cc9
-rw-r--r--storage/innobase/handler/i_s.cc2
-rw-r--r--storage/innobase/include/fts0fts.h10
-rw-r--r--storage/innobase/include/univ.i2
-rw-r--r--storage/innobase/row/row0log.cc14
-rw-r--r--storage/innobase/row/row0mysql.cc34
-rw-r--r--storage/perfschema/ha_perfschema.cc2
-rw-r--r--storage/tokudb/CMakeLists.txt2
-rw-r--r--storage/tokudb/PerconaFT/buildheader/make_tdb.cc4
-rw-r--r--storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake1
-rw-r--r--storage/tokudb/PerconaFT/ft/CMakeLists.txt2
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-flusher.cc4
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.cc215
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-recount-rows.cc29
-rw-r--r--storage/tokudb/PerconaFT/ft/ft.cc3
-rw-r--r--storage/tokudb/PerconaFT/ft/loader/loader-internal.h2
-rw-r--r--storage/tokudb/PerconaFT/ft/loader/loader.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/node.cc125
-rw-r--r--storage/tokudb/PerconaFT/ft/node.h1
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc473
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_allocator.h162
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc224
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_table.cc632
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/block_table.h143
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/compress.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc350
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc69
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc833
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h351
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc126
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc380
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc403
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc281
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc231
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc831
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/ft-test.cc11
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc4
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc6
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc (renamed from storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h)86
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc102
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/roll.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc2
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc23
-rw-r--r--storage/tokudb/PerconaFT/ft/ule.cc4
-rw-r--r--storage/tokudb/PerconaFT/portability/CMakeLists.txt3
-rw-r--r--storage/tokudb/PerconaFT/portability/huge_page_detection.cc6
-rw-r--r--storage/tokudb/PerconaFT/portability/tests/test-max-data.cc2
-rw-r--r--storage/tokudb/PerconaFT/portability/toku_config.h.in1
-rw-r--r--storage/tokudb/PerconaFT/portability/toku_time.h8
-rw-r--r--storage/tokudb/PerconaFT/src/indexer-internal.h2
-rw-r--r--storage/tokudb/PerconaFT/src/indexer-undo-do.cc4
-rw-r--r--storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc4
-rw-r--r--storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/test_stress0.cc2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc3
-rw-r--r--storage/tokudb/PerconaFT/src/ydb-internal.h2
-rw-r--r--storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess449
-rw-r--r--storage/tokudb/PerconaFT/tools/CMakeLists.txt3
-rw-r--r--storage/tokudb/PerconaFT/tools/ba_replay.cc629
-rw-r--r--storage/tokudb/PerconaFT/tools/ftverify.cc2
-rw-r--r--storage/tokudb/PerconaFT/tools/tokuftdump.cc1
-rw-r--r--storage/tokudb/PerconaFT/util/tests/x1764-test.cc2
-rw-r--r--storage/tokudb/ha_tokudb.cc26
-rw-r--r--storage/tokudb/ha_tokudb_admin.cc278
-rw-r--r--storage/tokudb/hatoku_defines.h7
-rw-r--r--storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result51
-rw-r--r--storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test3
-rw-r--r--storage/tokudb/mysql-test/tokudb/r/background_job_manager.result2
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test26
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test26
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test26
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test41
-rw-r--r--storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test41
-rw-r--r--storage/tokudb/tokudb_background.cc27
-rw-r--r--storage/tokudb/tokudb_background.h49
-rw-r--r--storage/tokudb/tokudb_information_schema.cc47
-rw-r--r--storage/xtradb/btr/btr0btr.cc15
-rw-r--r--storage/xtradb/buf/buf0flu.cc2
-rw-r--r--storage/xtradb/dict/dict0stats.cc2
-rw-r--r--storage/xtradb/fil/fil0fil.cc6
-rw-r--r--storage/xtradb/fts/fts0fts.cc67
-rw-r--r--storage/xtradb/fts/fts0opt.cc2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc120
-rw-r--r--storage/xtradb/handler/i_s.cc39
-rw-r--r--storage/xtradb/include/buf0buf.h14
-rw-r--r--storage/xtradb/include/buf0buf.ic14
-rw-r--r--storage/xtradb/include/fts0fts.h4
-rw-r--r--storage/xtradb/include/srv0srv.h10
-rw-r--r--storage/xtradb/include/univ.i4
-rw-r--r--storage/xtradb/log/log0log.cc4
-rw-r--r--storage/xtradb/log/log0online.cc12
-rw-r--r--storage/xtradb/log/log0recv.cc17
-rw-r--r--storage/xtradb/row/row0merge.cc2
-rw-r--r--storage/xtradb/srv/srv0mon.cc7
-rw-r--r--storage/xtradb/srv/srv0srv.cc15
-rw-r--r--strings/ctype-ucs2.c2
-rw-r--r--strings/ctype-utf8.c2
-rw-r--r--support-files/mysql.server.sh2
-rw-r--r--tests/async_queries.c2
-rw-r--r--win/packaging/CMakeLists.txt11
-rw-r--r--win/packaging/create_msi.cmake.in1
160 files changed, 4948 insertions, 4270 deletions
diff --git a/CREDITS b/CREDITS
index f0e6de7f08f..35ab4d48a8f 100644
--- a/CREDITS
+++ b/CREDITS
@@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016)
Acronis http://acronis.com (2016)
Nexedi https://www.nexedi.com (2016)
Automattic https://automattic.com (2014 - 2016)
+Tencent Game DBA http://tencentdba.com/about (2016)
Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016)
Virtuozzo https://virtuozzo.com (2016)
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index 0e0a121dbb8..703e7424159 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -230,6 +230,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
"perl(mtr_io.pl)"
"perl(mtr_match)"
"perl(mtr_misc.pl)"
+ "perl(mtr_gcov.pl)"
+ "perl(mtr_gprof.pl)"
+ "perl(mtr_process.pl)"
"perl(mtr_report)"
"perl(mtr_results)"
"perl(mtr_unique)")
diff --git a/include/my_global.h b/include/my_global.h
index f5af8083cdc..bca03bfc4d6 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -880,8 +880,7 @@ typedef long long my_ptrdiff_t;
and related routines are refactored.
*/
-#define my_offsetof(TYPE, MEMBER) \
- ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
+#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)
#define NullS (char *) 0
diff --git a/include/my_sys.h b/include/my_sys.h
index 36530eb94e9..a89480d3fcc 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2010, 2013, Monty Program Ab.
+ Copyright (c) 2010, 2016, Monty Program Ab.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -279,7 +279,7 @@ extern my_bool my_use_symdir;
extern ulong my_default_record_cache_size;
extern my_bool my_disable_locking, my_disable_async_io,
my_disable_flush_key_blocks, my_disable_symlinks;
-extern my_bool my_disable_sync;
+extern my_bool my_disable_sync, my_disable_copystat_in_redel;
extern char wild_many,wild_one,wild_prefix;
extern const char *charsets_dir;
extern my_bool timed_mutexes;
diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test
index 6b3da087f01..2e093aacb0d 100644
--- a/mysql-test/extra/binlog_tests/database.test
+++ b/mysql-test/extra/binlog_tests/database.test
@@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';
# Use '/' instead of '\' in the error message. On windows platform, dir is
# formed with '\'.
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
+--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/
--error 1010
DROP DATABASE testing_1;
let $wait_binlog_event= DROP TABLE IF EXIST;
diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc
index c50a45a9923..03afa49d323 100644
--- a/mysql-test/include/index_merge2.inc
+++ b/mysql-test/include/index_merge2.inc
@@ -341,6 +341,7 @@ while ($1)
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
# to test the bug, the following must use "sort_union":
--replace_column 9 REF
diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm
index 0e90967ef95..f9f7b3d8d4b 100644
--- a/mysql-test/lib/My/CoreDump.pm
+++ b/mysql-test/lib/My/CoreDump.pm
@@ -261,11 +261,7 @@ sub show {
# On Windows, rely on cdb to be there...
if (IS_WINDOWS)
{
- # Starting cdb is unsafe when used with --parallel > 1 option
- if ( $parallel < 2 )
- {
- _cdb($core_name);
- }
+ _cdb($core_name);
return;
}
diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm
index 2be903abf42..d758b81c1c7 100644
--- a/mysql-test/lib/mtr_cases.pm
+++ b/mysql-test/lib/mtr_cases.pm
@@ -58,8 +58,6 @@ use My::Test;
use My::Find;
use My::Suite;
-require "mtr_misc.pl";
-
# locate plugin suites, depending on whether it's a build tree or installed
my @plugin_suitedirs;
my $plugin_suitedir_regex;
@@ -1096,7 +1094,7 @@ sub get_tags_from_file($$) {
$file_to_tags{$file}= $tags;
$file_to_master_opts{$file}= $master_opts;
$file_to_slave_opts{$file}= $slave_opts;
- $file_combinations{$file}= [ uniq(@combinations) ];
+ $file_combinations{$file}= [ ::uniq(@combinations) ];
$file_in_overlay{$file} = 1 if $in_overlay;
return @{$tags};
}
diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm
index 9ab82c454ed..97ace54f0fb 100644
--- a/mysql-test/lib/mtr_report.pm
+++ b/mysql-test/lib/mtr_report.pm
@@ -34,7 +34,6 @@ use mtr_match;
use My::Platform;
use POSIX qw[ _exit ];
use IO::Handle qw[ flush ];
-require "mtr_io.pl";
use mtr_results;
my $tot_real_time= 0;
@@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
my $timer_str= "";
if ( $timer and -f "$::opt_vardir/log/timer" )
{
- $timer_str= mtr_fromfile("$::opt_vardir/log/timer");
+ $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
$tinfo->{timer}= $timer_str;
resfile_test_info('duration', $timer_str) if $::opt_resfile;
}
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index c6a71b91f69..2bd89f5ae49 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -102,11 +102,11 @@ use mtr_results;
use IO::Socket::INET;
use IO::Select;
-require "lib/mtr_process.pl";
-require "lib/mtr_io.pl";
-require "lib/mtr_gcov.pl";
-require "lib/mtr_gprof.pl";
-require "lib/mtr_misc.pl";
+require "mtr_process.pl";
+require "mtr_io.pl";
+require "mtr_gcov.pl";
+require "mtr_gprof.pl";
+require "mtr_misc.pl";
$SIG{INT}= sub { mtr_error("Got ^C signal"); };
$SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result
index 918ceaa496f..f3f5e227d3a 100644
--- a/mysql-test/r/contributors.result
+++ b/mysql-test/r/contributors.result
@@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation
Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation
Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation
Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation
+Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation
Google USA Sponsoring encryption, parallel replication and GTID
Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index a0a8072265c..5797a030e73 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -1662,6 +1662,11 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
1
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+c
+0000003F0000003F0000003F0000003F
+Warnings:
+Warning 1300 Invalid utf32 character string: '\xFF\xFF\x00\x00'
#
# End of 5.5 tests
#
diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result
index 77c74fbc041..2803107b97e 100644
--- a/mysql-test/r/group_min_max_innodb.result
+++ b/mysql-test/r/group_min_max_innodb.result
@@ -286,3 +286,19 @@ F 28 28
F 29 29
F 30 30
DROP TABLE t0,t1,t2;
+#
+# MDEV-MariaDB daemon leaks memory with specific query
+#
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+`language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+translation_resources serialized_c
+NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+drop table t1,t2;
diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result
index 5202c79f3c7..5bf56e213ab 100644
--- a/mysql-test/r/index_merge_innodb.result
+++ b/mysql-test/r/index_merge_innodb.result
@@ -311,6 +311,9 @@ set @d=@d*2;
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result
index a857e2a21ff..b3f78c12af9 100644
--- a/mysql-test/r/index_merge_myisam.result
+++ b/mysql-test/r/index_merge_myisam.result
@@ -1146,6 +1146,9 @@ set @d=@d*2;
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index 804313af701..36e196497e5 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -3835,6 +3835,23 @@ test.m1 repair error Corrupt
# Clean-up.
drop tables m1, t1, t4;
drop view t3;
+#
+# MDEV-10424 - Assertion `ticket == __null' failed in
+# MDL_request::set_type
+#
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
End of 5.5 tests
#
# Additional coverage for refactoring which is made as part
diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result
index ca6a5cf876a..6c21f4225a0 100644
--- a/mysql-test/r/ps.result
+++ b/mysql-test/r/ps.result
@@ -4077,4 +4077,35 @@ id value
deallocate prepare stmt;
SET SESSION sql_mode = @save_sql_mode;
DROP TABLE t1,t2;
-# End of 10.0 tests
+#
+# MDEV-8833: Crash of server on prepared statement with
+# conversion to semi-join
+#
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+# End of 5.5 tests
diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result
index 10aa2f2f393..c970f2ff896 100644
--- a/mysql-test/r/type_uint.result
+++ b/mysql-test/r/type_uint.result
@@ -14,6 +14,25 @@ this
0
4294967295
drop table t1;
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+coalesce(a,b) coalesce(b,a)
+1 2
+18446744073709551615 16777215
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `a` bigint(20) unsigned DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t2;
+a
+1
+18446744073709551615
+2
+16777215
+drop table t1, t2;
#
# Start of 10.0 tests
#
diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result
index d80c451c841..7d6133adb74 100644
--- a/mysql-test/suite/innodb/r/innodb_bug54044.result
+++ b/mysql-test/suite/innodb/r/innodb_bug54044.result
@@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` (
`IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1
DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+CREATE TABLE tmp ENGINE = INNODB
+AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
SHOW CREATE TABLE tmp;
Table Create Table
tmp CREATE TABLE `tmp` (
diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result
new file mode 100644
index 00000000000..79a24f7e455
--- /dev/null
+++ b/mysql-test/suite/innodb/r/system_tables.result
@@ -0,0 +1,8 @@
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+starttime
+2008-08-12 02:43:00
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test
index aa19c51018c..61a09375ae1 100644
--- a/mysql-test/suite/innodb/t/innodb_bug54044.test
+++ b/mysql-test/suite/innodb/t/innodb_bug54044.test
@@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB
SHOW CREATE TABLE table_54044;
DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+# This 'create table' should pass since it uses a Field_string of size 0.
+
+CREATE TABLE tmp ENGINE = INNODB
+ AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
SHOW CREATE TABLE tmp;
DROP TABLE tmp;
@@ -23,4 +26,3 @@ FLUSH TABLES;
--error 1005
CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1;
DROP TABLE t1;
-
diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test
new file mode 100644
index 00000000000..90cb8c59fbd
--- /dev/null
+++ b/mysql-test/suite/innodb/t/system_tables.test
@@ -0,0 +1,12 @@
+--source include/have_innodb.inc
+
+#
+# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash
+#
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+--source include/restart_mysqld.inc
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result
deleted file mode 100644
index c8fa1cc2b24..00000000000
--- a/mysql-test/suite/perfschema/r/aggregate.result
+++ /dev/null
@@ -1,121 +0,0 @@
-"General cleanup"
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-drop table if exists t1;
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-create table t1 (
-id INT PRIMARY KEY,
-b CHAR(100) DEFAULT 'initial value')
-ENGINE=MyISAM;
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-update performance_schema.setup_instruments SET enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-set @dump_all=FALSE;
-"Verifying file aggregate consistency"
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-"Verifying waits aggregate consistency (instance)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT)
-"Verifying waits aggregate consistency (thread)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT)
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-drop table test.t1;
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test
deleted file mode 100644
index fe30a7b8697..00000000000
--- a/mysql-test/suite/perfschema/t/aggregate.test
+++ /dev/null
@@ -1,197 +0,0 @@
-# Tests for PERFORMANCE_SCHEMA
-# Verify that statistics aggregated by different criteria are consistent.
-
---source include/not_embedded.inc
---source include/have_perfschema.inc
-
---echo "General cleanup"
-
-# MDEV-7187 - test fails sporadically in buildbot
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Cleanup statistics
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-
-# Start recording data
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
- set enabled = 'YES', timed = 'YES';
-
-
-create table t1 (
- id INT PRIMARY KEY,
- b CHAR(100) DEFAULT 'initial value')
- ENGINE=MyISAM;
-
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-
-# Stop recording data, so the select below don't add noise.
-update performance_schema.setup_instruments SET enabled = 'NO';
-# Disable all consumers, for long standing waits
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Helper to debug
-set @dump_all=FALSE;
-
-# Note that in general:
-# - COUNT/SUM/MAX(file_summary_by_event_name) >=
-# COUNT/SUM/MAX(file_summary_by_instance).
-# - MIN(file_summary_by_event_name) <=
-# MIN(file_summary_by_instance).
-# There will be equality only when file instances are not removed,
-# aka when a file is not deleted from the file system,
-# because doing so removes a row in file_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-# COUNT/SUM/MAX(events_waits_summary_by_instance)
-# - MIN(events_waits_summary_global_by_event_name) <=
-# MIN(events_waits_summary_by_instance)
-# There will be equality only when an instrument instance
-# is not removed, which is next to impossible to predictably guarantee
-# in the server.
-# For example, a MyISAM table removed from the table cache
-# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock.
-# Another example, a thread terminating will cause a mysql_mutex_destroy
-# on sql/LOCK_delete
-# Both cause a row to be deleted from events_waits_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name)
-# - MIN(events_waits_summary_global_by_event_name) <=
-# MIN(events_waits_summary_by_thread_by_event_name)
-# There will be equality only when no thread is removed,
-# that is if no thread disconnects, or no sub thread (for example insert
-# delayed) ever completes.
-# A thread completing will cause rows in
-# events_waits_summary_by_thread_by_event_name to be removed.
-
---echo "Verifying file aggregate consistency"
-
-# Since the code generating the load in this test does:
-# - create table
-# - insert
-# - does not cause temporary tables to be used
-# we can test for equality here for file aggregates.
-
-# If any of these queries returns data, the test failed.
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (instance)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (thread)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-
-
-# Cleanup
-
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
- set enabled = 'YES', timed = 'YES';
-
-drop table test.t1;
-
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
-
diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result
index 83b88ed0480..ceb75176b43 100644
--- a/mysql-test/suite/plugins/r/server_audit.result
+++ b/mysql-test/suite/plugins/r/server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users
-server_audit_loc_info
server_audit_logging OFF
server_audit_mode 0
server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 0
server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 1
server_audit_output_type file
diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
index 83b88ed0480..ceb75176b43 100644
--- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result
+++ b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users
-server_audit_loc_info
server_audit_logging OFF
server_audit_mode 0
server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 0
server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 1
server_audit_output_type file
diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test
index a67850a66dd..dae1651dc93 100644
--- a/mysql-test/suite/rpl/t/rpl_drop_db.test
+++ b/mysql-test/suite/rpl/t/rpl_drop_db.test
@@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1);
select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt';
create table mysqltest1.t2 (n int);
create table mysqltest1.t3 (n int);
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
--error 1010
drop database mysqltest1;
use mysqltest1;
@@ -30,7 +30,7 @@ while ($1)
}
--enable_query_log
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
--error 1010
drop database mysqltest1;
use mysqltest1;
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index 4bff8a867f1..190b9f3fb50 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
+#
+# potential signedness issue
+#
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+
--echo #
--echo # End of 5.5 tests
--echo #
diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test
index 6967f847147..91e0bd3279f 100644
--- a/mysql-test/t/group_min_max_innodb.test
+++ b/mysql-test/t/group_min_max_innodb.test
@@ -230,3 +230,16 @@ eval EXPLAIN $query;
eval $query;
DROP TABLE t0,t1,t2;
+
+--echo #
+--echo # MDEV-MariaDB daemon leaks memory with specific query
+--echo #
+
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+ `language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+ `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+drop table t1,t2;
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index 519094d6350..e9d69b446d5 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -2881,6 +2881,19 @@ drop tables m1, t1, t4;
drop view t3;
+--echo #
+--echo # MDEV-10424 - Assertion `ticket == __null' failed in
+--echo # MDL_request::set_type
+--echo #
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+EXECUTE stmt;
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
+
+
--echo End of 5.5 tests
diff --git a/mysql-test/t/ps.test b/mysql-test/t/ps.test
index 3881d522bbf..67f6f021434 100644
--- a/mysql-test/t/ps.test
+++ b/mysql-test/t/ps.test
@@ -3670,5 +3670,32 @@ deallocate prepare stmt;
SET SESSION sql_mode = @save_sql_mode;
DROP TABLE t1,t2;
+--echo #
+--echo # MDEV-8833: Crash of server on prepared statement with
+--echo # conversion to semi-join
+--echo #
+
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+EXECUTE stmt;
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+
---echo # End of 10.0 tests
+--echo # End of 5.5 tests
diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test
index 3a949c5c47a..84fca993d09 100644
--- a/mysql-test/t/type_uint.test
+++ b/mysql-test/t/type_uint.test
@@ -16,6 +16,13 @@ drop table t1;
# End of 4.1 tests
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+select * from t2;
+drop table t1, t2;
--echo #
--echo # Start of 10.0 tests
diff --git a/mysys/my_redel.c b/mysys/my_redel.c
index 61e61b40791..976fc5a18c3 100644
--- a/mysys/my_redel.c
+++ b/mysys/my_redel.c
@@ -1,5 +1,5 @@
-/*
- Copyright (c) 2000, 2010, Oracle and/or its affiliates
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
+ Copyright (c) 2009, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name,
DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu",
org_name,tmp_name,MyFlags));
- if (my_copystat(org_name,tmp_name,MyFlags) < 0)
+ if (!my_disable_copystat_in_redel &&
+ my_copystat(org_name,tmp_name,MyFlags) < 0)
goto end;
if (MyFlags & MY_REDEL_MAKE_BACKUP)
{
diff --git a/mysys/my_static.c b/mysys/my_static.c
index 4aca78e30a9..9236c1395fb 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -98,3 +98,4 @@ my_bool my_disable_sync=0;
my_bool my_disable_async_io=0;
my_bool my_disable_flush_key_blocks=0;
my_bool my_disable_symlinks=0;
+my_bool my_disable_copystat_in_redel=0;
diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c
index b84f2b94806..d48b6c37728 100644
--- a/plugin/server_audit/server_audit.c
+++ b/plugin/server_audit/server_audit.c
@@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit,
char locinfo_ini_value[sizeof(struct connection_info)+4];
static MYSQL_THDVAR_STR(loc_info,
- PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC,
- "Auxiliary info.", NULL, NULL,
- locinfo_ini_value);
+ PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC,
+ "Internal info", NULL, NULL, locinfo_ini_value);
static const char *syslog_facility_names[]=
{
diff --git a/sql/contributors.h b/sql/contributors.h
index f52d3243453..0359ec54022 100644
--- a/sql/contributors.h
+++ b/sql/contributors.h
@@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= {
{"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"},
{"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"},
{"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"},
+ {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"},
/* Sponsors of important features */
{"Google", "USA", "Sponsoring encryption, parallel replication and GTID"},
diff --git a/sql/field.cc b/sql/field.cc
index ae815187019..b909d14ec8f 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
//MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24
- MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG,
+ MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG,
//MYSQL_TYPE_DATE MYSQL_TYPE_TIME
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR
diff --git a/sql/item.cc b/sql/item.cc
index a9c17ef620c..47635b14f46 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -2673,9 +2673,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
if (context)
{
Name_resolution_context *ctx= new Name_resolution_context();
- ctx->outer_context= NULL; // We don't build a complete name resolver
- ctx->table_list= NULL; // We rely on first_name_resolution_table instead
+ if (context->select_lex == new_parent)
+ {
+ /*
+ This field was pushed in then pulled out
+ (for example left part of IN)
+ */
+ ctx->outer_context= context->outer_context;
+ }
+ else if (context->outer_context)
+ {
+ /* just pull to the upper context */
+ ctx->outer_context= context->outer_context->outer_context;
+ }
+ else
+ {
+ /* No upper context (merging Derived/VIEW where context chain ends) */
+ ctx->outer_context= NULL;
+ }
+ ctx->table_list= context->first_name_resolution_table;
ctx->select_lex= new_parent;
+ if (context->select_lex == NULL)
+ ctx->select_lex= NULL;
ctx->first_name_resolution_table= context->first_name_resolution_table;
ctx->last_name_resolution_table= context->last_name_resolution_table;
ctx->error_processor= context->error_processor;
diff --git a/sql/log.cc b/sql/log.cc
index be24bcd718a..45ab5c8827b 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3102,7 +3102,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
if (! write_error)
{
write_error= 1;
- sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, error);
+ sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, tmp_errno);
}
}
}
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 41f6def8e08..8da8273083c 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -4096,6 +4096,7 @@ static int init_common_variables()
max_system_variables.pseudo_thread_id= (ulong)~0;
server_start_time= flush_status_time= my_time(0);
+ my_disable_copystat_in_redel= 1;
global_rpl_filter= new Rpl_filter;
binlog_filter= new Rpl_filter;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index ef7a46a7109..da3c5646e84 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -1,5 +1,5 @@
-/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2010, 2014, SkySQL Ab.
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+ Copyright (c) 2012, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/sql/parse_file.h b/sql/parse_file.h
index e4756e6c8af..87917dbd71b 100644
--- a/sql/parse_file.h
+++ b/sql/parse_file.h
@@ -42,9 +42,9 @@ enum file_opt_type {
struct File_option
{
- LEX_STRING name; /**< Name of the option */
- int offset; /**< offset to base address of value */
- file_opt_type type; /**< Option type */
+ LEX_STRING name; /**< Name of the option */
+ my_ptrdiff_t offset; /**< offset to base address of value */
+ file_opt_type type; /**< Option type */
};
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index 9dd3e532d1e..f72eb676743 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig)
struct tm tm;
#ifdef HAVE_STACKTRACE
THD *thd;
-#endif
/*
This flag remembers if the query pointer was found invalid.
We will try and print the query at the end of the signal handler, in case
we're wrong.
*/
bool print_invalid_query_pointer= false;
+#endif
if (segfaulted)
{
@@ -276,6 +276,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
"\"mlockall\" bugs.\n");
}
+#ifdef HAVE_STACKTRACE
if (print_invalid_query_pointer)
{
my_safe_printf_stderr(
@@ -285,6 +286,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
my_safe_printf_stderr("\n\n");
}
+#endif
#ifdef HAVE_WRITE_CORE
if (test_flags & TEST_CORE_ON_SIGNAL)
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index b974075b442..1f4426f2043 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -466,7 +466,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
}
thd->prepare_derived_at_open= FALSE;
- table->next_global= save_next_global;
+ /*
+ MERGE engine may adjust table->next_global chain, thus we have to
+ append save_next_global after merge children.
+ */
+ if (save_next_global)
+ {
+ TABLE_LIST *table_list_iterator= table;
+ while (table_list_iterator->next_global)
+ table_list_iterator= table_list_iterator->next_global;
+ table_list_iterator->next_global= save_next_global;
+ save_next_global->prev_global= &table_list_iterator->next_global;
+ }
+
table->next_local= save_next_local;
thd->open_options&= ~extra_open_options;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 9ea5b20dce6..b4a3cc27d2c 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -9267,6 +9267,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list,
*/
lex->reset_n_backup_query_tables_list(&query_tables_list_backup);
thd->reset_n_backup_open_tables_state(backup);
+ thd->lex->sql_command= SQLCOM_SELECT;
if (open_and_lock_tables(thd, table_list, FALSE,
MYSQL_OPEN_IGNORE_FLUSH |
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 430191cee5d..4143d2cc419 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -5828,9 +5828,11 @@ int THD::decide_logging_format(TABLE_LIST *tables)
{
static const char *prelocked_mode_name[] = {
"NON_PRELOCKED",
+ "LOCK_TABLES",
"PRELOCKED",
"PRELOCKED_UNDER_LOCK_TABLES",
};
+ compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last);
DBUG_PRINT("debug", ("prelocked_mode: %s",
prelocked_mode_name[locked_tables_mode]));
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index b22dc8142d8..da885c3dbac 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1262,7 +1262,8 @@ enum enum_locked_tables_mode
LTM_NONE= 0,
LTM_LOCK_TABLES,
LTM_PRELOCKED,
- LTM_PRELOCKED_UNDER_LOCK_TABLES
+ LTM_PRELOCKED_UNDER_LOCK_TABLES,
+ LTM_always_last
};
@@ -4610,6 +4611,11 @@ public:
save_copy_field_end= copy_field_end= NULL;
}
}
+ void free_copy_field_data()
+ {
+ for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++)
+ ptr->tmp.free();
+ }
};
class select_union :public select_result_interceptor
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 60248f3fef4..e7286960599 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -2849,6 +2849,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
}
+static size_t var_storage_size(int flags)
+{
+ switch (flags & PLUGIN_VAR_TYPEMASK) {
+ case PLUGIN_VAR_BOOL: return sizeof(my_bool);
+ case PLUGIN_VAR_INT: return sizeof(int);
+ case PLUGIN_VAR_LONG: return sizeof(long);
+ case PLUGIN_VAR_ENUM: return sizeof(long);
+ case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong);
+ case PLUGIN_VAR_SET: return sizeof(ulonglong);
+ case PLUGIN_VAR_STR: return sizeof(char*);
+ case PLUGIN_VAR_DOUBLE: return sizeof(double);
+ default: DBUG_ASSERT(0); return 0;
+ }
+}
+
+
/*
returns a bookmark for thd-local variables, creating if neccessary.
returns null for non thd-local variables.
@@ -2857,39 +2873,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
static st_bookmark *register_var(const char *plugin, const char *name,
int flags)
{
- uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size;
+ uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size;
st_bookmark *result;
char *varname, *p;
- if (!(flags & PLUGIN_VAR_THDLOCAL))
- return NULL;
-
- switch (flags & PLUGIN_VAR_TYPEMASK) {
- case PLUGIN_VAR_BOOL:
- size= sizeof(my_bool);
- break;
- case PLUGIN_VAR_INT:
- size= sizeof(int);
- break;
- case PLUGIN_VAR_LONG:
- case PLUGIN_VAR_ENUM:
- size= sizeof(long);
- break;
- case PLUGIN_VAR_LONGLONG:
- case PLUGIN_VAR_SET:
- size= sizeof(ulonglong);
- break;
- case PLUGIN_VAR_STR:
- size= sizeof(char*);
- break;
- case PLUGIN_VAR_DOUBLE:
- size= sizeof(double);
- break;
- default:
- DBUG_ASSERT(0);
- return NULL;
- };
+ DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL);
+ size= var_storage_size(flags);
varname= ((char*) my_alloca(length));
strxmov(varname + 1, plugin, "_", name, NullS);
for (p= varname + 1; *p; p++)
@@ -2983,25 +2973,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock)
*/
for (idx= 0; idx < bookmark_hash.records; idx++)
{
- sys_var_pluginvar *pi;
- sys_var *var;
st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
if (v->version <= thd->variables.dynamic_variables_version)
continue; /* already in thd->variables */
- if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
- !(pi= var->cast_pluginvar()) ||
- v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags))
- continue;
-
/* Here we do anything special that may be required of the data types */
- if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
- pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC)
+ if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
+ v->key[0] & BOOKMARK_MEMALLOC)
{
- int offset= ((thdvar_str_t *)(pi->plugin_var))->offset;
- char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset);
+ char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset);
if (*pp)
*pp= my_strdup(*pp, MYF(MY_WME|MY_FAE));
}
@@ -3448,69 +3430,58 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var)
return false;
}
-bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+static const void *var_def_ptr(st_mysql_sys_var *pv)
{
- DBUG_ASSERT(!is_readonly());
- mysql_mutex_assert_owner(&LOCK_global_system_variables);
-
- void *tgt= real_value_ptr(thd, OPT_GLOBAL);
- const void *src= &var->save_result;
-
- if (!var->value)
- {
- switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
+ switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
case PLUGIN_VAR_INT:
- src= &((sysvar_uint_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_uint_t*) pv)->def_val;
case PLUGIN_VAR_LONG:
- src= &((sysvar_ulong_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_ulong_t*) pv)->def_val;
case PLUGIN_VAR_LONGLONG:
- src= &((sysvar_ulonglong_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_ulonglong_t*) pv)->def_val;
case PLUGIN_VAR_ENUM:
- src= &((sysvar_enum_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_enum_t*) pv)->def_val;
case PLUGIN_VAR_SET:
- src= &((sysvar_set_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_set_t*) pv)->def_val;
case PLUGIN_VAR_BOOL:
- src= &((sysvar_bool_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_bool_t*) pv)->def_val;
case PLUGIN_VAR_STR:
- src= &((sysvar_str_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_str_t*) pv)->def_val;
case PLUGIN_VAR_DOUBLE:
- src= &((sysvar_double_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_double_t*) pv)->def_val;
case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_uint_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_uint_t*) pv)->def_val;
case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_ulong_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_ulong_t*) pv)->def_val;
case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_ulonglong_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_ulonglong_t*) pv)->def_val;
case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_enum_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_enum_t*) pv)->def_val;
case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_set_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_set_t*) pv)->def_val;
case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_bool_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_bool_t*) pv)->def_val;
case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_str_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_str_t*) pv)->def_val;
case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_double_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_double_t*) pv)->def_val;
default:
DBUG_ASSERT(0);
+ return NULL;
}
- }
+}
+
+
+bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+{
+ DBUG_ASSERT(!is_readonly());
+ mysql_mutex_assert_owner(&LOCK_global_system_variables);
+
+ void *tgt= real_value_ptr(thd, OPT_GLOBAL);
+ const void *src= &var->save_result;
+
+ if (!var->value)
+ src= var_def_ptr(plugin_var);
plugin_var->update(thd, plugin_var, tgt, src);
return false;
@@ -3863,7 +3834,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
*(int*)(opt + 1)= offset= v->offset;
if (opt->flags & PLUGIN_VAR_NOCMDOPT)
+ {
+ char *val= global_system_variables.dynamic_variables_ptr + offset;
+ if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) &&
+ (opt->flags & PLUGIN_VAR_MEMALLOC))
+ {
+ char *def_val= *(char**)var_def_ptr(opt);
+ *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL;
+ }
+ else
+ memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags));
continue;
+ }
optname= (char*) memdup_root(mem_root, v->key + 1,
(optnamelen= v->name_len) + 1);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 5dc50c92104..239e5b6b5d2 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -9172,9 +9172,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
We need to destruct the copy_field (allocated in create_tmp_table())
before setting it to 0 if the join is not "reusable".
*/
- if (!tmp_join || tmp_join != this)
- tmp_table_param.cleanup();
- tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+ if (!tmp_join || tmp_join != this)
+ tmp_table_param.cleanup();
+ else
+ {
+ /*
+ Free data buffered in copy_fields, but keep data pointed by copy_field
+ around for next iteration (possibly stored in save_copy_fields).
+
+ It would be logically simpler to not clear copy_field
+ below, but as we have loops that runs over copy_field to
+ copy_field_end that should not be done anymore, it's simpler to
+ just clear the pointers.
+
+ Another option would be to just clear copy_field_end and not run
+ the loops if this is not set or to have tmp_table_param.cleanup()
+ to run cleanup on save_copy_field if copy_field is not set.
+ */
+ tmp_table_param.free_copy_field_data();
+ tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+ }
first_record= sort_and_group=0;
send_records= (ha_rows) 0;
@@ -11890,7 +11907,7 @@ void JOIN::join_free()
/**
Free resources of given join.
- @param fill true if we should free all resources, call with full==1
+ @param full true if we should free all resources, call with full==1
should be last, before it this function can be called with
full==0
@@ -12010,7 +12027,7 @@ void JOIN::cleanup(bool full)
/*
If we have tmp_join and 'this' JOIN is not tmp_join and
tmp_table_param.copy_field's of them are equal then we have to remove
- pointer to tmp_table_param.copy_field from tmp_join, because it qill
+ pointer to tmp_table_param.copy_field from tmp_join, because it will
be removed in tmp_table_param.cleanup().
*/
if (tmp_join &&
@@ -16114,6 +16131,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
case Item::VARBIN_ITEM:
case Item::CACHE_ITEM:
case Item::EXPR_CACHE_ITEM:
+ case Item::PARAM_ITEM:
if (make_copy_field)
{
DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -22917,7 +22935,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
err:
if (copy)
delete [] param->copy_field; // This is never 0
- param->copy_field=0;
+ param->copy_field= 0;
err2:
DBUG_RETURN(TRUE);
}
diff --git a/sql/table_cache.cc b/sql/table_cache.cc
index 2dd368a1945..16a47b37417 100644
--- a/sql/table_cache.cc
+++ b/sql/table_cache.cc
@@ -778,6 +778,8 @@ void tdc_release_share(TABLE_SHARE *share)
mysql_mutex_lock(&share->tdc->LOCK_table_share);
if (--share->tdc->ref_count)
{
+ if (!share->is_view)
+ mysql_cond_broadcast(&share->tdc->COND_release);
mysql_mutex_unlock(&share->tdc->LOCK_table_share);
mysql_mutex_unlock(&LOCK_unused_shares);
DBUG_VOID_RETURN;
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index f21fd560235..c13d4583fef 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -736,7 +736,7 @@ dict_stats_copy(
if (dst_idx->type & DICT_FTS) {
continue;
}
- dict_stats_empty_index(dst_idx);
+ dict_stats_empty_index(dst_idx, true);
} else {
continue;
}
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 1457417d5dc..61b6f5408cf 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
/** variable to record innodb_fts_internal_tbl_name for information
schema table INNODB_FTS_INSERTED etc. */
UNIV_INTERN char* fts_internal_tbl_name = NULL;
+UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
/** InnoDB default stopword list:
There are different versions of stopwords, the stop words listed
@@ -6570,6 +6571,36 @@ fts_check_corrupt_index(
return(0);
}
+/* Get parent table name if it's a fts aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len length of aux_table_name
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len)
+{
+ fts_aux_table_t aux_table;
+ char* parent_table_name = NULL;
+
+ if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
+ dict_table_t* parent_table;
+
+ parent_table = dict_table_open_on_id(
+ aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+ if (parent_table != NULL) {
+ parent_table_name = mem_strdupl(
+ parent_table->name,
+ strlen(parent_table->name));
+
+ dict_table_close(parent_table, TRUE, FALSE);
+ }
+ }
+
+ return(parent_table_name);
+}
+
/** Check the validity of the parent table.
@param[in] aux_table auxiliary table
@return true if it is a valid table or false if it is not */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index fd15092d96c..7ba54a1c360 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -16916,7 +16916,12 @@ innodb_internal_table_update(
my_free(old);
}
- fts_internal_tbl_name = *(char**) var_ptr;
+ fts_internal_tbl_name2 = *(char**) var_ptr;
+ if (fts_internal_tbl_name2 == NULL) {
+ fts_internal_tbl_name = const_cast<char*>("default");
+ } else {
+ fts_internal_tbl_name = fts_internal_tbl_name2;
+ }
}
/****************************************************************//**
@@ -19148,7 +19153,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
"Whether to disable OS system file cache for sort I/O",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
PLUGIN_VAR_NOCMDARG,
"FTS internal auxiliary table to be checked",
innodb_internal_table_validate,
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 5385c7ab920..0ccc9bebf29 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -210,7 +210,10 @@ innobase_need_rebuild(
const Alter_inplace_info* ha_alter_info,
const TABLE* altered_table)
{
- if (ha_alter_info->handler_flags
+ Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+ ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+ if (alter_inplace_flags
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !(ha_alter_info->create_info->used_fields
& (HA_CREATE_USED_ROW_FORMAT
@@ -3985,7 +3988,7 @@ err_exit:
}
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
- || (ha_alter_info->handler_flags
+ || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table))) {
@@ -4159,7 +4162,7 @@ ok_exit:
DBUG_RETURN(false);
}
- if (ha_alter_info->handler_flags
+ if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table)) {
goto ok_exit;
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index d1e6e3ed808..a73446440aa 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -3959,6 +3959,8 @@ i_s_fts_config_fill(
DBUG_RETURN(0);
}
+ DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
fields = table->field;
/* Prevent DDL to drop fts aux tables. */
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 87b5787d416..3e2f359bbeb 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -375,6 +375,7 @@ extern bool fts_need_sync;
/** Variable specifying the table that has Fulltext index to display its
content through information schema table */
extern char* fts_internal_tbl_name;
+extern char* fts_internal_tbl_name2;
#define fts_que_graph_free(graph) \
do { \
@@ -823,6 +824,15 @@ void
fts_drop_orphaned_tables(void);
/*==========================*/
+/* Get parent table name if it's a fts aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len length of aux_table_name
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len);
+
/******************************************************************//**
Since we do a horizontal split on the index table, we need to drop
all the split tables.
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 8557f74f756..81190c3ad2e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -45,7 +45,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 32
+#define INNODB_VERSION_BUGFIX 33
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 5e32663ad32..12d4a59da6b 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -621,7 +621,7 @@ row_log_table_delete(
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
- mrec_size = 4 + old_pk_size;
+ mrec_size = 6 + old_pk_size;
/* Log enough prefix of the BLOB unless both the
old and new table are in COMPACT or REDUNDANT format,
@@ -651,8 +651,8 @@ row_log_table_delete(
*b++ = static_cast<byte>(old_pk_extra_size);
/* Log the size of external prefix we saved */
- mach_write_to_2(b, ext_size);
- b += 2;
+ mach_write_to_4(b, ext_size);
+ b += 4;
rec_convert_dtuple_to_temp(
b + old_pk_extra_size, new_index,
@@ -2276,14 +2276,14 @@ row_log_table_apply_op(
break;
case ROW_T_DELETE:
- /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
- if (mrec + 4 >= mrec_end) {
+ /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+ if (mrec + 6 >= mrec_end) {
return(NULL);
}
extra_size = *mrec++;
- ext_size = mach_read_from_2(mrec);
- mrec += 2;
+ ext_size = mach_read_from_4(mrec);
+ mrec += 4;
ut_ad(mrec < mrec_end);
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index b2c96a7ed7b..35011247105 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -2734,6 +2734,10 @@ loop:
return(n_tables + n_tables_dropped);
}
+ DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+ os_thread_sleep(5000000);
+ );
+
table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
DICT_ERR_IGNORE_NONE);
@@ -2744,6 +2748,16 @@ loop:
goto already_dropped;
}
+ if (!table->to_be_dropped) {
+ /* There is a scenario: the old table is dropped
+ just after it's added into drop list, and new
+ table with the same name is created, then we try
+ to drop the new table in background. */
+ dict_table_close(table, FALSE, FALSE);
+
+ goto already_dropped;
+ }
+
ut_a(!table->can_be_evicted);
dict_table_close(table, FALSE, FALSE);
@@ -4075,6 +4089,13 @@ row_drop_table_for_mysql(
}
}
+
+ DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+ row_add_table_to_background_drop_list(table->name);
+ err = DB_SUCCESS;
+ goto funct_exit;
+ );
+
/* TODO: could we replace the counter n_foreign_key_checks_running
with lock checks on the table? Acquire here an exclusive lock on the
table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
@@ -4700,6 +4721,19 @@ loop:
row_mysql_lock_data_dictionary(trx);
while ((table_name = dict_get_first_table_name_in_db(name))) {
+ /* Drop parent table if it is a fts aux table, to
+ avoid accessing dropped fts aux tables in information
+ schema when parent table still exists.
+ Note: Dropping the parent table will also drop its fts aux tables. */
+ char* parent_table_name;
+ parent_table_name = fts_get_parent_table_name(
+ table_name, strlen(table_name));
+
+ if (parent_table_name != NULL) {
+ mem_free(table_name);
+ table_name = parent_table_name;
+ }
+
ut_a(memcmp(table_name, name, namelen) == 0);
table = dict_table_open_on_name(
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 7c85431c57c..50d91842344 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -225,7 +225,7 @@ maria_declare_plugin(perfschema)
0x0001,
pfs_status_vars,
NULL,
- "5.6.32",
+ "5.6.33",
MariaDB_PLUGIN_MATURITY_STABLE
}
maria_declare_plugin_end;
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt
index 765e6733a98..53a4a675bbf 100644
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@@ -1,4 +1,4 @@
-SET(TOKUDB_VERSION 5.6.31-77.0)
+SET(TOKUDB_VERSION 5.6.32-78.1)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
index 4b62703480f..576f902f6ae 100644
--- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
+++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
@@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
"int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
"int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
- "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
- "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
+ "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
+ "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
index 77f6d8f67b7..cce12d575bf 100644
--- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
+++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
@@ -71,6 +71,7 @@ set_cflags_if_supported(
-Wno-pointer-bool-conversion
-fno-rtti
-fno-exceptions
+ -Wno-error=nonnull-compare
)
## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
index 11091073ac2..6696c26ecc0 100644
--- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
@@ -55,8 +55,8 @@ set(FT_SOURCES
msg_buffer
node
pivotkeys
+ serialize/rbtree_mhs
serialize/block_allocator
- serialize/block_allocator_strategy
serialize/block_table
serialize/compress
serialize/ft_node-serialize
diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
index fb456ea6a18..e6452f60cfc 100644
--- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
@@ -496,7 +496,7 @@ handle_split_of_child(
// We never set the rightmost blocknum to be the root.
// Instead, we wait for the root to split and let promotion initialize the rightmost
- // blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
+ // blocknum to be the first non-root leaf node on the right extreme to receive an insert.
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
if (childa->blocknum.b == rightmost_blocknum.b) {
@@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// It is possible after reading in the entire child,
// that we now know that the child is not reactive
// if so, we can unpin parent right now
- // we wont be splitting/merging child
+ // we won't be splitting/merging child
// and we have already replaced the bnc
// for the root with a fresh one
enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index 8f61bc67339..f131668889e 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -598,15 +598,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) {
}
}
-void toku_ftnode_clone_callback(
- void* value_data,
- void** cloned_value_data,
- long* clone_size,
- PAIR_ATTR* new_attr,
- bool for_checkpoint,
- void* write_extraargs
- )
-{
+void toku_ftnode_clone_callback(void *value_data,
+ void **cloned_value_data,
+ long *clone_size,
+ PAIR_ATTR *new_attr,
+ bool for_checkpoint,
+ void *write_extraargs) {
FTNODE node = static_cast<FTNODE>(value_data);
toku_ftnode_assert_fully_in_memory(node);
FT ft = static_cast<FT>(write_extraargs);
@@ -618,13 +615,16 @@ void toku_ftnode_clone_callback(
toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize);
}
- cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
- cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
+ cloned_node->oldest_referenced_xid_known =
+ node->oldest_referenced_xid_known;
+ cloned_node->max_msn_applied_to_node_on_disk =
+ node->max_msn_applied_to_node_on_disk;
cloned_node->flags = node->flags;
cloned_node->blocknum = node->blocknum;
cloned_node->layout_version = node->layout_version;
cloned_node->layout_version_original = node->layout_version_original;
- cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk;
+ cloned_node->layout_version_read_from_disk =
+ node->layout_version_read_from_disk;
cloned_node->build_id = node->build_id;
cloned_node->height = node->height;
cloned_node->dirty = node->dirty;
@@ -649,38 +649,39 @@ void toku_ftnode_clone_callback(
// set new pair attr if necessary
if (node->height == 0) {
*new_attr = make_ftnode_pair_attr(node);
- }
- else {
+ for (int i = 0; i < node->n_children; i++) {
+ BLB(node, i)->logical_rows_delta = 0;
+ BLB(cloned_node, i)->logical_rows_delta = 0;
+ }
+ } else {
new_attr->is_valid = false;
}
*clone_size = ftnode_memory_size(cloned_node);
*cloned_value_data = cloned_node;
}
-void toku_ftnode_flush_callback(
- CACHEFILE UU(cachefile),
- int fd,
- BLOCKNUM blocknum,
- void *ftnode_v,
- void** disk_data,
- void *extraargs,
- PAIR_ATTR size __attribute__((unused)),
- PAIR_ATTR* new_size,
- bool write_me,
- bool keep_me,
- bool for_checkpoint,
- bool is_clone
- )
-{
- FT ft = (FT) extraargs;
- FTNODE ftnode = (FTNODE) ftnode_v;
- FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
+void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
+ int fd,
+ BLOCKNUM blocknum,
+ void *ftnode_v,
+ void **disk_data,
+ void *extraargs,
+ PAIR_ATTR size __attribute__((unused)),
+ PAIR_ATTR *new_size,
+ bool write_me,
+ bool keep_me,
+ bool for_checkpoint,
+ bool is_clone) {
+ FT ft = (FT)extraargs;
+ FTNODE ftnode = (FTNODE)ftnode_v;
+ FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
assert(ftnode->blocknum.b == blocknum.b);
int height = ftnode->height;
if (write_me) {
toku_ftnode_assert_fully_in_memory(ftnode);
if (height > 0 && !is_clone) {
- // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback()
+ // cloned nodes already had their stale messages moved, see
+ // toku_ftnode_clone_callback()
toku_move_ftnode_messages_to_stale(ft, ftnode);
} else if (height == 0) {
toku_ftnode_leaf_run_gc(ft, ftnode);
@@ -688,7 +689,8 @@ void toku_ftnode_flush_callback(
toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
}
}
- int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
+ int r = toku_serialize_ftnode_to(
+ fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
assert_zero(r);
ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
}
@@ -703,20 +705,22 @@ void toku_ftnode_flush_callback(
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
}
toku_free(*disk_data);
- }
- else {
+ } else {
if (ftnode->height == 0) {
for (int i = 0; i < ftnode->n_children; i++) {
- if (BP_STATE(ftnode,i) == PT_AVAIL) {
+ if (BP_STATE(ftnode, i) == PT_AVAIL) {
BASEMENTNODE bn = BLB(ftnode, i);
- toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+ toku_ft_decrease_stats(&ft->in_memory_stats,
+ bn->stat64_delta);
+ if (!ftnode->dirty)
+ toku_ft_adjust_logical_row_count(
+ ft, -bn->logical_rows_delta);
}
}
}
}
toku_ftnode_free(&ftnode);
- }
- else {
+ } else {
*new_size = make_ftnode_pair_attr(ftnode);
}
}
@@ -845,10 +849,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr
}
// callback for partially evicting a node
-int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
- void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) {
- FTNODE node = (FTNODE) ftnode_pv;
- FT ft = (FT) write_extraargs;
+int toku_ftnode_pe_callback(void *ftnode_pv,
+ PAIR_ATTR old_attr,
+ void *write_extraargs,
+ void (*finalize)(PAIR_ATTR new_attr, void *extra),
+ void *finalize_extra) {
+ FTNODE node = (FTNODE)ftnode_pv;
+ FT ft = (FT)write_extraargs;
int num_partial_evictions = 0;
// Hold things we intend to destroy here.
@@ -866,7 +873,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
}
// Don't partially evict nodes whose partitions can't be read back
// from disk individually
- if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
+ if (node->layout_version_read_from_disk <
+ FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
goto exit;
}
//
@@ -874,77 +882,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
//
if (node->height > 0) {
for (int i = 0; i < node->n_children; i++) {
- if (BP_STATE(node,i) == PT_AVAIL) {
- if (BP_SHOULD_EVICT(node,i)) {
+ if (BP_STATE(node, i) == PT_AVAIL) {
+ if (BP_SHOULD_EVICT(node, i)) {
NONLEAF_CHILDINFO bnc = BNC(node, i);
if (ft_compress_buffers_before_eviction &&
- // We may not serialize and compress a partition in memory if its
- // in memory layout version is different than what's on disk (and
- // therefore requires upgrade).
+ // We may not serialize and compress a partition in
+ // memory if its in memory layout version is different
+ // than what's on disk (and therefore requires upgrade).
//
- // Auto-upgrade code assumes that if a node's layout version read
- // from disk is not current, it MUST require upgrade. Breaking
- // this rule would cause upgrade code to upgrade this partition
- // again after we serialize it as the current version, which is bad.
- node->layout_version == node->layout_version_read_from_disk) {
+ // Auto-upgrade code assumes that if a node's layout
+ // version read from disk is not current, it MUST
+ // require upgrade.
+ // Breaking this rule would cause upgrade code to
+ // upgrade this partition again after we serialize it as
+ // the current version, which is bad.
+ node->layout_version ==
+ node->layout_version_read_from_disk) {
toku_ft_bnc_move_messages_to_stale(ft, bnc);
compress_internal_node_partition(
node,
i,
// Always compress with quicklz
- TOKU_QUICKLZ_METHOD
- );
+ TOKU_QUICKLZ_METHOD);
} else {
// We're not compressing buffers before eviction. Simply
- // detach the buffer and set the child's state to on-disk.
+ // detach the buffer and set the child's state to
+ // on-disk.
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
}
buffers_to_destroy[num_buffers_to_destroy++] = bnc;
num_partial_evictions++;
+ } else {
+ BP_SWEEP_CLOCK(node, i);
}
- else {
- BP_SWEEP_CLOCK(node,i);
- }
- }
- else {
+ } else {
continue;
}
}
- }
- //
- // partial eviction strategy for basement nodes:
- // if the bn is compressed, evict it
- // else: check if it requires eviction, if it does, evict it, if not, sweep the clock count
- //
- else {
+ } else {
+ //
+ // partial eviction strategy for basement nodes:
+ // if the bn is compressed, evict it
+ // else: check if it requires eviction, if it does, evict it, if not,
+ // sweep the clock count
+ //
for (int i = 0; i < node->n_children; i++) {
// Get rid of compressed stuff no matter what.
- if (BP_STATE(node,i) == PT_COMPRESSED) {
+ if (BP_STATE(node, i) == PT_COMPRESSED) {
SUB_BLOCK sb = BSB(node, i);
pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr;
pointers_to_free[num_pointers_to_free++] = sb;
set_BNULL(node, i);
- BP_STATE(node,i) = PT_ON_DISK;
+ BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
- }
- else if (BP_STATE(node,i) == PT_AVAIL) {
- if (BP_SHOULD_EVICT(node,i)) {
+ } else if (BP_STATE(node, i) == PT_AVAIL) {
+ if (BP_SHOULD_EVICT(node, i)) {
BASEMENTNODE bn = BLB(node, i);
basements_to_destroy[num_basements_to_destroy++] = bn;
- toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+ toku_ft_decrease_stats(&ft->in_memory_stats,
+ bn->stat64_delta);
+ toku_ft_adjust_logical_row_count(ft,
+ -bn->logical_rows_delta);
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
+ } else {
+ BP_SWEEP_CLOCK(node, i);
}
- else {
- BP_SWEEP_CLOCK(node,i);
- }
- }
- else if (BP_STATE(node,i) == PT_ON_DISK) {
+ } else if (BP_STATE(node, i) == PT_ON_DISK) {
continue;
- }
- else {
+ } else {
abort();
}
}
@@ -2378,12 +2386,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) {
toku_ft_root_put_msg(ft_h->ft, msg, &gc_info);
}
-void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
- TOKUTXN txn, bool oplsn_valid, LSN oplsn,
- bool do_logging) {
+void toku_ft_maybe_update(FT_HANDLE ft_h,
+ const DBT *key,
+ const DBT *update_function_extra,
+ TOKUTXN txn,
+ bool oplsn_valid,
+ LSN oplsn,
+ bool do_logging) {
TXNID_PAIR xid = toku_txn_get_txnid(txn);
if (txn) {
- BYTESTRING keybs = { key->size, (char *) key->data };
+ BYTESTRING keybs = {key->size, (char *)key->data};
toku_logger_save_rollback_cmdupdate(
txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
toku_txn_maybe_note_ft(txn, ft_h->ft);
@@ -2392,22 +2404,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
TOKULOGGER logger;
logger = toku_txn_logger(txn);
if (do_logging && logger) {
- BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
- BYTESTRING extrabs = {.len=update_function_extra->size,
- .data = (char *) update_function_extra->data};
- toku_log_enq_update(logger, NULL, 0, txn,
- toku_cachefile_filenum(ft_h->ft->cf),
- xid, keybs, extrabs);
+ BYTESTRING keybs = {.len = key->size, .data = (char *)key->data};
+ BYTESTRING extrabs = {.len = update_function_extra->size,
+ .data = (char *)update_function_extra->data};
+ toku_log_enq_update(logger,
+ NULL,
+ 0,
+ txn,
+ toku_cachefile_filenum(ft_h->ft->cf),
+ xid,
+ keybs,
+ extrabs);
}
LSN treelsn;
- if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
+ if (oplsn_valid &&
+ oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
- XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
- ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
+ XIDS message_xids =
+ txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
+ ft_msg msg(
+ key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
ft_send_update_msg(ft_h, msg, txn);
}
+ // updates get converted to insert messages, which should do a -1 on the
+ // logical row count when the messages are permanently applied
+ toku_ft_adjust_logical_row_count(ft_h->ft, 1);
}
void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
index adac96f4882..e31d80772d5 100644
--- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
@@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
return rre->_cancelled =
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
}
-int toku_ft_recount_rows(
- FT_HANDLE ft,
- int (*progress_callback)(
- uint64_t count,
- uint64_t deleted,
- void* progress_extra),
- void* progress_extra) {
-
+int toku_ft_recount_rows(FT_HANDLE ft,
+ int (*progress_callback)(uint64_t count,
+ uint64_t deleted,
+ void* progress_extra),
+ void* progress_extra) {
int ret = 0;
- recount_rows_extra_t rre = {
- progress_callback,
- progress_extra,
- 0,
- false
- };
+ recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
ft_cursor c;
ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
- if (ret) return ret;
+ if (ret)
+ return ret;
- toku_ft_cursor_set_check_interrupt_cb(
- &c,
- recount_rows_interrupt,
- &rre);
+ toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
while (FT_LIKELY(ret == 0)) {
@@ -108,6 +98,7 @@ int toku_ft_recount_rows(
if (rre._cancelled == false) {
// update ft count
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
+ ft->ft->h->dirty = 1;
ret = 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 93d21233bf7..699fcc57603 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
// must be returned in toku_ft_stat64.
if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
+ if (ft->in_memory_logical_rows == (uint64_t)-1) {
+ toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
+ }
}
}
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
index dd070373e26..1aa2c203831 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
+++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
@@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
-// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.)
+// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
int toku_ft_loader_finish_extractor(FTLOADER bl);
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc
index 20f9363da1e..528c86a8f79 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc
@@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
uint64_t
toku_ft_loader_get_rowset_budget_for_testing (void)
-// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613).
+// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613).
{
return 16ULL*size_factor*1024ULL;
}
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 58ba675eb7c..12e5fda226e 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -373,52 +373,48 @@ find_bounds_within_message_tree(
}
}
-/**
- * For each message in the ancestor's buffer (determined by childnum) that
- * is key-wise between lower_bound_exclusive and upper_bound_inclusive,
- * apply the message to the basement node. We treat the bounds as minus
- * or plus infinity respectively if they are NULL. Do not mark the node
- * as dirty (preserve previous state of 'dirty' bit).
- */
+// For each message in the ancestor's buffer (determined by childnum) that
+// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
+// apply the message to the basement node. We treat the bounds as minus
+// or plus infinity respectively if they are NULL. Do not mark the node
+// as dirty (preserve previous state of 'dirty' bit).
static void bnc_apply_messages_to_basement_node(
- FT_HANDLE t, // used for comparison function
- BASEMENTNODE bn, // where to apply messages
+ FT_HANDLE t, // used for comparison function
+ BASEMENTNODE bn, // where to apply messages
FTNODE ancestor, // the ancestor node where we can find messages to apply
- int childnum, // which child buffer of ancestor contains messages we want
- const pivot_bounds &bounds, // contains pivot key bounds of this basement node
- txn_gc_info* gc_info,
- bool* msgs_applied) {
-
+ int childnum, // which child buffer of ancestor contains messages we want
+ const pivot_bounds &
+ bounds, // contains pivot key bounds of this basement node
+ txn_gc_info *gc_info,
+ bool *msgs_applied) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
// Determine the offsets in the message trees between which we need to
// apply messages from this buffer
- STAT64INFO_S stats_delta = {0,0};
+ STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;
uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
- find_bounds_within_message_tree(
- t->ft->cmp,
- bnc->stale_message_tree,
- &bnc->msg_buffer,
- bounds,
- &stale_lbi,
- &stale_ube);
+ find_bounds_within_message_tree(t->ft->cmp,
+ bnc->stale_message_tree,
+ &bnc->msg_buffer,
+ bounds,
+ &stale_lbi,
+ &stale_ube);
} else {
stale_lbi = 0;
stale_ube = 0;
}
uint32_t fresh_lbi, fresh_ube;
- find_bounds_within_message_tree(
- t->ft->cmp,
- bnc->fresh_message_tree,
- &bnc->msg_buffer,
- bounds,
- &fresh_lbi,
- &fresh_ube);
+ find_bounds_within_message_tree(t->ft->cmp,
+ bnc->fresh_message_tree,
+ &bnc->msg_buffer,
+ bounds,
+ &fresh_lbi,
+ &fresh_ube);
// We now know where all the messages we must apply are, so one of the
// following 4 cases will do the application, depending on which of
@@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node(
// We have messages in multiple trees, so we grab all
// the relevant messages' offsets and sort them by MSN, then apply
// them in MSN order.
- const int buffer_size = ((stale_ube - stale_lbi) +
- (fresh_ube - fresh_lbi) +
- bnc->broadcast_list.size());
+ const int buffer_size =
+ ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
+ bnc->broadcast_list.size());
toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
- struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
+ struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
+ .i = 0};
// Populate offsets array with offsets to stale messages
- r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
+ r = bnc->stale_message_tree
+ .iterate_on_range<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(
+ stale_lbi, stale_ube, &sfo_extra);
assert_zero(r);
// Then store fresh offsets, and mark them to be moved to stale later.
- r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
+ r = bnc->fresh_message_tree
+ .iterate_and_mark_range<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(
+ fresh_lbi, fresh_ube, &sfo_extra);
assert_zero(r);
// Store offsets of all broadcast messages.
- r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
+ r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(&sfo_extra);
assert_zero(r);
invariant(sfo_extra.i == buffer_size);
// Sort by MSN.
- toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
+ toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
+ mergesort_r(offsets, buffer_size, bnc->msg_buffer);
// Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true;
- do_bn_apply_msg(
- t,
- bn,
- &bnc->msg_buffer,
- offsets[i],
- gc_info,
- &workdone_this_ancestor,
- &stats_delta,
- &logical_rows_delta);
+ do_bn_apply_msg(t,
+ bn,
+ &bnc->msg_buffer,
+ offsets[i],
+ gc_info,
+ &workdone_this_ancestor,
+ &stats_delta,
+ &logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
- // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
+ // No stale messages to apply, we just apply fresh messages, and mark
+ // them to be moved to stale later.
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
@@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta
- };
- if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
- r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
+ .logical_rows_delta = &logical_rows_delta};
+ if (fresh_ube - fresh_lbi > 0)
+ *msgs_applied = true;
+ r = bnc->fresh_message_tree
+ .iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
+ iterate_do_bn_apply_msg>(
+ fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r);
} else {
invariant(fresh_lbi == fresh_ube);
// No fresh messages to apply, we just apply stale messages.
- if (stale_ube - stale_lbi > 0) *msgs_applied = true;
+ if (stale_ube - stale_lbi > 0)
+ *msgs_applied = true;
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
@@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta
- };
+ .logical_rows_delta = &logical_rows_delta};
- r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
+ r = bnc->stale_message_tree
+ .iterate_on_range<struct iterate_do_bn_apply_msg_extra,
+ iterate_do_bn_apply_msg>(
+ stale_lbi, stale_ube, &iter_extra);
assert_zero(r);
}
//
// update stats
//
if (workdone_this_ancestor > 0) {
- (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
+ (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
+ workdone_this_ancestor);
}
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
+ bn->logical_rows_delta += logical_rows_delta;
}
static void
diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h
index ad0298e81c5..52eefec0936 100644
--- a/storage/tokudb/PerconaFT/ft/node.h
+++ b/storage/tokudb/PerconaFT/ft/node.h
@@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
+ int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
index 1355f3739ee..19811373d16 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
@@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_stdlib.h"
#include "ft/serialize/block_allocator.h"
-#include "ft/serialize/block_allocator_strategy.h"
+#include "ft/serialize/rbtree_mhs.h"
#if TOKU_DEBUG_PARANOID
-#define VALIDATE() validate()
+#define VALIDATE() Validate()
#else
#define VALIDATE()
#endif
-static FILE *ba_trace_file = nullptr;
-
-void block_allocator::maybe_initialize_trace(void) {
- const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH");
- if (ba_trace_path != nullptr) {
- ba_trace_file = toku_os_fopen(ba_trace_path, "w");
- if (ba_trace_file == nullptr) {
- fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), "
- "but it could not be opened for writing (errno %d)\n",
- ba_trace_path, get_maybe_error_errno());
- } else {
- fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path);
- }
- }
-}
-
-void block_allocator::maybe_close_trace() {
- if (ba_trace_file != nullptr) {
- int r = toku_os_fclose(ba_trace_file);
- if (r != 0) {
- fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n",
- r, get_maybe_error_errno());
- } else {
- fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n");
- }
- }
-}
-
-void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) {
- // the alignment must be at least 512 and aligned with 512 to work with direct I/O
- assert(alignment >= 512 && (alignment % 512) == 0);
+void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
+ uint64_t alignment) {
+ // the alignment must be at least 512 and aligned with 512 to work with
+ // direct I/O
+ invariant(alignment >= 512 && (alignment % 512) == 0);
_reserve_at_beginning = reserve_at_beginning;
_alignment = alignment;
_n_blocks = 0;
- _blocks_array_size = 1;
- XMALLOC_N(_blocks_array_size, _blocks_array);
_n_bytes_in_use = reserve_at_beginning;
- _strategy = BA_STRATEGY_FIRST_FIT;
-
- memset(&_trace_lock, 0, sizeof(toku_mutex_t));
- toku_mutex_init(&_trace_lock, nullptr);
+ _tree = new MhsRbTree::Tree(alignment);
+}
+void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
+ CreateInternal(reserve_at_beginning, alignment);
+ _tree->Insert({reserve_at_beginning, MAX_BYTE});
VALIDATE();
}
-void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) {
- _create_internal(reserve_at_beginning, alignment);
- _trace_create();
+void BlockAllocator::Destroy() {
+ delete _tree;
}
-void block_allocator::destroy() {
- toku_free(_blocks_array);
- _trace_destroy();
- toku_mutex_destroy(&_trace_lock);
-}
+void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
+ uint64_t alignment,
+ struct BlockPair *translation_pairs,
+ uint64_t n_blocks) {
+ CreateInternal(reserve_at_beginning, alignment);
+ _n_blocks = n_blocks;
-void block_allocator::set_strategy(enum allocation_strategy strategy) {
- _strategy = strategy;
-}
+ struct BlockPair *XMALLOC_N(n_blocks, pairs);
+ memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
+ std::sort(pairs, pairs + n_blocks);
-void block_allocator::grow_blocks_array_by(uint64_t n_to_add) {
- if (_n_blocks + n_to_add > _blocks_array_size) {
- uint64_t new_size = _n_blocks + n_to_add;
- uint64_t at_least = _blocks_array_size * 2;
- if (at_least > new_size) {
- new_size = at_least;
- }
- _blocks_array_size = new_size;
- XREALLOC_N(_blocks_array_size, _blocks_array);
+ if (pairs[0]._offset > reserve_at_beginning) {
+ _tree->Insert(
+ {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
}
-}
-
-void block_allocator::grow_blocks_array() {
- grow_blocks_array_by(1);
-}
-
-void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
- struct blockpair *pairs, uint64_t n_blocks) {
- _create_internal(reserve_at_beginning, alignment);
-
- _n_blocks = n_blocks;
- grow_blocks_array_by(_n_blocks);
- memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
- std::sort(_blocks_array, _blocks_array + _n_blocks);
for (uint64_t i = 0; i < _n_blocks; i++) {
- // Allocator does not support size 0 blocks. See block_allocator_free_block.
- invariant(_blocks_array[i].size > 0);
- invariant(_blocks_array[i].offset >= _reserve_at_beginning);
- invariant(_blocks_array[i].offset % _alignment == 0);
-
- _n_bytes_in_use += _blocks_array[i].size;
+ // Allocator does not support size 0 blocks. See
+ // BlockAllocator::FreeBlock.
+ invariant(pairs[i]._size > 0);
+ invariant(pairs[i]._offset >= _reserve_at_beginning);
+ invariant(pairs[i]._offset % _alignment == 0);
+
+ _n_bytes_in_use += pairs[i]._size;
+
+ MhsRbTree::OUUInt64 free_size(MAX_BYTE);
+ MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
+ if (i < n_blocks - 1) {
+ MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
+ invariant(next_offset >= free_offset);
+ free_size = next_offset - free_offset;
+ if (free_size == 0)
+ continue;
+ }
+ _tree->Insert({free_offset, free_size});
}
-
+ toku_free(pairs);
VALIDATE();
-
- _trace_create_from_blockpairs();
}
// Effect: align a value by rounding up.
-static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
}
-struct block_allocator::blockpair *
-block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
- switch (_strategy) {
- case BA_STRATEGY_FIRST_FIT:
- return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
- case BA_STRATEGY_BEST_FIT:
- return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
- case BA_STRATEGY_HEAT_ZONE:
- return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
- case BA_STRATEGY_PADDED_FIT:
- return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment);
- default:
- abort();
- }
-}
-
-// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
-void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
- struct blockpair *bp;
-
+// Effect: Allocate a block. The resulting block must be aligned on the
+// ba->alignment (which to make direct_io happy must be a positive multiple of
+// 512).
+void BlockAllocator::AllocBlock(uint64_t size,
+ uint64_t *offset) {
// Allocator does not support size 0 blocks. See block_allocator_free_block.
invariant(size > 0);
- grow_blocks_array();
_n_bytes_in_use += size;
+ *offset = _tree->Remove(size);
- uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment);
-
- if (_n_blocks == 0) {
- // First and only block
- assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use
- _blocks_array[0].offset = align(_reserve_at_beginning, _alignment);
- _blocks_array[0].size = size;
- *offset = _blocks_array[0].offset;
- goto done;
- } else if (end_of_reserve + size <= _blocks_array[0].offset ) {
- // Check to see if the space immediately after the reserve is big enough to hold the new block.
- bp = &_blocks_array[0];
- memmove(bp + 1, bp, _n_blocks * sizeof(*bp));
- bp[0].offset = end_of_reserve;
- bp[0].size = size;
- *offset = end_of_reserve;
- goto done;
- }
-
- bp = choose_block_to_alloc_after(size, heat);
- if (bp != nullptr) {
- // our allocation strategy chose the space after `bp' to fit the new block
- uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
- uint64_t blocknum = bp - _blocks_array;
- invariant(&_blocks_array[blocknum] == bp);
- invariant(blocknum < _n_blocks);
- memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp));
- bp[1].offset = answer_offset;
- bp[1].size = size;
- *offset = answer_offset;
- } else {
- // It didn't fit anywhere, so fit it on the end.
- assert(_n_blocks < _blocks_array_size);
- bp = &_blocks_array[_n_blocks];
- uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment);
- bp->offset = answer_offset;
- bp->size = size;
- *offset = answer_offset;
- }
-
-done:
_n_blocks++;
VALIDATE();
-
- _trace_alloc(size, heat, *offset);
-}
-
-// Find the index in the blocks array that has a particular offset. Requires that the block exist.
-// Use binary search so it runs fast.
-int64_t block_allocator::find_block(uint64_t offset) {
- VALIDATE();
- if (_n_blocks == 1) {
- assert(_blocks_array[0].offset == offset);
- return 0;
- }
-
- uint64_t lo = 0;
- uint64_t hi = _n_blocks;
- while (1) {
- assert(lo < hi); // otherwise no such block exists.
- uint64_t mid = (lo + hi) / 2;
- uint64_t thisoff = _blocks_array[mid].offset;
- if (thisoff < offset) {
- lo = mid + 1;
- } else if (thisoff > offset) {
- hi = mid;
- } else {
- return mid;
- }
- }
}
-// To support 0-sized blocks, we need to include size as an input to this function.
+// To support 0-sized blocks, we need to include size as an input to this
+// function.
// All 0-sized blocks at the same offset can be considered identical, but
// a 0-sized block can share offset with a non-zero sized block.
-// The non-zero sized block is not exchangable with a zero sized block (or vice versa),
-// so inserting 0-sized blocks can cause corruption here.
-void block_allocator::free_block(uint64_t offset) {
+// The non-zero sized block is not exchangeable with a zero sized block (or
+// vice versa), so inserting 0-sized blocks can cause corruption here.
+void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
VALIDATE();
- int64_t bn = find_block(offset);
- assert(bn >= 0); // we require that there is a block with that offset.
- _n_bytes_in_use -= _blocks_array[bn].size;
- memmove(&_blocks_array[bn], &_blocks_array[bn + 1],
- (_n_blocks - bn - 1) * sizeof(struct blockpair));
+ _n_bytes_in_use -= size;
+ _tree->Insert({offset, size});
_n_blocks--;
VALIDATE();
-
- _trace_free(offset);
-}
-
-uint64_t block_allocator::block_size(uint64_t offset) {
- int64_t bn = find_block(offset);
- assert(bn >=0); // we require that there is a block with that offset.
- return _blocks_array[bn].size;
}
-uint64_t block_allocator::allocated_limit() const {
- if (_n_blocks == 0) {
- return _reserve_at_beginning;
- } else {
- struct blockpair *last = &_blocks_array[_n_blocks - 1];
- return last->offset + last->size;
- }
+uint64_t BlockAllocator::AllocatedLimit() const {
+ MhsRbTree::Node *max_node = _tree->MaxNode();
+ return rbn_offset(max_node).ToInt();
}
-// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
+// Effect: Consider the blocks in sorted order. The reserved block at the
+// beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block that big, return nonzero if b is too big.
-int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) {
- if (b ==0 ) {
+int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
+ uint64_t *offset,
+ uint64_t *size) {
+ MhsRbTree::Node *x, *y;
+ if (b == 0) {
*offset = 0;
*size = _reserve_at_beginning;
- return 0;
+ return 0;
} else if (b > _n_blocks) {
return -1;
} else {
- *offset =_blocks_array[b - 1].offset;
- *size =_blocks_array[b - 1].size;
+ x = _tree->MinNode();
+ for (uint64_t i = 1; i <= b; i++) {
+ y = x;
+ x = _tree->Successor(x);
+ }
+ *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
+ *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
return 0;
}
}
+struct VisUnusedExtra {
+ TOKU_DB_FRAGMENTATION _report;
+ uint64_t _align;
+};
+
+static void VisUnusedCollector(void *extra,
+ MhsRbTree::Node *node,
+ uint64_t UU(depth)) {
+ struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
+ TOKU_DB_FRAGMENTATION report = v_e->_report;
+ uint64_t alignm = v_e->_align;
+
+ MhsRbTree::OUUInt64 offset = rbn_offset(node);
+ MhsRbTree::OUUInt64 size = rbn_size(node);
+ MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
+ uint64_t free_space = (offset + size - answer_offset).ToInt();
+ if (free_space > 0) {
+ report->unused_bytes += free_space;
+ report->unused_blocks++;
+ if (free_space > report->largest_unused_block) {
+ report->largest_unused_block = free_space;
+ }
+ }
+}
// Requires: report->file_size_bytes is filled in
// Requires: report->data_bytes is filled in
// Requires: report->checkpoint_bytes_additional is filled in
-void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) {
- assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional);
+void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
+ invariant(_n_bytes_in_use ==
+ report->data_bytes + report->checkpoint_bytes_additional);
report->unused_bytes = 0;
report->unused_blocks = 0;
report->largest_unused_block = 0;
- if (_n_blocks > 0) {
- //Deal with space before block 0 and after reserve:
- {
- struct blockpair *bp = &_blocks_array[0];
- assert(bp->offset >= align(_reserve_at_beginning, _alignment));
- uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment);
- if (free_space > 0) {
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
-
- //Deal with space between blocks:
- for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) {
- // Consider the space after blocknum
- struct blockpair *bp = &_blocks_array[blocknum];
- uint64_t this_offset = bp[0].offset;
- uint64_t this_size = bp[0].size;
- uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
- uint64_t next_offset = bp[1].offset;
- uint64_t free_space = next_offset - end_of_this_block;
- if (free_space > 0) {
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
-
- //Deal with space after last block
- {
- struct blockpair *bp = &_blocks_array[_n_blocks-1];
- uint64_t this_offset = bp[0].offset;
- uint64_t this_size = bp[0].size;
- uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
- if (end_of_this_block < report->file_size_bytes) {
- uint64_t free_space = report->file_size_bytes - end_of_this_block;
- assert(free_space > 0);
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
- } else {
- // No blocks. Just the reserve.
- uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment);
- if (end_of_this_block < report->file_size_bytes) {
- uint64_t free_space = report->file_size_bytes - end_of_this_block;
- assert(free_space > 0);
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
+ struct VisUnusedExtra extra = {report, _alignment};
+ _tree->InOrderVisitor(VisUnusedCollector, &extra);
}
-void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) {
- report->data_bytes = _n_bytes_in_use;
- report->data_blocks = _n_blocks;
+void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
+ report->data_bytes = _n_bytes_in_use;
+ report->data_blocks = _n_blocks;
report->file_size_bytes = 0;
report->checkpoint_bytes_additional = 0;
- get_unused_statistics(report);
+ UnusedStatistics(report);
}
-void block_allocator::validate() const {
- uint64_t n_bytes_in_use = _reserve_at_beginning;
- for (uint64_t i = 0; i < _n_blocks; i++) {
- n_bytes_in_use += _blocks_array[i].size;
- if (i > 0) {
- assert(_blocks_array[i].offset > _blocks_array[i - 1].offset);
- assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size );
- }
- }
- assert(n_bytes_in_use == _n_bytes_in_use);
-}
-
-// Tracing
-
-void block_allocator::_trace_create(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n",
- this, _reserve_at_beginning, _alignment);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_create_from_blockpairs(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ",
- this, _reserve_at_beginning, _alignment);
- for (uint64_t i = 0; i < _n_blocks; i++) {
- fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ",
- _blocks_array[i].offset, _blocks_array[i].size);
- }
- fprintf(ba_trace_file, "\n");
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_destroy(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_destroy %p\n", this);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- this, size, heat, offset);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
+struct ValidateExtra {
+ uint64_t _bytes;
+ MhsRbTree::Node *_pre_node;
+};
+static void VisUsedBlocksInOrder(void *extra,
+ MhsRbTree::Node *cur_node,
+ uint64_t UU(depth)) {
+ struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
+ MhsRbTree::Node *pre_node = v_e->_pre_node;
+ // verify no overlaps
+ if (pre_node) {
+ invariant(rbn_size(pre_node) > 0);
+ invariant(rbn_offset(cur_node) >
+ rbn_offset(pre_node) + rbn_size(pre_node));
+ MhsRbTree::OUUInt64 used_space =
+ rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
+ v_e->_bytes += used_space.ToInt();
+ } else {
+ v_e->_bytes += rbn_offset(cur_node).ToInt();
}
+ v_e->_pre_node = cur_node;
}
-void block_allocator::_trace_free(uint64_t offset) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
+void BlockAllocator::Validate() const {
+ _tree->ValidateBalance();
+ _tree->ValidateMhs();
+ struct ValidateExtra extra = {0, nullptr};
+ _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
+ invariant(extra._bytes == _n_bytes_in_use);
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
index 9b2c1553e7f..648ea9a9ef2 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
@@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_pthread.h"
#include "portability/toku_stdint.h"
#include "portability/toku_stdlib.h"
+#include "ft/serialize/rbtree_mhs.h"
// Block allocator.
//
@@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// The allocation of block numbers is handled elsewhere.
//
// When creating a block allocator we also specify a certain-sized
-// block at the beginning that is preallocated (and cannot be allocated or freed)
+// block at the beginning that is preallocated (and cannot be allocated or
+// freed)
//
// We can allocate blocks of a particular size at a particular location.
-// We can allocate blocks of a particular size at a location chosen by the allocator.
// We can free blocks.
// We can determine the size of a block.
-
-class block_allocator {
-public:
+#define MAX_BYTE 0xffffffffffffffff
+class BlockAllocator {
+ public:
static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;
// How much must be reserved at the beginning for the block?
- // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
+ // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1
+ // pointer for each root.
// So 4096 should be enough.
static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;
-
- static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
+
+ static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT ==
+ 0,
"block allocator header must have proper alignment");
- static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
+ static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE =
+ BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
- enum allocation_strategy {
- BA_STRATEGY_FIRST_FIT = 1,
- BA_STRATEGY_BEST_FIT,
- BA_STRATEGY_PADDED_FIT,
- BA_STRATEGY_HEAT_ZONE
- };
-
- struct blockpair {
- uint64_t offset;
- uint64_t size;
- blockpair(uint64_t o, uint64_t s) :
- offset(o), size(s) {
- }
- int operator<(const struct blockpair &rhs) const {
- return offset < rhs.offset;
- }
- int operator<(const uint64_t &o) const {
- return offset < o;
+ struct BlockPair {
+ uint64_t _offset;
+ uint64_t _size;
+ BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+ int operator<(const struct BlockPair &rhs) const {
+ return _offset < rhs._offset;
}
+ int operator<(const uint64_t &o) const { return _offset < o; }
};
- // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
- // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
+ // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+ // bytes are not put into a block.
+ // The default allocation strategy is first fit
+ // (BA_STRATEGY_FIRST_FIT)
// All blocks be start on a multiple of ALIGNMENT.
// Aborts if we run out of memory.
// Parameters
- // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
+ // reserve_at_beginning (IN) Size of reserved block at beginning.
+ // This size does not have to be aligned.
// alignment (IN) Block alignment.
- void create(uint64_t reserve_at_beginning, uint64_t alignment);
+ void Create(uint64_t reserve_at_beginning, uint64_t alignment);
- // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
- // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
- // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
+ // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+ // bytes are not put into a block.
+ // The allocator is initialized to contain `n_blocks' of BlockPairs,
+ // taken from `pairs'
// All blocks be start on a multiple of ALIGNMENT.
// Aborts if we run out of memory.
// Parameters
// pairs, unowned array of pairs to copy
// n_blocks, Size of pairs array
- // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
+ // reserve_at_beginning (IN) Size of reserved block at beginning.
+ // This size does not have to be aligned.
// alignment (IN) Block alignment.
- void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
- struct blockpair *pairs, uint64_t n_blocks);
+ void CreateFromBlockPairs(uint64_t reserve_at_beginning,
+ uint64_t alignment,
+ struct BlockPair *pairs,
+ uint64_t n_blocks);
// Effect: Destroy this block allocator
- void destroy();
-
- // Effect: Set the allocation strategy that the allocator should use
- // Requires: No other threads are operating on this block allocator
- void set_strategy(enum allocation_strategy strategy);
+ void Destroy();
- // Effect: Allocate a block of the specified size at an address chosen by the allocator.
+ // Effect: Allocate a block of the specified size at an address chosen by
+ // the allocator.
// Aborts if anything goes wrong.
// The block address will be a multiple of the alignment.
// Parameters:
- // size (IN): The size of the block. (The size does not have to be aligned.)
+ // size (IN): The size of the block. (The size does not have to be
+ // aligned.)
// offset (OUT): The location of the block.
- // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
- // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary
- void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
+ // Note: earlier versions also took a `heat` hint (a higher heat meant
+ // we should be prepared to free the block soon, perhaps in the next
+ // checkpoint); that parameter has been removed from this interface.
+ void AllocBlock(uint64_t size, uint64_t *offset);
// Effect: Free the block at offset.
// Requires: There must be a block currently allocated at that offset.
// Parameters:
// offset (IN): The offset of the block.
- void free_block(uint64_t offset);
+ void FreeBlock(uint64_t offset, uint64_t size);
- // Effect: Return the size of the block that starts at offset.
- // Requires: There must be a block currently allocated at that offset.
- // Parameters:
- // offset (IN): The offset of the block.
- uint64_t block_size(uint64_t offset);
-
- // Effect: Check to see if the block allocator is OK. This may take a long time.
+ // Effect: Check to see if the block allocator is OK. This may take a long
+ // time.
// Usage Hints: Probably only use this for unit tests.
// TODO: Private?
- void validate() const;
+ void Validate() const;
// Effect: Return the unallocated block address of "infinite" size.
- // That is, return the smallest address that is above all the allocated blocks.
- uint64_t allocated_limit() const;
+ // That is, return the smallest address that is above all the allocated
+ // blocks.
+ uint64_t AllocatedLimit() const;
- // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
+ // Effect: Consider the blocks in sorted order. The reserved block at the
+ // beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block that big, return nonzero if b is too big.
// Rationale: This is probably useful only for tests.
- int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
+ int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size);
// Effect: Fill in report to indicate how the file is used.
- // Requires:
+ // Requires:
// report->file_size_bytes is filled in
// report->data_bytes is filled in
// report->checkpoint_bytes_additional is filled in
- void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
+ void UnusedStatistics(TOKU_DB_FRAGMENTATION report);
// Effect: Fill in report->data_bytes with the number of bytes in use
- // Fill in report->data_blocks with the number of blockpairs in use
+ // Fill in report->data_blocks with the number of BlockPairs in use
// Fill in unused statistics using this->get_unused_statistics()
// Requires:
// report->file_size is ignored on return
// report->checkpoint_bytes_additional is ignored on return
- void get_statistics(TOKU_DB_FRAGMENTATION report);
-
- // Block allocator tracing.
- // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
- // should be written to.
- // - Trace may be replayed by ba_trace_replay tool in tools/ directory
- // eg: "cat mytracefile | ba_trace_replay"
- static void maybe_initialize_trace();
- static void maybe_close_trace();
-
-private:
- void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
- void grow_blocks_array_by(uint64_t n_to_add);
- void grow_blocks_array();
- int64_t find_block(uint64_t offset);
- struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
-
- // Tracing
- toku_mutex_t _trace_lock;
- void _trace_create(void);
- void _trace_create_from_blockpairs(void);
- void _trace_destroy(void);
- void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
- void _trace_free(uint64_t offset);
+ void Statistics(TOKU_DB_FRAGMENTATION report);
+
+ virtual ~BlockAllocator(){};
+
+ private:
+ void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment);
// How much to reserve at the beginning
uint64_t _reserve_at_beginning;
@@ -203,12 +181,8 @@ private:
uint64_t _alignment;
// How many blocks
uint64_t _n_blocks;
- // How big is the blocks_array. Must be >= n_blocks.
- uint64_t _blocks_array_size;
- // These blocks are sorted by address.
- struct blockpair *_blocks_array;
- // Including the reserve_at_beginning
uint64_t _n_bytes_in_use;
- // The allocation strategy are we using
- enum allocation_strategy _strategy;
+
+ // These blocks are sorted by address.
+ MhsRbTree::Tree *_tree;
};
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
deleted file mode 100644
index 62bb8fc4a87..00000000000
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include <algorithm>
-
-#include <string.h>
-
-#include "portability/toku_assert.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
- return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
-}
-
-static uint64_t _roundup_to_power_of_two(uint64_t value) {
- uint64_t r = 4096;
- while (r < value) {
- r *= 2;
- invariant(r > 0);
- }
- return r;
-}
-
-// First fit block allocation
-static struct block_allocator::blockpair *
-_first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t max_padding) {
- if (n_blocks == 1) {
- // won't enter loop, can't underflow the direction < 0 case
- return nullptr;
- }
-
- struct block_allocator::blockpair *bp = &blocks_array[0];
- for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
- n_spaces_to_check--, bp++) {
- // Consider the space after bp
- uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
- uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
- if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
- invariant(bp - blocks_array < (int64_t) n_blocks);
- return bp;
- }
- }
- return nullptr;
-}
-
-static struct block_allocator::blockpair *
-_first_fit_bw(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
- if (n_blocks == 1) {
- // won't enter loop, can't underflow the direction < 0 case
- return nullptr;
- }
-
- struct block_allocator::blockpair *bp = &blocks_array[-1];
- for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
- n_spaces_to_check--, bp--) {
- // Consider the space after bp
- uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
- uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
- if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
- invariant(blocks_array - bp < (int64_t) n_blocks);
- return bp;
- }
- }
- return nullptr;
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- return _first_fit(blocks_array, n_blocks, size, alignment, 0);
-}
-
-// Best fit block allocation
-struct block_allocator::blockpair *
-block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- struct block_allocator::blockpair *best_bp = nullptr;
- uint64_t best_hole_size = 0;
- for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
- // Consider the space after blocknum
- struct block_allocator::blockpair *bp = &blocks_array[blocknum];
- uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
- uint64_t possible_end_offset = possible_offset + size;
- if (possible_end_offset <= bp[1].offset) {
- // It fits here. Is it the best fit?
- uint64_t hole_size = bp[1].offset - possible_end_offset;
- if (best_bp == nullptr || hole_size < best_hole_size) {
- best_hole_size = hole_size;
- best_bp = bp;
- }
- }
- }
- return best_bp;
-}
-
-static uint64_t padded_fit_alignment = 4096;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-// portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_padded_fit_alignment_from_env(void) {
- // TODO: Should be in portability as 'toku_os_getenv()?'
- const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
- if (s != nullptr && strlen(s) > 0) {
- const int64_t alignment = strtoll(s, nullptr, 10);
- if (alignment <= 0) {
- fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
- "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
- s, padded_fit_alignment);
- } else {
- padded_fit_alignment = _roundup_to_power_of_two(alignment);
- fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
- padded_fit_alignment);
- }
- }
-}
-
-// First fit into a block that is oversized by up to max_padding.
-// The hope is that if we purposefully waste a bit of space at allocation
-// time we'll be more likely to reuse this block later.
-struct block_allocator::blockpair *
-block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
-}
-
-static double hot_zone_threshold = 0.85;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-// portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_hot_zone_threshold_from_env(void) {
- // TODO: Should be in portability as 'toku_os_getenv()?'
- const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
- if (s != nullptr && strlen(s) > 0) {
- const double hot_zone = strtod(s, nullptr);
- if (hot_zone < 1 || hot_zone > 99) {
- fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
- "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
- hot_zone_threshold = 85 / 100;
- } else {
- fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
- hot_zone_threshold = hot_zone / 100;
- }
- }
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t heat) {
- if (heat > 0) {
- struct block_allocator::blockpair *bp, *boundary_bp;
-
- // Hot allocation. Find the beginning of the hot zone.
- boundary_bp = &blocks_array[n_blocks - 1];
- uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
- uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
-
- boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
- uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
- uint64_t blocks_outside_zone = boundary_bp - blocks_array;
- invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
-
- if (blocks_in_zone > 0) {
- // Find the first fit in the hot zone, going forward.
- bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
- if (bp != nullptr) {
- return bp;
- }
- }
- if (blocks_outside_zone > 0) {
- // Find the first fit in the cold zone, going backwards.
- bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
- if (bp != nullptr) {
- return bp;
- }
- }
- } else {
- // Cold allocations are simply first-fit from the beginning.
- return _first_fit(blocks_array, n_blocks, size, alignment, 0);
- }
- return nullptr;
-}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
index 7101ba9f58c..d2532134d96 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
@@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/ft-internal.h"
// TODO: reorganize this dependency (FT-303)
-#include "ft/ft-ops.h" // for toku_maybe_truncate_file
+#include "ft/ft-ops.h" // for toku_maybe_truncate_file
#include "ft/serialize/block_table.h"
#include "ft/serialize/rbuf.h"
#include "ft/serialize/wbuf.h"
#include "ft/serialize/block_allocator.h"
-
#include "util/nb_mutex.h"
#include "util/scoped_malloc.h"
// indicates the end of a freelist
-static const BLOCKNUM freelist_null = { -1 };
+static const BLOCKNUM freelist_null = {-1};
// value of block_translation_pair.size if blocknum is unused
-static const DISKOFF size_is_free = (DISKOFF) -1;
+static const DISKOFF size_is_free = (DISKOFF)-1;
-// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock
-static const DISKOFF diskoff_unused = (DISKOFF) -2;
+// value of block_translation_pair.u.diskoff if blocknum is used but does not
+// yet have a diskblock
+static const DISKOFF diskoff_unused = (DISKOFF)-2;
-void block_table::_mutex_lock() {
- toku_mutex_lock(&_mutex);
-}
+void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); }
-void block_table::_mutex_unlock() {
- toku_mutex_unlock(&_mutex);
-}
+void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); }
// TODO: Move lock to FT
void toku_ft_lock(FT ft) {
@@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) {
bt->_mutex_unlock();
}
-// There are two headers: the reserve must fit them both and be suitably aligned.
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0,
+// There are two headers: the reserve must fit them both and be suitably
+// aligned.
+static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT ==
+ 0,
"Block allocator's header reserve must be suitibly aligned");
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
- block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- "Block allocator's total header reserve must exactly fit two headers");
+static_assert(
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ "Block allocator's total header reserve must exactly fit two headers");
// does NOT initialize the block allocator: the caller is responsible
void block_table::_create_internal() {
@@ -99,25 +98,30 @@ void block_table::_create_internal() {
memset(&_inprogress, 0, sizeof(struct translation));
memset(&_checkpointed, 0, sizeof(struct translation));
memset(&_mutex, 0, sizeof(_mutex));
+ _bt_block_allocator = new BlockAllocator();
toku_mutex_init(&_mutex, nullptr);
nb_mutex_init(&_safe_file_size_lock);
}
-// Fill in the checkpointed translation from buffer, and copy checkpointed to current.
-// The one read from disk is the last known checkpointed one, so we are keeping it in
-// place and then setting current (which is never stored on disk) for current use.
-// The translation_buffer has translation only, we create the rest of the block_table.
-int block_table::create_from_buffer(int fd,
- DISKOFF location_on_disk, //Location of translation_buffer
- DISKOFF size_on_disk,
- unsigned char *translation_buffer) {
+// Fill in the checkpointed translation from buffer, and copy checkpointed to
+// current.
+// The one read from disk is the last known checkpointed one, so we are keeping
+// it in
+// place and then setting current (which is never stored on disk) for current
+// use.
+// The translation_buffer has translation only, we create the rest of the
+// block_table.
+int block_table::create_from_buffer(
+ int fd,
+ DISKOFF location_on_disk, // Location of translation_buffer
+ DISKOFF size_on_disk,
+ unsigned char *translation_buffer) {
// Does not initialize the block allocator
_create_internal();
// Deserialize the translation and copy it to current
- int r = _translation_deserialize_from_buffer(&_checkpointed,
- location_on_disk, size_on_disk,
- translation_buffer);
+ int r = _translation_deserialize_from_buffer(
+ &_checkpointed, location_on_disk, size_on_disk, translation_buffer);
if (r != 0) {
return r;
}
@@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd,
invariant(file_size >= 0);
_safe_file_size = file_size;
- // Gather the non-empty translations and use them to create the block allocator
+ // Gather the non-empty translations and use them to create the block
+ // allocator
toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b *
- sizeof(struct block_allocator::blockpair));
- struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
+ sizeof(struct BlockAllocator::BlockPair));
+ struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
uint64_t n_pairs = 0;
for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) {
struct block_translation_pair pair = _checkpointed.block_translation[i];
if (pair.size > 0) {
invariant(pair.u.diskoff != diskoff_unused);
- pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size);
+ pairs[n_pairs++] =
+ BlockAllocator::BlockPair(pair.u.diskoff, pair.size);
}
}
- _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT,
- pairs, n_pairs);
+ _bt_block_allocator->CreateFromBlockPairs(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT,
+ pairs,
+ n_pairs);
return 0;
}
@@ -155,8 +163,10 @@ void block_table::create() {
_create_internal();
_checkpointed.type = TRANSLATION_CHECKPOINTED;
- _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS);
- _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b;
+ _checkpointed.smallest_never_used_blocknum =
+ make_blocknum(RESERVED_BLOCKNUMS);
+ _checkpointed.length_of_array =
+ _checkpointed.smallest_never_used_blocknum.b;
_checkpointed.blocknum_freelist_head = freelist_null;
XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation);
for (int64_t i = 0; i < _checkpointed.length_of_array; i++) {
@@ -164,12 +174,13 @@ void block_table::create() {
_checkpointed.block_translation[i].u.diskoff = diskoff_unused;
}
- // we just created a default checkpointed, now copy it to current.
+ // we just created a default checkpointed, now copy it to current.
_copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);
// Create an empty block allocator.
- _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT);
+ _bt_block_allocator->Create(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);
}
// TODO: Refactor with FT-303
@@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) {
void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) {
toku_mutex_assert_locked(&_mutex);
- uint64_t new_size_needed = _bt_block_allocator.allocated_limit();
- //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful.
- if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) {
+ uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit();
+ // Save a call to toku_os_get_file_size (kernel call) if unlikely to be
+ // useful.
+ if (new_size_needed < size_needed_before &&
+ new_size_needed < _safe_file_size) {
nb_mutex_lock(&_safe_file_size_lock, &_mutex);
// Must hold _safe_file_size_lock to change _safe_file_size.
if (new_size_needed < _safe_file_size) {
int64_t safe_file_size_before = _safe_file_size;
- // Not safe to use the 'to-be-truncated' portion until truncate is done.
+ // Not safe to use the 'to-be-truncated' portion until truncate is
+ // done.
_safe_file_size = new_size_needed;
_mutex_unlock();
uint64_t size_after;
- toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after);
+ toku_maybe_truncate_file(
+ fd, new_size_needed, safe_file_size_before, &size_after);
_mutex_lock();
_safe_file_size = size_after;
@@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) {
_mutex_unlock();
}
-void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) {
- // We intend to malloc a fresh block, so the incoming translation should be empty
+void block_table::_copy_translation(struct translation *dst,
+ struct translation *src,
+ enum translation_type newtype) {
+ // We intend to malloc a fresh block, so the incoming translation should be
+ // empty
invariant_null(dst->block_translation);
invariant(src->length_of_array >= src->smallest_never_used_blocknum.b);
invariant(newtype == TRANSLATION_DEBUG ||
- (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) ||
- (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT));
+ (src->type == TRANSLATION_CURRENT &&
+ newtype == TRANSLATION_INPROGRESS) ||
+ (src->type == TRANSLATION_CHECKPOINTED &&
+ newtype == TRANSLATION_CURRENT));
dst->type = newtype;
dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum;
- dst->blocknum_freelist_head = src->blocknum_freelist_head;
+ dst->blocknum_freelist_head = src->blocknum_freelist_head;
- // destination btt is of fixed size. Allocate + memcpy the exact length necessary.
+ // destination btt is of fixed size. Allocate + memcpy the exact length
+ // necessary.
dst->length_of_array = dst->smallest_never_used_blocknum.b;
XMALLOC_N(dst->length_of_array, dst->block_translation);
- memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation));
+ memcpy(dst->block_translation,
+ src->block_translation,
+ dst->length_of_array * sizeof(*dst->block_translation));
// New version of btt is not yet stored on disk.
dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0;
- dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused;
+ dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff =
+ diskoff_unused;
}
int64_t block_table::get_blocks_in_use_unlocked() {
@@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() {
struct translation *t = &_current;
int64_t num_blocks = 0;
{
- //Reserved blocknums do not get upgraded; They are part of the header.
- for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+ // Reserved blocknums do not get upgraded; They are part of the header.
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+ b.b++) {
if (t->block_translation[b.b].size != size_is_free) {
num_blocks++;
}
@@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() {
}
void block_table::_maybe_optimize_translation(struct translation *t) {
- //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just
- //on a free list. Doing so requires us to regenerate the free list.
- //This is O(n) work, so do it only if you're already doing that.
+ // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums
+ // instead of just
+ // on a free list). Doing so requires us to regenerate the free list.
+ // This is O(n) work, so do it only if you're already doing that.
BLOCKNUM b;
paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
- //Calculate how large the free suffix is.
+ // Calculate how large the free suffix is.
int64_t freed;
{
- for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) {
- if (t->block_translation[b.b-1].size != size_is_free) {
+ for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS;
+ b.b--) {
+ if (t->block_translation[b.b - 1].size != size_is_free) {
break;
}
}
freed = t->smallest_never_used_blocknum.b - b.b;
}
- if (freed>0) {
+ if (freed > 0) {
t->smallest_never_used_blocknum.b = b.b;
- if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) {
- //We're using more memory than necessary to represent this now. Reduce.
+ if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) {
+ // We're using more memory than necessary to represent this now.
+ // Reduce.
uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
XREALLOC_N(new_length, t->block_translation);
t->length_of_array = new_length;
- //No need to zero anything out.
+ // No need to zero anything out.
}
- //Regenerate free list.
+ // Regenerate free list.
t->blocknum_freelist_head.b = freelist_null.b;
- for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+ b.b++) {
if (t->block_translation[b.b].size == size_is_free) {
- t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
- t->blocknum_freelist_head = b;
+ t->block_translation[b.b].u.next_free_blocknum =
+ t->blocknum_freelist_head;
+ t->blocknum_freelist_head = b;
}
}
}
@@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() {
}
void block_table::note_skipped_checkpoint() {
- //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header
+ // Purpose: alert block translation that the checkpoint was skipped, e.g.
+ // for a non-dirty header
_mutex_lock();
paranoid_invariant_notnull(_inprogress.block_translation);
_checkpoint_skipped = true;
_mutex_unlock();
}
-// Purpose: free any disk space used by previous checkpoint that isn't in use by either
+// Purpose: free any disk space used by previous checkpoint that isn't in use by
+// either
// - current state
// - in-progress checkpoint
// capture inprogress as new checkpointed.
@@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() {
void block_table::note_end_checkpoint(int fd) {
// Free unused blocks
_mutex_lock();
- uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit();
+ uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit();
paranoid_invariant_notnull(_inprogress.block_translation);
if (_checkpoint_skipped) {
toku_free(_inprogress.block_translation);
@@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) {
goto end;
}
- //Make certain inprogress was allocated space on disk
- assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
- assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0);
+ // Make certain inprogress was allocated space on disk
+ invariant(
+ _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
+ invariant(
+ _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff >
+ 0);
{
struct translation *t = &_checkpointed;
for (int64_t i = 0; i < t->length_of_array; i++) {
struct block_translation_pair *pair = &t->block_translation[i];
- if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) {
- assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair));
- _bt_block_allocator.free_block(pair->u.diskoff);
+ if (pair->size > 0 &&
+ !_translation_prevents_freeing(
+ &_inprogress, make_blocknum(i), pair)) {
+ invariant(!_translation_prevents_freeing(
+ &_current, make_blocknum(i), pair));
+ _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size);
}
}
toku_free(_checkpointed.block_translation);
@@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) {
return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b;
}
-void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_blocknum(struct translation *UU(t),
+ BLOCKNUM UU(b)) {
invariant(_is_valid_blocknum(t, b));
}
-bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) {
+bool block_table::_is_valid_freeable_blocknum(struct translation *t,
+ BLOCKNUM b) {
invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b;
}
// should be freeable
-void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t),
+ BLOCKNUM UU(b)) {
invariant(_is_valid_freeable_blocknum(t, b));
}
// Also used only in ft-serialize-test.
-void block_table::block_free(uint64_t offset) {
+void block_table::block_free(uint64_t offset, uint64_t size) {
_mutex_lock();
- _bt_block_allocator.free_block(offset);
+ _bt_block_allocator->FreeBlock(offset, size);
_mutex_unlock();
}
int64_t block_table::_calculate_size_on_disk(struct translation *t) {
- return 8 + // smallest_never_used_blocknum
- 8 + // blocknum_freelist_head
- t->smallest_never_used_blocknum.b * 16 + // Array
- 4; // 4 for checksum
+ return 8 + // smallest_never_used_blocknum
+ 8 + // blocknum_freelist_head
+ t->smallest_never_used_blocknum.b * 16 + // Array
+ 4; // 4 for checksum
}
-// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table.
-bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) {
- return t->block_translation &&
- b.b < t->smallest_never_used_blocknum.b &&
+// We cannot free the disk space allocated to this blocknum if it is still in
+// use by the given translation table.
+bool block_table::_translation_prevents_freeing(
+ struct translation *t,
+ BLOCKNUM b,
+ struct block_translation_pair *old_pair) {
+ return t->block_translation && b.b < t->smallest_never_used_blocknum.b &&
old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
}
-void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) {
+void block_table::_realloc_on_disk_internal(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ bool for_checkpoint) {
toku_mutex_assert_locked(&_mutex);
ft_set_dirty(ft, for_checkpoint);
struct translation *t = &_current;
struct block_translation_pair old_pair = t->block_translation[b.b];
- //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
- bool cannot_free = (bool)
- ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
- _translation_prevents_freeing(&_checkpointed, b, &old_pair));
- if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) {
- _bt_block_allocator.free_block(old_pair.u.diskoff);
+ // Free the old block if it is not still in use by the checkpoint in
+ // progress or the previous checkpoint
+ bool cannot_free =
+ (!for_checkpoint &&
+ _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
+ _translation_prevents_freeing(&_checkpointed, b, &old_pair);
+ if (!cannot_free && old_pair.u.diskoff != diskoff_unused) {
+ _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
}
uint64_t allocator_offset = diskoff_unused;
@@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o
if (size > 0) {
// Allocate a new block if the size is greater than 0,
// if the size is just 0, offset will be set to diskoff_unused
- _bt_block_allocator.alloc_block(size, heat, &allocator_offset);
+ _bt_block_allocator->AllocBlock(size, &allocator_offset);
}
t->block_translation[b.b].u.diskoff = allocator_offset;
*offset = allocator_offset;
- //Update inprogress btt if appropriate (if called because Pending bit is set).
+ // Update inprogress btt if appropriate (if called because Pending bit is
+ // set).
if (for_checkpoint) {
paranoid_invariant(b.b < _inprogress.length_of_array);
_inprogress.block_translation[b.b] = t->block_translation[b.b];
}
}
-void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) {
+void block_table::_ensure_safe_write_unlocked(int fd,
+ DISKOFF block_size,
+ DISKOFF block_offset) {
// Requires: holding _mutex
uint64_t size_needed = block_size + block_offset;
if (size_needed > _safe_file_size) {
@@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
_mutex_unlock();
int64_t size_after;
- toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after);
+ toku_maybe_preallocate_in_file(
+ fd, size_needed, _safe_file_size, &size_after);
_mutex_lock();
_safe_file_size = size_after;
@@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
}
}
-void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) {
+void block_table::realloc_on_disk(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ int fd,
+ bool for_checkpoint) {
_mutex_lock();
struct translation *t = &_current;
_verify_valid_freeable_blocknum(t, b);
- _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat);
+ _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint);
_ensure_safe_write_unlocked(fd, size, *offset);
_mutex_unlock();
@@ -458,70 +517,97 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) {
return pair->size == 0 && pair->u.diskoff == diskoff_unused;
}
-// Effect: figure out where to put the inprogress btt on disk, allocate space for it there.
-// The space must be 512-byte aligned (both the starting address and the size).
-// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt.
+// Effect: figure out where to put the inprogress btt on disk, allocate space
+// for it there.
+// The space must be 512-byte aligned (both the starting address and the
+// size).
+// As a result, the allocated space may be a little bit bigger (up to the next
+// 512-byte boundary) than the actual btt.
void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
toku_mutex_assert_locked(&_mutex);
struct translation *t = &_inprogress;
paranoid_invariant_notnull(t->block_translation);
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
- //Each inprogress is allocated only once
+ // Each inprogress is allocated only once
paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b]));
- //Allocate a new block
+ // Allocate a new block
int64_t size = _calculate_size_on_disk(t);
uint64_t offset;
- _bt_block_allocator.alloc_block(size, 0, &offset);
+ _bt_block_allocator->AllocBlock(size, &offset);
t->block_translation[b.b].u.diskoff = offset;
- t->block_translation[b.b].size = size;
+ t->block_translation[b.b].size = size;
}
// Effect: Serializes the blocktable to a wbuf (which starts uninitialized)
-// A clean shutdown runs checkpoint start so that current and inprogress are copies.
-// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd)
-// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed.
-// It *is* guaranteed that we can read up to the next 512-byte boundary, however
-void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
- int64_t *address, int64_t *size) {
+// A clean shutdown runs checkpoint start so that current and inprogress are
+// copies.
+// The resulting wbuf buffer is guaranteed to be 512-byte aligned and the
+// total length is a multiple of 512 (so we pad with zeros at the end if
+// needed)
+// The address is guaranteed to be 512-byte aligned, but the size is not
+// guaranteed.
+// It *is* guaranteed that we can read up to the next 512-byte boundary,
+// however
+void block_table::serialize_translation_to_wbuf(int fd,
+ struct wbuf *w,
+ int64_t *address,
+ int64_t *size) {
_mutex_lock();
struct translation *t = &_inprogress;
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
- _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy.
+ _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block
+ // must be 512-byte
+ // aligned to make
+ // O_DIRECT happy.
uint64_t size_translation = _calculate_size_on_disk(t);
- uint64_t size_aligned = roundup_to_multiple(512, size_translation);
- assert((int64_t)size_translation==t->block_translation[b.b].size);
+ uint64_t size_aligned = roundup_to_multiple(512, size_translation);
+ invariant((int64_t)size_translation == t->block_translation[b.b].size);
{
- //Init wbuf
+ // Init wbuf
if (0)
- printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff);
+ printf(
+ "%s:%d writing translation table of size_translation %" PRIu64
+ " at %" PRId64 "\n",
+ __FILE__,
+ __LINE__,
+ size_translation,
+ t->block_translation[b.b].u.diskoff);
char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
- for (uint64_t i=size_translation; i<size_aligned; i++) buf[i]=0; // fill in the end of the buffer with zeros.
+ for (uint64_t i = size_translation; i < size_aligned; i++)
+ buf[i] = 0; // fill in the end of the buffer with zeros.
wbuf_init(w, buf, size_aligned);
}
- wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
- wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
+ wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
+ wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
int64_t i;
- for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+ for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
if (0)
- printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ printf("%s:%d %" PRId64 ",%" PRId64 "\n",
+ __FILE__,
+ __LINE__,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
wbuf_DISKOFF(w, t->block_translation[i].size);
}
uint32_t checksum = toku_x1764_finish(&w->checksum);
wbuf_int(w, checksum);
*address = t->block_translation[b.b].u.diskoff;
- *size = size_translation;
- assert((*address)%512 == 0);
+ *size = size_translation;
+ invariant((*address) % 512 == 0);
_ensure_safe_write_unlocked(fd, size_aligned, *address);
_mutex_unlock();
}
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is get disk address of a block, given its blocknum
+// (blockid?)
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size) {
struct translation *t = &_current;
_verify_valid_blocknum(t, b);
if (offset) {
@@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF
}
}
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is get disk address of a block, given its blocknum
+// (blockid?)
+void block_table::translate_blocknum_to_offset_size(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size) {
_mutex_lock();
_translate_blocknum_to_offset_size_unlocked(b, offset, size);
_mutex_unlock();
@@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset,
// given that one more never-used blocknum will soon be used.
void block_table::_maybe_expand_translation(struct translation *t) {
if (t->length_of_array <= t->smallest_never_used_blocknum.b) {
- //expansion is necessary
+ // expansion is necessary
uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
XREALLOC_N(new_length, t->block_translation);
uint64_t i;
for (i = t->length_of_array; i < new_length; i++) {
t->block_translation[i].u.next_free_blocknum = freelist_null;
- t->block_translation[i].size = size_is_free;
+ t->block_translation[i].size = size_is_free;
}
t->length_of_array = new_length;
}
@@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
if (t->blocknum_freelist_head.b == freelist_null.b) {
// no previously used blocknums are available
// use a never used blocknum
- _maybe_expand_translation(t); //Ensure a never used blocknums is available
+ _maybe_expand_translation(
+ t); // Ensure a never used blocknums is available
result = t->smallest_never_used_blocknum;
t->smallest_never_used_blocknum.b++;
} else { // reuse a previously used blocknum
@@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum;
t->blocknum_freelist_head = next;
}
- //Verify the blocknum is free
+ // Verify the blocknum is free
paranoid_invariant(t->block_translation[result.b].size == size_is_free);
- //blocknum is not free anymore
+ // blocknum is not free anymore
t->block_translation[result.b].u.diskoff = diskoff_unused;
- t->block_translation[result.b].size = 0;
+ t->block_translation[result.b].size = 0;
_verify_valid_freeable_blocknum(t, result);
*res = result;
ft_set_dirty(ft, false);
@@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) {
_mutex_unlock();
}
-void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) {
+void block_table::_free_blocknum_in_translation(struct translation *t,
+ BLOCKNUM b) {
_verify_valid_freeable_blocknum(t, b);
paranoid_invariant(t->block_translation[b.b].size != size_is_free);
- t->block_translation[b.b].size = size_is_free;
+ t->block_translation[b.b].size = size_is_free;
t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
- t->blocknum_freelist_head = b;
+ t->blocknum_freelist_head = b;
}
// Effect: Free a blocknum.
// If the blocknum holds the only reference to a block on disk, free that block
-void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) {
+void block_table::_free_blocknum_unlocked(BLOCKNUM *bp,
+ FT ft,
+ bool for_checkpoint) {
toku_mutex_assert_locked(&_mutex);
BLOCKNUM b = *bp;
- bp->b = 0; //Remove caller's reference.
+ bp->b = 0; // Remove caller's reference.
struct block_translation_pair old_pair = _current.block_translation[b.b];
_free_blocknum_in_translation(&_current, b);
if (for_checkpoint) {
- paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
+ paranoid_invariant(ft->checkpoint_header->type ==
+ FT_CHECKPOINT_INPROGRESS);
_free_blocknum_in_translation(&_inprogress, b);
}
- //If the size is 0, no disk block has ever been assigned to this blocknum.
+ // If the size is 0, no disk block has ever been assigned to this blocknum.
if (old_pair.size > 0) {
- //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
- bool cannot_free = (bool)
- (_translation_prevents_freeing(&_inprogress, b, &old_pair) ||
- _translation_prevents_freeing(&_checkpointed, b, &old_pair));
+ // Free the old block if it is not still in use by the checkpoint in
+ // progress or the previous checkpoint
+ bool cannot_free =
+ _translation_prevents_freeing(&_inprogress, b, &old_pair) ||
+ _translation_prevents_freeing(&_checkpointed, b, &old_pair);
if (!cannot_free) {
- _bt_block_allocator.free_block(old_pair.u.diskoff);
+ _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
}
- }
- else {
- paranoid_invariant(old_pair.size==0);
+ } else {
+ paranoid_invariant(old_pair.size == 0);
paranoid_invariant(old_pair.u.diskoff == diskoff_unused);
}
ft_set_dirty(ft, for_checkpoint);
@@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() {
void block_table::free_unused_blocknums(BLOCKNUM root) {
_mutex_lock();
int64_t smallest = _current.smallest_never_used_blocknum.b;
- for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) {
+ for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) {
if (i == root.b) {
continue;
}
BLOCKNUM b = make_blocknum(i);
if (_current.block_translation[b.b].size == 0) {
- invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused);
+ invariant(_current.block_translation[b.b].u.diskoff ==
+ diskoff_unused);
_free_blocknum_in_translation(&_current, b);
}
}
@@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) {
goto cleanup;
}
}
- cleanup:
+cleanup:
_mutex_unlock();
return ok;
}
// Verify there are no data blocks except root.
-// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now.
+// TODO(leif): This actually takes a lock, but I don't want to fix all the
+// callers right now.
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
paranoid_invariant(_no_data_blocks_except_root(root));
}
@@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
if (t->block_translation) {
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
- fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b);
- fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b);
- fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
- fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff);
+ fprintf(f,
+ " smallest_never_used_blocknum[%" PRId64 "]",
+ t->smallest_never_used_blocknum.b);
+ fprintf(f,
+ " blocknum_free_list_head[%" PRId64 "]",
+ t->blocknum_freelist_head.b);
+ fprintf(
+ f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
+ fprintf(f,
+ " location_on_disk[%" PRId64 "]\n",
+ t->block_translation[b.b].u.diskoff);
int64_t i;
- for (i=0; i<t->length_of_array; i++) {
- fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ for (i = 0; i < t->length_of_array; i++) {
+ fprintf(f,
+ " %" PRId64 ": %" PRId64 " %" PRId64 "\n",
+ i,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
}
fprintf(f, "\n");
} else {
@@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
void block_table::dump_translation_table_pretty(FILE *f) {
_mutex_lock();
struct translation *t = &_checkpointed;
- assert(t->block_translation != nullptr);
+ invariant(t->block_translation != nullptr);
for (int64_t i = 0; i < t->length_of_array; ++i) {
- fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ fprintf(f,
+ "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n",
+ i,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
}
_mutex_unlock();
}
@@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) {
struct translation *t = &_current;
if (b.b < t->length_of_array) {
struct block_translation_pair *bx = &t->block_translation[b.b];
- printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size);
+ printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n",
+ b.b,
+ bx->u.diskoff,
+ bx->size);
}
_mutex_unlock();
}
@@ -763,26 +877,31 @@ void block_table::destroy(void) {
toku_free(_inprogress.block_translation);
toku_free(_checkpointed.block_translation);
- _bt_block_allocator.destroy();
+ _bt_block_allocator->Destroy();
+ delete _bt_block_allocator;
toku_mutex_destroy(&_mutex);
nb_mutex_destroy(&_safe_file_size_lock);
}
-int block_table::_translation_deserialize_from_buffer(struct translation *t,
- DISKOFF location_on_disk,
- uint64_t size_on_disk,
- // out: buffer with serialized translation
- unsigned char *translation_buffer) {
+int block_table::_translation_deserialize_from_buffer(
+ struct translation *t,
+ DISKOFF location_on_disk,
+ uint64_t size_on_disk,
+ // out: buffer with serialized translation
+ unsigned char *translation_buffer) {
int r = 0;
- assert(location_on_disk != 0);
+ invariant(location_on_disk != 0);
t->type = TRANSLATION_CHECKPOINTED;
// check the checksum
uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4);
uint64_t offset = size_on_disk - 4;
- uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset));
+ uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset));
if (x1764 != stored_x1764) {
- fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764);
+ fprintf(stderr,
+ "Translation table checksum failure: calc=0x%08x read=0x%08x\n",
+ x1764,
+ stored_x1764);
r = TOKUDB_BAD_CHECKSUM;
goto exit;
}
@@ -790,42 +909,47 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t,
struct rbuf rb;
rb.buf = translation_buffer;
rb.ndone = 0;
- rb.size = size_on_disk-4;//4==checksum
+ rb.size = size_on_disk - 4; // 4==checksum
- t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
+ t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
t->length_of_array = t->smallest_never_used_blocknum.b;
invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
- t->blocknum_freelist_head = rbuf_blocknum(&rb);
+ t->blocknum_freelist_head = rbuf_blocknum(&rb);
XMALLOC_N(t->length_of_array, t->block_translation);
for (int64_t i = 0; i < t->length_of_array; i++) {
t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
t->block_translation[i].size = rbuf_DISKOFF(&rb);
}
- invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk);
- invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk);
- invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk);
+ invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk);
+ invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size ==
+ (int64_t)size_on_disk);
+ invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff ==
+ location_on_disk);
exit:
return r;
}
int block_table::iterate(enum translation_type type,
- BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) {
+ BLOCKTABLE_CALLBACK f,
+ void *extra,
+ bool data_only,
+ bool used_only) {
struct translation *src;
-
+
int r = 0;
switch (type) {
- case TRANSLATION_CURRENT:
- src = &_current;
- break;
- case TRANSLATION_INPROGRESS:
- src = &_inprogress;
- break;
- case TRANSLATION_CHECKPOINTED:
- src = &_checkpointed;
- break;
- default:
- r = EINVAL;
+ case TRANSLATION_CURRENT:
+ src = &_current;
+ break;
+ case TRANSLATION_INPROGRESS:
+ src = &_inprogress;
+ break;
+ case TRANSLATION_CHECKPOINTED:
+ src = &_checkpointed;
+ break;
+ default:
+ r = EINVAL;
}
struct translation fakecurrent;
@@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type,
src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
_mutex_unlock();
int64_t i;
- for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+ for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
struct block_translation_pair pair = t->block_translation[i];
- if (data_only && i< RESERVED_BLOCKNUMS) continue;
- if (used_only && pair.size <= 0) continue;
+ if (data_only && i < RESERVED_BLOCKNUMS)
+ continue;
+ if (used_only && pair.size <= 0)
+ continue;
r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
- if (r!=0) break;
+ if (r != 0)
+ break;
}
toku_free(t->block_translation);
}
@@ -856,8 +983,11 @@ typedef struct {
int64_t total_space;
} frag_extra;
-static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) {
- frag_extra *info = (frag_extra *) extra;
+static int frag_helper(BLOCKNUM UU(b),
+ int64_t size,
+ int64_t address,
+ void *extra) {
+ frag_extra *info = (frag_extra *)extra;
if (size + address > info->total_space)
info->total_space = size + address;
@@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr
return 0;
}
-void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) {
- frag_extra info = { 0, 0 };
+void block_table::internal_fragmentation(int64_t *total_sizep,
+ int64_t *used_sizep) {
+ frag_extra info = {0, 0};
int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
- assert_zero(r);
+ invariant_zero(r);
- if (total_sizep) *total_sizep = info.total_space;
- if (used_sizep) *used_sizep = info.used_space;
+ if (total_sizep)
+ *total_sizep = info.total_space;
+ if (used_sizep)
+ *used_sizep = info.used_space;
}
-void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) {
+void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size,
+ DISKOFF *offset,
+ FT ft) {
toku_mutex_assert_locked(&_mutex);
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
- _realloc_on_disk_internal(b, size, offset, ft, false, 0);
+ _realloc_on_disk_internal(b, size, offset, ft, false);
}
-void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) {
+void block_table::realloc_descriptor_on_disk(DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ int fd) {
_mutex_lock();
_realloc_descriptor_on_disk_unlocked(size, offset, ft);
_ensure_safe_write_unlocked(fd, size, *offset);
@@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) {
void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
// Requires: blocktable lock is held.
// Requires: report->file_size_bytes is already filled in.
-
+
// Count the headers.
- report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
report->data_blocks = 1;
- report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ report->checkpoint_bytes_additional =
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
report->checkpoint_blocks_additional = 1;
struct translation *current = &_current;
@@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
struct translation *checkpointed = &_checkpointed;
for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
- struct block_translation_pair *pair = &checkpointed->block_translation[i];
- if (pair->size > 0 && !(i < current->length_of_array &&
- current->block_translation[i].size > 0 &&
- current->block_translation[i].u.diskoff == pair->u.diskoff)) {
- report->checkpoint_bytes_additional += pair->size;
- report->checkpoint_blocks_additional++;
+ struct block_translation_pair *pair =
+ &checkpointed->block_translation[i];
+ if (pair->size > 0 &&
+ !(i < current->length_of_array &&
+ current->block_translation[i].size > 0 &&
+ current->block_translation[i].u.diskoff == pair->u.diskoff)) {
+ report->checkpoint_bytes_additional += pair->size;
+ report->checkpoint_blocks_additional++;
}
}
struct translation *inprogress = &_inprogress;
for (int64_t i = 0; i < inprogress->length_of_array; i++) {
struct block_translation_pair *pair = &inprogress->block_translation[i];
- if (pair->size > 0 && !(i < current->length_of_array &&
- current->block_translation[i].size > 0 &&
- current->block_translation[i].u.diskoff == pair->u.diskoff) &&
- !(i < checkpointed->length_of_array &&
- checkpointed->block_translation[i].size > 0 &&
- checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
+ if (pair->size > 0 &&
+ !(i < current->length_of_array &&
+ current->block_translation[i].size > 0 &&
+ current->block_translation[i].u.diskoff == pair->u.diskoff) &&
+ !(i < checkpointed->length_of_array &&
+ checkpointed->block_translation[i].size > 0 &&
+ checkpointed->block_translation[i].u.diskoff ==
+ pair->u.diskoff)) {
report->checkpoint_bytes_additional += pair->size;
report->checkpoint_blocks_additional++;
}
}
- _bt_block_allocator.get_unused_statistics(report);
+ _bt_block_allocator->UnusedStatistics(report);
}
void block_table::get_info64(struct ftinfo64 *s) {
@@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) {
_mutex_unlock();
}
-int block_table::iterate_translation_tables(uint64_t checkpoint_count,
- int (*iter)(uint64_t checkpoint_count,
- int64_t total_num_rows,
- int64_t blocknum,
- int64_t diskoff,
- int64_t size,
- void *extra),
- void *iter_extra) {
+int block_table::iterate_translation_tables(
+ uint64_t checkpoint_count,
+ int (*iter)(uint64_t checkpoint_count,
+ int64_t total_num_rows,
+ int64_t blocknum,
+ int64_t diskoff,
+ int64_t size,
+ void *extra),
+ void *iter_extra) {
int error = 0;
_mutex_lock();
- int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array;
+ int64_t total_num_rows =
+ _current.length_of_array + _checkpointed.length_of_array;
for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
struct block_translation_pair *block = &_current.block_translation[i];
- error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+ error = iter(checkpoint_count,
+ total_num_rows,
+ i,
+ block->u.diskoff,
+ block->size,
+ iter_extra);
}
for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
- struct block_translation_pair *block = &_checkpointed.block_translation[i];
- error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+ struct block_translation_pair *block =
+ &_checkpointed.block_translation[i];
+ error = iter(checkpoint_count - 1,
+ total_num_rows,
+ i,
+ block->u.diskoff,
+ block->size,
+ iter_extra);
}
_mutex_unlock();
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
index 8d391674540..dd732d4f372 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
@@ -62,13 +62,16 @@ enum {
RESERVED_BLOCKNUMS
};
-typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);
+typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b,
+ int64_t size,
+ int64_t address,
+ void *extra);
static inline BLOCKNUM make_blocknum(int64_t b) {
- BLOCKNUM result = { .b = b };
+ BLOCKNUM result = {.b = b};
return result;
}
-static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
+static const BLOCKNUM ROLLBACK_NONE = {.b = 0};
/**
* There are three copies of the translation table (btt) in the block table:
@@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
*
* inprogress Is only filled by copying from current,
* and is the only version ever serialized to disk.
- * (It is serialized to disk on checkpoint and clean shutdown.)
+ * (It is serialized to disk on checkpoint and clean
+ *shutdown.)
* At end of checkpoint it replaces 'checkpointed'.
* During a checkpoint, any 'pending' dirty writes will update
* inprogress.
*
* current Is initialized by copying from checkpointed,
- * is the only version ever modified while the database is in use,
+ * is the only version ever modified while the database is in
+ *use,
* and is the only version ever copied to inprogress.
* It is never stored on disk.
*/
class block_table {
-public:
+ public:
enum translation_type {
TRANSLATION_NONE = 0,
TRANSLATION_CURRENT,
@@ -102,7 +107,10 @@ public:
void create();
- int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
+ int create_from_buffer(int fd,
+ DISKOFF location_on_disk,
+ DISKOFF size_on_disk,
+ unsigned char *translation_buffer);
void destroy();
@@ -114,11 +122,21 @@ public:
// Blocknums
void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
- void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
+ void realloc_on_disk(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ int fd,
+ bool for_checkpoint);
void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
- void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+ void translate_blocknum_to_offset_size(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size);
void free_unused_blocknums(BLOCKNUM root);
- void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd);
+ void realloc_descriptor_on_disk(DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ int fd);
void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);
// External verfication
@@ -127,15 +145,22 @@ public:
void verify_no_free_blocknums();
// Serialization
- void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size);
+ void serialize_translation_to_wbuf(int fd,
+ struct wbuf *w,
+ int64_t *address,
+ int64_t *size);
// DEBUG ONLY (ftdump included), tests included
void blocknum_dump_translation(BLOCKNUM b);
void dump_translation_table_pretty(FILE *f);
void dump_translation_table(FILE *f);
- void block_free(uint64_t offset);
+ void block_free(uint64_t offset, uint64_t size);
- int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only);
+ int iterate(enum translation_type type,
+ BLOCKTABLE_CALLBACK f,
+ void *extra,
+ bool data_only,
+ bool used_only);
void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);
// Requires: blocktable lock is held.
@@ -146,13 +171,16 @@ public:
void get_info64(struct ftinfo64 *);
- int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *);
+ int iterate_translation_tables(
+ uint64_t,
+ int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *),
+ void *);
-private:
+ private:
struct block_translation_pair {
// If in the freelist, use next_free_blocknum, otherwise diskoff.
union {
- DISKOFF diskoff;
+ DISKOFF diskoff;
BLOCKNUM next_free_blocknum;
} u;
@@ -173,7 +201,8 @@ private:
struct translation {
enum translation_type type;
- // Number of elements in array (block_translation). always >= smallest_never_used_blocknum
+ // Number of elements in array (block_translation). always >=
+ // smallest_never_used_blocknum
int64_t length_of_array;
BLOCKNUM smallest_never_used_blocknum;
@@ -181,20 +210,28 @@ private:
BLOCKNUM blocknum_freelist_head;
struct block_translation_pair *block_translation;
- // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
- // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
+ // size_on_disk is stored in
+ // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
+ // location_on_disk is stored in
+ // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
};
void _create_internal();
- int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize
- DISKOFF location_on_disk, // location of translation_buffer
- uint64_t size_on_disk,
- unsigned char * translation_buffer); // buffer with serialized translation
-
- void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype);
+ int _translation_deserialize_from_buffer(
+ struct translation *t, // destination into which to deserialize
+ DISKOFF location_on_disk, // location of translation_buffer
+ uint64_t size_on_disk,
+ unsigned char *
+ translation_buffer); // buffer with serialized translation
+
+ void _copy_translation(struct translation *dst,
+ struct translation *src,
+ enum translation_type newtype);
void _maybe_optimize_translation(struct translation *t);
void _maybe_expand_translation(struct translation *t);
- bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair);
+ bool _translation_prevents_freeing(struct translation *t,
+ BLOCKNUM b,
+ struct block_translation_pair *old_pair);
void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
int64_t _calculate_size_on_disk(struct translation *t);
bool _pair_is_unallocated(struct block_translation_pair *pair);
@@ -203,14 +240,26 @@ private:
// Blocknum management
void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
- void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
- void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
- void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
- void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+ void _free_blocknum_unlocked(BLOCKNUM *bp,
+ struct ft *ft,
+ bool for_checkpoint);
+ void _realloc_descriptor_on_disk_unlocked(DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft);
+ void _realloc_on_disk_internal(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ bool for_checkpoint);
+ void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size);
// File management
void _maybe_truncate_file(int fd, uint64_t size_needed_before);
- void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset);
+ void _ensure_safe_write_unlocked(int fd,
+ DISKOFF block_size,
+ DISKOFF block_offset);
// Verification
bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
@@ -220,29 +269,33 @@ private:
bool _no_data_blocks_except_root(BLOCKNUM root);
bool _blocknum_allocated(BLOCKNUM b);
- // Locking
+ // Locking
//
// TODO: Move the lock to the FT
void _mutex_lock();
void _mutex_unlock();
- // The current translation is the one used by client threads.
+ // The current translation is the one used by client threads.
// It is not represented on disk.
struct translation _current;
- // The translation used by the checkpoint currently in progress.
- // If the checkpoint thread allocates a block, it must also update the current translation.
+ // The translation used by the checkpoint currently in progress.
+ // If the checkpoint thread allocates a block, it must also update the
+ // current translation.
struct translation _inprogress;
- // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes,
+ // The translation for the data that shall remain inviolate on disk until
+ // the next checkpoint finishes,
// after which any blocks used only in this translation can be freed.
struct translation _checkpointed;
- // The in-memory data structure for block allocation.
+ // The in-memory data structure for block allocation.
// There is no on-disk data structure for block allocation.
- // Note: This is *allocation* not *translation* - the block allocator is unaware of which
- // blocks are used for which translation, but simply allocates and deallocates blocks.
- block_allocator _bt_block_allocator;
+ // Note: This is *allocation* not *translation* - the block allocator is
+ // unaware of which
+ // blocks are used for which translation, but simply allocates and
+ // deallocates blocks.
+ BlockAllocator *_bt_block_allocator;
toku_mutex_t _mutex;
struct nb_mutex _safe_file_size_lock;
bool _checkpoint_skipped;
@@ -257,16 +310,16 @@ private:
#include "ft/serialize/wbuf.h"
-static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
wbuf_ulonglong(w, b.b);
}
-static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
wbuf_nocrc_ulonglong(w, b.b);
}
static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) {
- wbuf_ulonglong(wb, (uint64_t) off);
+ wbuf_ulonglong(wb, (uint64_t)off);
}
#include "ft/serialize/rbuf.h"
@@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) {
return result;
}
-static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) {
+static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb,
+ memarena *UU(ma),
+ BLOCKNUM *blocknum) {
*blocknum = rbuf_blocknum(rb);
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
index 1719b6b7cb5..c2f815c6cf2 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
@@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen,
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
- char windowBits = source[1];
+ int8_t windowBits = source[1];
int r = inflateInit2(&strm, windowBits);
lazy_assert(r == Z_OK);
strm.next_out = dest;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
index 49d4368a3ab..8fcb5293412 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
@@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
// translation table itself won't fit in main memory.
ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read,
translation_address_on_disk);
- assert(readsz >= translation_size_on_disk);
- assert(readsz <= (ssize_t)size_to_read);
+ invariant(readsz >= translation_size_on_disk);
+ invariant(readsz <= (ssize_t)size_to_read);
}
// Create table and read in data.
r = ft->blocktable.create_from_buffer(fd,
@@ -411,73 +411,90 @@ exit:
return r;
}
-static size_t
-serialize_ft_min_size (uint32_t version) {
+static size_t serialize_ft_min_size(uint32_t version) {
size_t size = 0;
- switch(version) {
- case FT_LAYOUT_VERSION_29:
- size += sizeof(uint64_t); // logrows in ft
- case FT_LAYOUT_VERSION_28:
- size += sizeof(uint32_t); // fanout in ft
- case FT_LAYOUT_VERSION_27:
- case FT_LAYOUT_VERSION_26:
- case FT_LAYOUT_VERSION_25:
- case FT_LAYOUT_VERSION_24:
- case FT_LAYOUT_VERSION_23:
- case FT_LAYOUT_VERSION_22:
- case FT_LAYOUT_VERSION_21:
- size += sizeof(MSN); // max_msn_in_ft
- case FT_LAYOUT_VERSION_20:
- case FT_LAYOUT_VERSION_19:
- size += 1; // compression method
- size += sizeof(MSN); // highest_unused_msn_for_upgrade
- case FT_LAYOUT_VERSION_18:
- size += sizeof(uint64_t); // time_of_last_optimize_begin
- size += sizeof(uint64_t); // time_of_last_optimize_end
- size += sizeof(uint32_t); // count_of_optimize_in_progress
- size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
- size -= 8; // removed num_blocks_to_upgrade_14
- size -= 8; // removed num_blocks_to_upgrade_13
- case FT_LAYOUT_VERSION_17:
- size += 16;
- invariant(sizeof(STAT64INFO_S) == 16);
- case FT_LAYOUT_VERSION_16:
- case FT_LAYOUT_VERSION_15:
- size += 4; // basement node size
- size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14
- size += 8; // time of last verification
- case FT_LAYOUT_VERSION_14:
- size += 8; //TXNID that created
- case FT_LAYOUT_VERSION_13:
- size += ( 4 // build_id
- +4 // build_id_original
- +8 // time_of_creation
- +8 // time_of_last_modification
- );
+ switch (version) {
+ case FT_LAYOUT_VERSION_29:
+ size += sizeof(uint64_t); // logrows in ft
+ case FT_LAYOUT_VERSION_28:
+ size += sizeof(uint32_t); // fanout in ft
+ case FT_LAYOUT_VERSION_27:
+ case FT_LAYOUT_VERSION_26:
+ case FT_LAYOUT_VERSION_25:
+ case FT_LAYOUT_VERSION_24:
+ case FT_LAYOUT_VERSION_23:
+ case FT_LAYOUT_VERSION_22:
+ case FT_LAYOUT_VERSION_21:
+ size += sizeof(MSN); // max_msn_in_ft
+ case FT_LAYOUT_VERSION_20:
+ case FT_LAYOUT_VERSION_19:
+ size += 1; // compression method
+ size += sizeof(MSN); // highest_unused_msn_for_upgrade
+ case FT_LAYOUT_VERSION_18:
+ size += sizeof(uint64_t); // time_of_last_optimize_begin
+ size += sizeof(uint64_t); // time_of_last_optimize_end
+ size += sizeof(uint32_t); // count_of_optimize_in_progress
+ size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
+ size -= 8; // removed num_blocks_to_upgrade_14
+ size -= 8; // removed num_blocks_to_upgrade_13
+ case FT_LAYOUT_VERSION_17:
+ size += 16;
+ invariant(sizeof(STAT64INFO_S) == 16);
+ case FT_LAYOUT_VERSION_16:
+ case FT_LAYOUT_VERSION_15:
+ size += 4; // basement node size
+ size += 8; // num_blocks_to_upgrade_14 (previously
+ // num_blocks_to_upgrade, now one int each for upgrade
+ // from 13, 14)
+ size += 8; // time of last verification
+ case FT_LAYOUT_VERSION_14:
+ size += 8; // TXNID that created
+ case FT_LAYOUT_VERSION_13:
+ size += (4 // build_id
+ +
+ 4 // build_id_original
+ +
+ 8 // time_of_creation
+ +
+ 8 // time_of_last_modification
+ );
// fall through
- case FT_LAYOUT_VERSION_12:
- size += (+8 // "tokudata"
- +4 // version
- +4 // original_version
- +4 // size
- +8 // byte order verification
- +8 // checkpoint_count
- +8 // checkpoint_lsn
- +4 // tree's nodesize
- +8 // translation_size_on_disk
- +8 // translation_address_on_disk
- +4 // checksum
- +8 // Number of blocks in old version.
- +8 // diskoff
- +4 // flags
- );
- break;
- default:
- abort();
- }
-
- lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ case FT_LAYOUT_VERSION_12:
+ size += (+8 // "tokudata"
+ +
+ 4 // version
+ +
+ 4 // original_version
+ +
+ 4 // size
+ +
+ 8 // byte order verification
+ +
+ 8 // checkpoint_count
+ +
+ 8 // checkpoint_lsn
+ +
+ 4 // tree's nodesize
+ +
+ 8 // translation_size_on_disk
+ +
+ 8 // translation_address_on_disk
+ +
+ 4 // checksum
+ +
+ 8 // Number of blocks in old version.
+ +
+ 8 // diskoff
+ +
+ 4 // flags
+ );
+ break;
+ default:
+ abort();
+ }
+
+ lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
return size;
}
@@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
struct rbuf *rb,
uint64_t *checkpoint_count,
LSN *checkpoint_lsn,
- uint32_t * version_p)
+ uint32_t *version_p)
// Effect: Read and parse the header of a fractal tree
//
// Simply reading the raw bytes of the header into an rbuf is insensitive
@@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
// file AND the header is useless
{
int r = 0;
- const int64_t prefix_size = 8 + // magic ("tokudata")
- 4 + // version
- 4 + // build_id
- 4; // size
+ const int64_t prefix_size = 8 + // magic ("tokudata")
+ 4 + // version
+ 4 + // build_id
+ 4; // size
const int64_t read_size = roundup_to_multiple(512, prefix_size);
unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix);
rb->buf = NULL;
int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header);
if (n != read_size) {
- if (n==0) {
+ if (n == 0) {
r = TOKUDB_DICTIONARY_NO_HEADER;
- } else if (n<0) {
+ } else if (n < 0) {
r = get_error_errno();
} else {
r = EINVAL;
@@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
rbuf_init(rb, prefix, prefix_size);
- //Check magic number
+ // Check magic number
const void *magic;
rbuf_literal_bytes(rb, &magic, 8);
- if (memcmp(magic,"tokudata",8)!=0) {
- if ((*(uint64_t*)magic) == 0) {
+ if (memcmp(magic, "tokudata", 8) != 0) {
+ if ((*(uint64_t *)magic) == 0) {
r = TOKUDB_DICTIONARY_NO_HEADER;
} else {
- r = EINVAL; //Not a tokudb file! Do not use.
+ r = EINVAL; // Not a tokudb file! Do not use.
}
goto exit;
}
- //Version MUST be in network order regardless of disk order.
+ // Version MUST be in network order regardless of disk order.
uint32_t version;
version = rbuf_network_int(rb);
*version_p = version;
if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
- r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use
+ r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use
goto exit;
} else if (version > FT_LAYOUT_VERSION) {
- r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use
+ r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use
goto exit;
}
- //build_id MUST be in network order regardless of disk order.
+ // build_id MUST be in network order regardless of disk order.
uint32_t build_id __attribute__((__unused__));
build_id = rbuf_network_int(rb);
int64_t min_header_size;
min_header_size = serialize_ft_min_size(version);
- //Size MUST be in network order regardless of disk order.
+ // Size MUST be in network order regardless of disk order.
uint32_t size;
size = rbuf_network_int(rb);
- //If too big, it is corrupt. We would probably notice during checksum
- //but may have to do a multi-gigabyte malloc+read to find out.
- //If its too small reading rbuf would crash, so verify.
- if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) {
+ // If too big, it is corrupt. We would probably notice during checksum
+ // but may have to do a multi-gigabyte malloc+read to find out.
+ // If it's too small, reading rbuf would crash, so verify.
+ if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE ||
+ size < min_header_size) {
r = TOKUDB_DICTIONARY_NO_HEADER;
goto exit;
}
- lazy_assert(rb->ndone==prefix_size);
+ lazy_assert(rb->ndone == prefix_size);
rb->size = size;
{
toku_free(rb->buf);
uint32_t size_to_read = roundup_to_multiple(512, size);
XMALLOC_N_ALIGNED(512, size_to_read, rb->buf);
- assert(offset_of_header%512==0);
+ invariant(offset_of_header % 512 == 0);
n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header);
if (n != size_to_read) {
if (n < 0) {
r = get_error_errno();
} else {
- r = EINVAL; //Header might be useless (wrong size) or could be a disk read error.
+ r = EINVAL; // Header might be useless (wrong size) or could be
+ // a disk read error.
}
goto exit;
}
}
- //It's version 14 or later. Magic looks OK.
- //We have an rbuf that represents the header.
- //Size is within acceptable bounds.
+ // It's version 14 or later. Magic looks OK.
+ // We have an rbuf that represents the header.
+ // Size is within acceptable bounds.
- //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed)
+ // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function
+ // changed)
uint32_t calculated_x1764;
- calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4);
+ calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4);
uint32_t stored_x1764;
- stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4));
+ stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4));
if (calculated_x1764 != stored_x1764) {
- r = TOKUDB_BAD_CHECKSUM; //Header useless
- fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764);
+ r = TOKUDB_BAD_CHECKSUM; // Header useless
+ fprintf(stderr,
+ "Header checksum failure: calc=0x%08x read=0x%08x\n",
+ calculated_x1764,
+ stored_x1764);
goto exit;
}
- //Verify byte order
+ // Verify byte order
const void *tmp_byte_order_check;
lazy_assert((sizeof toku_byte_order_host) == 8);
- rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order
+ rbuf_literal_bytes(
+ rb, &tmp_byte_order_check, 8); // Must not translate byte order
int64_t byte_order_stored;
- byte_order_stored = *(int64_t*)tmp_byte_order_check;
+ byte_order_stored = *(int64_t *)tmp_byte_order_check;
if (byte_order_stored != toku_byte_order_host) {
- r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary
+ r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary
goto exit;
}
- //Load checkpoint count
+ // Load checkpoint count
*checkpoint_count = rbuf_ulonglong(rb);
*checkpoint_lsn = rbuf_LSN(rb);
- //Restart at beginning during regular deserialization
+ // Restart at beginning during regular deserialization
rb->ndone = 0;
exit:
@@ -620,11 +644,7 @@ exit:
// Read ft from file into struct. Read both headers and use one.
// We want the latest acceptable header whose checkpoint_lsn is no later
// than max_acceptable_lsn.
-int
-toku_deserialize_ft_from(int fd,
- LSN max_acceptable_lsn,
- FT *ft)
-{
+int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) {
struct rbuf rb_0;
struct rbuf rb_1;
uint64_t checkpoint_count_0 = 0;
@@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd,
int r0, r1, r;
toku_off_t header_0_off = 0;
- r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0);
+ r0 = deserialize_ft_from_fd_into_rbuf(fd,
+ header_0_off,
+ &rb_0,
+ &checkpoint_count_0,
+ &checkpoint_lsn_0,
+ &version_0);
if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) {
h0_acceptable = true;
}
- toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
- r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1);
+ toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ r1 = deserialize_ft_from_fd_into_rbuf(fd,
+ header_1_off,
+ &rb_1,
+ &checkpoint_count_1,
+ &checkpoint_lsn_1,
+ &version_1);
if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) {
h1_acceptable = true;
}
@@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd,
// We were unable to read either header or at least one is too
// new. Certain errors are higher priority than others. Order of
// these if/else if is important.
- if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
+ if (r0 == TOKUDB_DICTIONARY_TOO_NEW ||
+ r1 == TOKUDB_DICTIONARY_TOO_NEW) {
r = TOKUDB_DICTIONARY_TOO_NEW;
- } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) {
+ } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD ||
+ r1 == TOKUDB_DICTIONARY_TOO_OLD) {
r = TOKUDB_DICTIONARY_TOO_OLD;
} else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) {
fprintf(stderr, "Both header checksums failed.\n");
r = TOKUDB_BAD_CHECKSUM;
- } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) {
+ } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER ||
+ r1 == TOKUDB_DICTIONARY_NO_HEADER) {
r = TOKUDB_DICTIONARY_NO_HEADER;
} else {
- r = r0 ? r0 : r1; //Arbitrarily report the error from the
- //first header, unless it's readable
+ r = r0 ? r0 : r1; // Arbitrarily report the error from the
+ // first header, unless it's readable
}
- // it should not be possible for both headers to be later than the max_acceptable_lsn
- invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
- (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
- invariant(r!=0);
+ // it should not be possible for both headers to be later than the
+ // max_acceptable_lsn
+ invariant(
+ !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
+ (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
+ invariant(r != 0);
goto exit;
}
@@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd,
invariant(version_0 >= version_1);
rb = &rb_0;
version = version_0;
- }
- else {
+ } else {
invariant(checkpoint_count_1 == checkpoint_count_0 + 1);
invariant(version_1 >= version_0);
rb = &rb_1;
@@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd,
} else if (h0_acceptable) {
if (r1 == TOKUDB_BAD_CHECKSUM) {
// print something reassuring
- fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n");
+ fprintf(
+ stderr,
+ "Header 2 checksum failed, but header 1 ok. Proceeding.\n");
}
rb = &rb_0;
version = version_0;
} else if (h1_acceptable) {
if (r0 == TOKUDB_BAD_CHECKSUM) {
// print something reassuring
- fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n");
+ fprintf(
+ stderr,
+ "Header 1 checksum failed, but header 2 ok. Proceeding.\n");
}
rb = &rb_1;
version = version_1;
@@ -718,15 +756,13 @@ exit:
return r;
}
-
-size_t toku_serialize_ft_size (FT_HEADER h) {
+size_t toku_serialize_ft_size(FT_HEADER h) {
size_t size = serialize_ft_min_size(h->layout_version);
- //There is no dynamic data.
- lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ // There is no dynamic data.
+ lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
return size;
}
-
void toku_serialize_ft_to_wbuf (
struct wbuf *wbuf,
FT_HEADER h,
@@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf (
}
void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
- lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
+ lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS);
struct wbuf w_translation;
int64_t size_translation;
int64_t address_translation;
// Must serialize translation first, to get address,size for header.
- bt->serialize_translation_to_wbuf(fd, &w_translation,
- &address_translation,
- &size_translation);
- assert(size_translation == w_translation.ndone);
+ bt->serialize_translation_to_wbuf(
+ fd, &w_translation, &address_translation, &size_translation);
+ invariant(size_translation == w_translation.ndone);
- // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized.
- assert(w_translation.size % 512 == 0);
+ // the number of bytes available in the buffer is 0 mod 512, and those last
+ // bytes are all initialized.
+ invariant(w_translation.size % 512 == 0);
struct wbuf w_main;
- size_t size_main = toku_serialize_ft_size(h);
+ size_t size_main = toku_serialize_ft_size(h);
size_t size_main_aligned = roundup_to_multiple(512, size_main);
- assert(size_main_aligned<block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ invariant(size_main_aligned <
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf);
- for (size_t i=size_main; i<size_main_aligned; i++) mainbuf[i]=0; // initialize the end of the buffer with zeros
+ for (size_t i = size_main; i < size_main_aligned; i++)
+ mainbuf[i] = 0; // initialize the end of the buffer with zeros
wbuf_init(&w_main, mainbuf, size_main);
- toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation);
+ toku_serialize_ft_to_wbuf(
+ &w_main, h, address_translation, size_translation);
lazy_assert(w_main.ndone == size_main);
// Actually write translation table
- // This write is guaranteed to read good data at the end of the buffer, since the
+ // This write is guaranteed to read good data at the end of the buffer,
+ // since the
// w_translation.buf is padded with zeros to a 512-byte boundary.
- toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), address_translation);
-
- //Everything but the header MUST be on disk before header starts.
- //Otherwise we will think the header is good and some blocks might not
- //yet be on disk.
- //If the header has a cachefile we need to do cachefile fsync (to
- //prevent crash if we redirected to dev null)
- //If there is no cachefile we still need to do an fsync.
+ toku_os_full_pwrite(fd,
+ w_translation.buf,
+ roundup_to_multiple(512, size_translation),
+ address_translation);
+
+ // Everything but the header MUST be on disk before header starts.
+ // Otherwise we will think the header is good and some blocks might not
+ // yet be on disk.
+ // If the header has a cachefile we need to do cachefile fsync (to
+ // prevent crash if we redirected to dev null)
+ // If there is no cachefile we still need to do an fsync.
if (cf) {
toku_cachefile_fsync(cf);
- }
- else {
+ } else {
toku_file_fsync(fd);
}
- //Alternate writing header to two locations:
+ // Alternate writing header to two locations:
// Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE
toku_off_t main_offset;
- main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ main_offset = (h->checkpoint_count & 0x1)
+ ? 0
+ : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset);
toku_free(w_main.buf);
toku_free(w_translation.buf);
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index c4f4886b6a0..5914f8a1050 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
num_cores = toku_os_get_number_active_processors();
int r = toku_thread_pool_create(&ft_pool, num_cores);
lazy_assert_zero(r);
- block_allocator::maybe_initialize_trace();
toku_serialize_in_parallel = false;
}
void toku_ft_serialize_layer_destroy(void) {
toku_thread_pool_destroy(&ft_pool);
- block_allocator::maybe_close_trace();
}
enum { FILE_CHANGE_INCREMENT = (16 << 20) };
@@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
return 0;
}
-int
-toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) {
-
+int toku_serialize_ftnode_to(int fd,
+ BLOCKNUM blocknum,
+ FTNODE node,
+ FTNODE_DISK_DATA *ndd,
+ bool do_rebalancing,
+ FT ft,
+ bool for_checkpoint) {
size_t n_to_write;
size_t n_uncompressed_bytes;
char *compressed_buf = nullptr;
- // because toku_serialize_ftnode_to is only called for
+ // because toku_serialize_ftnode_to is only called
// in toku_ftnode_flush_callback, we pass false
// for in_parallel. The reasoning is that when we write
- // nodes to disk via toku_ftnode_flush_callback, we
+ // nodes to disk via toku_ftnode_flush_callback, we
// assume that it is being done on a non-critical
- // background thread (probably for checkpointing), and therefore
+ // background thread (probably for checkpointing), and therefore
// should not hog CPU,
//
// Should the above facts change, we may want to revisit
@@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
toku_unsafe_fetch(&toku_serialize_in_parallel),
&n_to_write,
&n_uncompressed_bytes,
- &compressed_buf
- );
+ &compressed_buf);
if (r != 0) {
return r;
}
- // If the node has never been written, then write the whole buffer, including the zeros
- invariant(blocknum.b>=0);
+ // If the node has never been written, then write the whole buffer,
+ // including the zeros
+ invariant(blocknum.b >= 0);
DISKOFF offset;
// Dirties the ft
- ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
- ft, fd, for_checkpoint,
- // Allocations for nodes high in the tree are considered 'hot',
- // as they are likely to move again in the next checkpoint.
- node->height);
+ ft->blocktable.realloc_on_disk(
+ blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
tokutime_t t0 = toku_time_now();
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
tokutime_t t1 = toku_time_now();
tokutime_t io_time = t1 - t0;
- toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
+ toku_ft_status_update_flush_reason(
+ node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
toku_free(compressed_buf);
- node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+ node->dirty = 0; // See #1957. Must set the node to be clean after
+ // serializing it so that it doesn't get written again on
+ // the next checkpoint or eviction.
return 0;
}
@@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
bn->seqinsert = orig_bn->seqinsert;
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
bn->stat64_delta = orig_bn->stat64_delta;
+ bn->logical_rows_delta = orig_bn->logical_rows_delta;
bn->data_buffer.clone(&orig_bn->data_buffer);
return bn;
}
@@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
bn->seqinsert = 0;
bn->stale_ancestor_messages_applied = false;
bn->stat64_delta = ZEROSTATS;
+ bn->logical_rows_delta = 0;
bn->data_buffer.init_zero();
return bn;
}
@@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
/* out */ int *layout_version_p);
// This function upgrades a version 14 or 13 ftnode to the current
-// verison. NOTE: This code assumes the first field of the rbuf has
+// version. NOTE: This code assumes the first field of the rbuf has
// already been read from the buffer (namely the layout_version of the
// ftnode.)
static int
@@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
serialized->blocknum = log->blocknum;
}
-int
-toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
- FT ft, bool for_checkpoint) {
+int toku_serialize_rollback_log_to(int fd,
+ ROLLBACK_LOG_NODE log,
+ SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
+ bool is_serialized,
+ FT ft,
+ bool for_checkpoint) {
size_t n_to_write;
char *compressed_buf;
struct serialized_rollback_log_node serialized_local;
@@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
serialized_log->n_sub_blocks,
serialized_log->sub_block,
ft->h->compression_method,
- &n_to_write, &compressed_buf);
+ &n_to_write,
+ &compressed_buf);
// Dirties the ft
DISKOFF offset;
- ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
- ft, fd, for_checkpoint,
- // We consider rollback log flushing the hottest possible allocation,
- // since rollback logs are short-lived compared to FT nodes.
- INT_MAX);
+ ft->blocktable.realloc_on_disk(
+ blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
toku_free(compressed_buf);
if (!is_serialized) {
toku_static_serialized_rollback_log_destroy(&serialized_local);
- log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+ log->dirty = 0; // See #1957. Must set the node to be clean after
+ // serializing it so that it doesn't get written again
+ // on the next checkpoint or eviction.
}
return 0;
}
@@ -2704,7 +2711,7 @@ exit:
}
static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
- // This function exists solely to accomodate future changes in compression.
+ // This function exists solely to accommodate future changes in compression.
int r = 0;
if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
(FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
new file mode 100644
index 00000000000..922850fb3e0
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
@@ -0,0 +1,833 @@
+/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "portability/toku_assert.h"
+#include "portability/toku_portability.h"
+#include <algorithm>
+
+namespace MhsRbTree {
+
+    // Default tree: empty, with an alignment of 1.
+    Tree::Tree() : Tree(1) {}
+
+    // Empty tree whose allocation offsets are rounded up to multiples of
+    // `align`.
+    Tree::Tree(uint64_t align) : _root(nullptr), _align(align) {}
+
+    // Releases every node still owned by the tree.
+    Tree::~Tree() { Destroy(); }
+
+    // Debug helper: writes the offset of every node under `tree` to stderr,
+    // visiting each node before its left and then its right subtree.
+    void Tree::PreOrder(Node *tree) const {
+        if (tree == nullptr) {
+            return;
+        }
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+        PreOrder(tree->_left);
+        PreOrder(tree->_right);
+    }
+
+    // Pre-order dump of the whole tree.
+    void Tree::PreOrder() { PreOrder(_root); }
+
+    // Debug helper: writes the offset of every node under `tree` to stderr,
+    // visiting the left subtree, then the node, then the right subtree.
+    void Tree::InOrder(Node *tree) const {
+        if (tree == nullptr) {
+            return;
+        }
+        InOrder(tree->_left);
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+        InOrder(tree->_right);
+    }
+
+    // Only an in-order visitor is provided. -Jun
+    // Walks the subtree rooted at `tree` (left subtree, node, right subtree)
+    // and calls f(extra, node, depth) for each node. `depth` is the depth of
+    // `tree` itself; children are visited with depth + 1.
+    void Tree::InOrderVisitor(Node *tree,
+                              void (*f)(void *, Node *, uint64_t),
+                              void *extra,
+                              uint64_t depth) {
+        if (tree == nullptr) {
+            return;
+        }
+        const uint64_t child_depth = depth + 1;
+        InOrderVisitor(tree->_left, f, extra, child_depth);
+        f(extra, tree, depth);
+        InOrderVisitor(tree->_right, f, extra, child_depth);
+    }
+
+    // Visits the whole tree in order; the root is reported at depth 0.
+    void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t),
+                              void *extra) {
+        const uint64_t root_depth = 0;
+        InOrderVisitor(_root, f, extra, root_depth);
+    }
+
+ // Prints the offsets of all nodes to stderr in in-order (left, node, right).
+ void Tree::InOrder() { InOrder(_root); }
+
+    // Debug helper: writes the offset of every node under `tree` to stderr,
+    // visiting both subtrees (left first) before the node itself.
+    void Tree::PostOrder(Node *tree) const {
+        if (tree == nullptr) {
+            return;
+        }
+        PostOrder(tree->_left);
+        PostOrder(tree->_right);
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+    }
+
+    // Post-order dump of the whole tree.
+    void Tree::PostOrder() { PostOrder(_root); }
+
+    // Binary search for the node whose hole starts exactly at `offset`.
+    // Returns NULL when no such node exists.
+    Node *Tree::SearchByOffset(uint64_t offset) {
+        Node *cur = _root;
+        while (cur != nullptr) {
+            const uint64_t key = rbn_offset(cur).ToInt();
+            if (key == offset) {
+                break;
+            }
+            cur = (offset < key) ? cur->_left : cur->_right;
+        }
+        return cur;
+    }
+
+    // mostly for testing
+    // Returns the lowest-offset hole whose effective (alignment-adjusted)
+    // size is at least `size`, or nullptr when the tree is empty or no hole
+    // is large enough.
+    Node *Tree::SearchFirstFitBySize(uint64_t size) {
+        // An empty tree has no holes; bail out before dereferencing _root.
+        if (_root == nullptr) {
+            return nullptr;
+        }
+        // Neither the root's own hole nor either subtree label can satisfy
+        // the request, so nothing in the tree fits.
+        if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size &&
+            rbn_right_mhs(_root) < size) {
+            return nullptr;
+        }
+        return SearchFirstFitBySizeHelper(_root, size);
+    }
+
+    // Descends from `x` towards the lowest-offset hole that can hold `size`:
+    // a fitting left subtree always wins, then the node itself, then the
+    // right subtree. The caller must already know that something under `x`
+    // fits; reaching a node where nothing fits means the labels are corrupt,
+    // so the tree is dumped and validated and an invariant fires.
+    Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) {
+        Node *cur = x;
+        for (;;) {
+            const bool fits_here = EffectiveSize(cur) >= size;
+            if (rbn_left_mhs(cur) >= size) {
+                cur = cur->_left;
+            } else if (fits_here) {
+                return cur;
+            } else if (rbn_right_mhs(cur) >= size) {
+                cur = cur->_right;
+            } else {
+                break;
+            }
+        }
+
+        // this is an invalid state
+        Dump();
+        ValidateBalance();
+        ValidateMhs();
+        invariant(0);
+        return NULL;
+    }
+
+    // Leftmost (lowest-offset) node of the subtree rooted at `tree`, or NULL
+    // for an empty subtree.
+    Node *Tree::MinNode(Node *tree) {
+        Node *cur = tree;
+        while (cur != nullptr && cur->_left != nullptr) {
+            cur = cur->_left;
+        }
+        return cur;
+    }
+
+    // Lowest-offset node of the whole tree, or NULL when the tree is empty.
+    Node *Tree::MinNode() { return MinNode(_root); }
+
+    // Rightmost (highest-offset) node of the subtree rooted at `tree`, or
+    // NULL for an empty subtree.
+    Node *Tree::MaxNode(Node *tree) {
+        Node *cur = tree;
+        while (cur != nullptr && cur->_right != nullptr) {
+            cur = cur->_right;
+        }
+        return cur;
+    }
+
+    // Highest-offset node of the whole tree, or NULL when the tree is empty.
+    Node *Tree::MaxNode() { return MaxNode(_root); }
+
+    // Climbs from x (whose parent is y) while the walk arrives from a right
+    // child. Returns the first ancestor reached from its left side, or NULL
+    // if the walk runs off the root.
+    Node *Tree::SuccessorHelper(Node *y, Node *x) {
+        Node *ancestor = y;
+        Node *child = x;
+        for (; ancestor != nullptr && ancestor->_right == child;
+             ancestor = ancestor->_parent) {
+            child = ancestor;
+        }
+        return ancestor;
+    }
+    // In-order successor of x: the minimum of its right subtree when there
+    // is one, otherwise the nearest ancestor reached from a left child.
+    // Returns NULL when x is the last node.
+    Node *Tree::Successor(Node *x) {
+        return (x->_right != nullptr) ? MinNode(x->_right)
+                                      : SuccessorHelper(x->_parent, x);
+    }
+
+    // Climbs from x (whose parent is y) while the walk arrives from a left
+    // child. Returns the first ancestor reached from its right side, or NULL
+    // if the walk runs off the root.
+    Node *Tree::PredecessorHelper(Node *y, Node *x) {
+        Node *ancestor = y;
+        Node *child = x;
+        for (; ancestor != nullptr && ancestor->_left == child;
+             ancestor = ancestor->_parent) {
+            child = ancestor;
+        }
+        return ancestor;
+    }
+    // In-order predecessor of x: the maximum of its left subtree when there
+    // is one, otherwise the nearest ancestor reached from a right child.
+    // Returns NULL when x is the first node.
+    Node *Tree::Predecessor(Node *x) {
+        if (x->_left != NULL)
+            return MaxNode(x->_left);
+
+        // Climb while we are a left child. This must use PredecessorHelper:
+        // SuccessorHelper climbs while we are a right child and would return
+        // the successor-side ancestor instead.
+        Node *y = x->_parent;
+        return PredecessorHelper(y, x);
+    }
+
+ /*
+ * px px
+ * / /
+ * x y
+ * / \ --(left rotation)--> / \ #
+ * lx y x ry
+ * / \ / \
+ * ly ry lx ly
+ * max_hole_size updates are pretty local
+ */
+
+ // Rotates the subtree rooted at x to the left: x's right child y takes x's
+ // place, x becomes y's left child, and y's former left subtree becomes x's
+ // right subtree. `root` is rewritten when x had no parent.
+ // x->_right is dereferenced unconditionally, so it must be non-null.
+ // Only the labels of x and y are patched; ancestors are not touched here.
+ void Tree::LeftRotate(Node *&root, Node *x) {
+ Node *y = x->_right;
+
+ x->_right = y->_left;
+ // x adopts y's left subtree, so it adopts that subtree's label as well.
+ rbn_right_mhs(x) = rbn_left_mhs(y);
+
+ if (y->_left != NULL)
+ y->_left->_parent = x;
+
+ y->_parent = x->_parent;
+
+ // hook y into x's former parent (or make it the root)
+ if (x->_parent == NULL) {
+ root = y;
+ } else {
+ if (x->_parent->_left == x) {
+ x->_parent->_left = y;
+ } else {
+ x->_parent->_right = y;
+ }
+ }
+ y->_left = x;
+ // both of x's labels are final by now, so its subtree mhs can be computed
+ rbn_left_mhs(y) = mhs_of_subtree(x);
+
+ x->_parent = y;
+ }
+
+ /* py py
+ * / /
+ * y x
+ * / \ --(right rotate)--> / \ #
+ * x ry lx y
+ * / \ / \ #
+ * lx rx rx ry
+ *
+ */
+
+ // Rotates the subtree rooted at y to the right: y's left child x takes y's
+ // place, y becomes x's right child, and x's former right subtree becomes
+ // y's left subtree. `root` is rewritten when y had no parent.
+ // y->_left is dereferenced unconditionally, so it must be non-null.
+ // Only the labels of x and y are patched; ancestors are not touched here.
+ void Tree::RightRotate(Node *&root, Node *y) {
+ Node *x = y->_left;
+
+ y->_left = x->_right;
+ // y adopts x's right subtree, so it adopts that subtree's label as well.
+ rbn_left_mhs(y) = rbn_right_mhs(x);
+
+ if (x->_right != NULL)
+ x->_right->_parent = y;
+
+ x->_parent = y->_parent;
+
+ // hook x into y's former parent (or make it the root)
+ if (y->_parent == NULL) {
+ root = x;
+ } else {
+ if (y == y->_parent->_right)
+ y->_parent->_right = x;
+ else
+ y->_parent->_left = x;
+ }
+
+ x->_right = y;
+ // both of y's labels are final by now, so its subtree mhs can be computed
+ rbn_right_mhs(x) = mhs_of_subtree(y);
+ y->_parent = x;
+ }
+
+    // Walks from `node` towards the root, refreshing the max-hole-size label
+    // that each parent keeps for the subtree we came from. Call this whenever
+    // a node's size or one of its left/right labels has changed.
+    // Prerequisite: the labels of `node` itself are already up to date.
+    // The walk stops at the root, at a parent that no longer points to the
+    // current node, or as soon as a label turns out to be unchanged.
+    void Tree::RecalculateMhs(Node *node) {
+        Node *child = node;
+        while (Node *parent = child->_parent) {
+            const uint64_t subtree_mhs = mhs_of_subtree(child);
+
+            uint64_t *slot;
+            if (parent->_left == child) {
+                slot = &rbn_left_mhs(parent);
+            } else if (parent->_right == child) {
+                slot = &rbn_right_mhs(parent);
+            } else {
+                return;
+            }
+
+            if (*slot == subtree_mhs) {
+                return;
+            }
+            *slot = subtree_mhs;
+            child = parent;
+        }
+    }
+
+    // Decides whether the freed range `pair` is contiguous with its in-order
+    // neighbours. *left_merge is written only when pred is non-null, and
+    // *right_merge only when succ is non-null. A neighbour that is neither
+    // strictly apart from nor exactly adjacent to the range (i.e. overlaps
+    // it) trips an invariant.
+    void Tree::IsNewNodeMergable(Node *pred,
+                                 Node *succ,
+                                 Node::BlockPair pair,
+                                 bool *left_merge,
+                                 bool *right_merge) {
+        if (pred != nullptr) {
+            OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred);
+            const bool touches = !(end_of_pred < pair._offset);
+            if (touches) {
+                invariant(end_of_pred == pair._offset);
+            }
+            *left_merge = touches;
+        }
+        if (succ != nullptr) {
+            OUUInt64 begin_of_succ = rbn_offset(succ);
+            OUUInt64 end_of_node = pair._offset + pair._size;
+            const bool touches = !(end_of_node < begin_of_succ);
+            if (touches) {
+                invariant(end_of_node == begin_of_succ);
+            }
+            *right_merge = touches;
+        }
+    }
+
+ // Folds the freed range `pair` into an adjacent hole instead of creating a
+ // new node. At least one of left_merge / right_merge must be true.
+ // is_right_child tells on which side of the insertion point the pair would
+ // have been linked: Insert() passes false when the insertion point is succ
+ // (pair would be its left child) and true when it is pred (right child).
+ // When both neighbours merge, the three ranges collapse into one hole and
+ // the node at the insertion point is emptied and removed from the tree.
+ void Tree::AbsorbNewNode(Node *pred,
+ Node *succ,
+ Node::BlockPair pair,
+ bool left_merge,
+ bool right_merge,
+ bool is_right_child) {
+ invariant(left_merge || right_merge);
+ if (left_merge && right_merge) {
+ // merge to the succ
+ if (!is_right_child) {
+ rbn_size(succ) += pair._size;
+ rbn_offset(succ) = pair._offset;
+ // merge to the pred
+ rbn_size(pred) += rbn_size(succ);
+ // to keep the invariant of the tree -no overlapping holes
+ rbn_offset(succ) += rbn_size(succ);
+ rbn_size(succ) = 0;
+ RecalculateMhs(succ);
+ RecalculateMhs(pred);
+ // pred dominates succ. this is going to
+ // update the pred labels separately.
+ // remove succ
+ RawRemove(_root, succ);
+ } else {
+ // mirror image: grow pred by the pair, hand everything to succ,
+ // then empty pred so the holes never overlap
+ rbn_size(pred) += pair._size;
+ rbn_offset(succ) = rbn_offset(pred);
+ rbn_size(succ) += rbn_size(pred);
+ rbn_offset(pred) += rbn_size(pred);
+ rbn_size(pred) = 0;
+ RecalculateMhs(pred);
+ RecalculateMhs(succ);
+ // now remove pred
+ RawRemove(_root, pred);
+ }
+ } else if (left_merge) {
+ // pair starts where pred ends: just extend pred
+ rbn_size(pred) += pair._size;
+ RecalculateMhs(pred);
+ } else if (right_merge) {
+ // pair ends where succ starts: move succ's start back over the pair
+ rbn_offset(succ) -= pair._size;
+ rbn_size(succ) += pair._size;
+ RecalculateMhs(succ);
+ }
+ }
+ // this is the most tedious part, but not complicated:
+ // 1. find where to insert the pair.
+ // 2. if both the pred and the succ can merge with the pair, merge with
+ //    both; either pred or succ is then removed.
+ // 3. if the pair is only left-mergeable or right-mergeable, just merge.
+ // 4. non-mergeable case: insert the node and run the fixup.
+
+ // Frees the range `pair` into the tree. If the range is adjacent to its
+ // in-order predecessor and/or successor it is merged into them; otherwise
+ // a new red node is linked in and InsertFixup() rebalances the tree.
+ // Returns 0 on success (or InsertFixup's result), -1 if the node pointer
+ // comes back null.
+ // NOTE(review): the descent starts from the member _root while new roots
+ // are stored through the `root` parameter; the only visible callers pass
+ // _root for `root`, so both name the same pointer - confirm before reuse.
+ int Tree::Insert(Node *&root, Node::BlockPair pair) {
+ Node *x = _root;
+ Node *y = NULL;
+ bool left_merge = false;
+ bool right_merge = false;
+ Node *node = NULL;
+
+ // ordinary BST descent by offset; y ends up as the would-be parent
+ while (x != NULL) {
+ y = x;
+ if (pair._offset < rbn_key(x))
+ x = x->_left;
+ else
+ x = x->_right;
+ }
+
+ // we found where to insert, let's find the pred and succ for possible
+ // merges.
+ Node *pred, *succ;
+ if (y != NULL) {
+ if (pair._offset < rbn_key(y)) {
+ // as the left child
+ // y is the successor; the predecessor is the nearest ancestor
+ // that is reached from its right side
+ pred = PredecessorHelper(y->_parent, y);
+ succ = y;
+ IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+ if (left_merge || right_merge) {
+ AbsorbNewNode(
+ pred, succ, pair, left_merge, right_merge, false);
+ } else {
+ // construct the node
+ Node::Pair mhsp {0, 0};
+ node =
+ new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ // NOTE(review): a plain new-expression normally throws instead
+ // of returning null, so this check looks unreachable unless
+ // operator new is replaced - confirm.
+ if (!node)
+ return -1;
+ y->_left = node;
+ node->_parent = y;
+ RecalculateMhs(node);
+ }
+
+ } else {
+ // as the right child
+ // y is the predecessor; the successor is the nearest ancestor
+ // that is reached from its left side
+ pred = y;
+ succ = SuccessorHelper(y->_parent, y);
+ IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+ if (left_merge || right_merge) {
+ AbsorbNewNode(
+ pred, succ, pair, left_merge, right_merge, true);
+ } else {
+ // construct the node
+ Node::Pair mhsp {0, 0};
+ node =
+ new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ if (!node)
+ return -1;
+ y->_right = node;
+ node->_parent = y;
+ RecalculateMhs(node);
+ }
+ }
+ } else {
+ // empty tree: the new node becomes the root
+ Node::Pair mhsp {0, 0};
+ node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ if (!node)
+ return -1;
+ root = node;
+ }
+ // a node was linked in only when nothing merged; colour it red and
+ // let the fixup restore the red-black properties
+ if (!left_merge && !right_merge) {
+ invariant_notnull(node);
+ node->_color = EColor::RED;
+ return InsertFixup(root, node);
+ }
+ return 0;
+ }
+
+ // Restores the red-black properties after the red node `node` has been
+ // linked into the tree: recolours and rotates for as long as the parent is
+ // red, then forces the root black. Always returns 0.
+ int Tree::InsertFixup(Node *&root, Node *node) {
+ Node *parent, *gparent;
+ while ((parent = rbn_parent(node)) && rbn_is_red(parent)) {
+ gparent = rbn_parent(parent);
+ if (parent == gparent->_left) {
+ {
+ // red uncle: recolour and continue from the grandparent
+ Node *uncle = gparent->_right;
+ if (uncle && rbn_is_red(uncle)) {
+ rbn_set_black(uncle);
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ // node is an inner (right) child: rotate it to the outside first
+ // and swap the roles of node and parent
+ if (parent->_right == node) {
+ Node *tmp;
+ LeftRotate(root, parent);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ // outer child: recolour and rotate the grandparent
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ RightRotate(root, gparent);
+ } else {
+ // mirror image of the branch above
+ {
+ Node *uncle = gparent->_left;
+ if (uncle && rbn_is_red(uncle)) {
+ rbn_set_black(uncle);
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->_left == node) {
+ Node *tmp;
+ RightRotate(root, parent);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ LeftRotate(root, gparent);
+ }
+ }
+ rbn_set_black(root);
+ return 0;
+ }
+
+ // Public entry point: frees the range `pair` into this tree.
+ int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); }
+
+    // Allocates `size` bytes from the first hole found by
+    // SearchFirstFitBySize() and returns the offset of the allocation.
+    uint64_t Tree::Remove(size_t size) {
+        Node *node = SearchFirstFitBySize(size);
+        // The search returns nullptr when no hole is large enough. Fail with
+        // an explicit invariant here instead of dereferencing a null node in
+        // the Remove() overload below.
+        invariant_notnull(node);
+        return Remove(_root, node, size);
+    }
+
+ // Unlinks `node` from the tree, keeps the max-hole-size labels consistent,
+ // rebalances via RawRemoveFixup() when a black node was taken out of the
+ // tree, and finally deletes `node`. `node` must be non-null.
+ void Tree::RawRemove(Node *&root, Node *node) {
+ Node *child, *parent;
+ EColor color;
+
+ if ((node->_left != NULL) && (node->_right != NULL)) {
+ // two children: the in-order successor (leftmost node of the right
+ // subtree) is moved into node's position
+ Node *replace = node;
+ replace = replace->_right;
+ while (replace->_left != NULL)
+ replace = replace->_left;
+
+ if (rbn_parent(node)) {
+ if (rbn_parent(node)->_left == node)
+ rbn_parent(node)->_left = replace;
+ else
+ rbn_parent(node)->_right = replace;
+ } else {
+ root = replace;
+ }
+ // remember what is physically leaving replace's old position
+ child = replace->_right;
+ parent = rbn_parent(replace);
+ color = rbn_color(replace);
+
+ if (parent == node) {
+ // replace is node's direct right child: it keeps its own right
+ // subtree (and right label)
+ parent = replace;
+ } else {
+ if (child)
+ rbn_parent(child) = parent;
+
+ // replace's right subtree takes replace's old slot
+ parent->_left = child;
+ rbn_left_mhs(parent) = rbn_right_mhs(replace);
+ // propagates upward; node->_right still has node as its parent
+ // here, so node's right label is refreshed before it is copied
+ // to replace below
+ RecalculateMhs(parent);
+ replace->_right = node->_right;
+ rbn_set_parent(node->_right, replace);
+ rbn_right_mhs(replace) = rbn_right_mhs(node);
+ }
+
+ // replace takes over node's parent, colour and left subtree
+ replace->_parent = node->_parent;
+ replace->_color = node->_color;
+ replace->_left = node->_left;
+ rbn_left_mhs(replace) = rbn_left_mhs(node);
+ node->_left->_parent = replace;
+ RecalculateMhs(replace);
+ // the colour that left the tree is replace's original one
+ if (color == EColor::BLACK)
+ RawRemoveFixup(root, child, parent);
+ delete node;
+ return;
+ }
+
+ // at most one child: splice that child (possibly NULL) into node's place
+ if (node->_left != NULL)
+ child = node->_left;
+ else
+ child = node->_right;
+
+ parent = node->_parent;
+ color = node->_color;
+
+ if (child)
+ child->_parent = parent;
+
+ if (parent) {
+ if (parent->_left == node) {
+ parent->_left = child;
+ rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+ } else {
+ parent->_right = child;
+ rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+ }
+ RecalculateMhs(parent);
+ } else
+ root = child;
+ if (color == EColor::BLACK)
+ RawRemoveFixup(root, child, parent);
+ delete node;
+ }
+
+    // Removes the node whose hole starts exactly at `offset`. The search
+    // result is handed on unchecked, so such a node must exist.
+    void Tree::RawRemove(uint64_t offset) {
+        RawRemove(_root, SearchByOffset(offset));
+    }
+    // Rounds `value` up to the next multiple of `ba_alignment`.
+    static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+        const uint64_t n_units = (value + ba_alignment - 1) / ba_alignment;
+        return n_units * ba_alignment;
+    }
+ // Carves `size` bytes out of the hole held by `node`, starting at the
+ // hole's offset rounded up to _align, and returns that aligned offset.
+ // The aligned range must fit inside the hole (checked by an invariant).
+ // Depending on where the range lies, the hole is trimmed at the front,
+ // trimmed at the back, or split in two; a hole that becomes empty is
+ // removed from the tree.
+ uint64_t Tree::Remove(Node *&root, Node *node, size_t size) {
+ OUUInt64 n_offset = rbn_offset(node);
+ OUUInt64 n_size = rbn_size(node);
+ OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align));
+
+ invariant((answer_offset + size) <= (n_offset + n_size));
+ if (answer_offset == n_offset) {
+ // the hole is already aligned: take the bytes from its front
+ rbn_offset(node) += size;
+ rbn_size(node) -= size;
+ RecalculateMhs(node);
+ if (rbn_size(node) == 0) {
+ RawRemove(root, node);
+ }
+
+ } else {
+ if (answer_offset + size == n_offset + n_size) {
+ // the allocation ends exactly at the end of the hole: only the
+ // unaligned prefix remains
+ rbn_size(node) -= size;
+ RecalculateMhs(node);
+ } else {
+ // well, cut in the middle...
+ // keep the unaligned prefix in this node and re-insert the tail
+ // after the allocation as a new hole
+ rbn_size(node) = answer_offset - n_offset;
+ RecalculateMhs(node);
+ Insert(_root,
+ {(answer_offset + size),
+ (n_offset + n_size) - (answer_offset + size)});
+ }
+ }
+ return answer_offset.ToInt();
+ }
+
+ // Restores the red-black properties after a black node has been unlinked.
+ // `node` is the child that moved into the vacated position (may be NULL)
+ // and `parent` is its parent. In the comments below, w (`other`) is the
+ // sibling of `node`. The loop runs while `node` is black (or NULL) and is
+ // not the root; afterwards `node`, if non-null, is coloured black.
+ void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) {
+ Node *other;
+ while ((!node || rbn_is_black(node)) && node != root) {
+ if (parent->_left == node) {
+ other = parent->_right;
+ if (rbn_is_red(other)) {
+ // Case 1: the sibling of node, w, is red
+ rbn_set_black(other);
+ rbn_set_red(parent);
+ LeftRotate(root, parent);
+ other = parent->_right;
+ }
+ if ((!other->_left || rbn_is_black(other->_left)) &&
+ (!other->_right || rbn_is_black(other->_right))) {
+ // Case 2: w is black and both of w's children are black
+ rbn_set_red(other);
+ node = parent;
+ parent = rbn_parent(node);
+ } else {
+ if (!other->_right || rbn_is_black(other->_right)) {
+ // Case 3: w is black and left child of w is red but
+ // right child is black
+ rbn_set_black(other->_left);
+ rbn_set_red(other);
+ RightRotate(root, other);
+ other = parent->_right;
+ }
+ // Case 4: w is black and right child of w is red,
+ // regardless of left child's color
+ rbn_set_color(other, rbn_color(parent));
+ rbn_set_black(parent);
+ rbn_set_black(other->_right);
+ LeftRotate(root, parent);
+ node = root;
+ break;
+ }
+ } else {
+ // mirror image: node is the right child, w is the left sibling
+ other = parent->_left;
+ if (rbn_is_red(other)) {
+ // Case 1: w is red
+ rbn_set_black(other);
+ rbn_set_red(parent);
+ RightRotate(root, parent);
+ other = parent->_left;
+ }
+ if ((!other->_left || rbn_is_black(other->_left)) &&
+ (!other->_right || rbn_is_black(other->_right))) {
+ // Case 2: w is black and both children are black
+ rbn_set_red(other);
+ node = parent;
+ parent = rbn_parent(node);
+ } else {
+ if (!other->_left || rbn_is_black(other->_left)) {
+ // Case 3: w is black and right child of w is red whereas
+ // left child is black
+ rbn_set_black(other->_right);
+ rbn_set_red(other);
+ LeftRotate(root, other);
+ other = parent->_left;
+ }
+ // Case 4: w is black and left child of w is red, regardless
+ // of the right child's color
+ rbn_set_color(other, rbn_color(parent));
+ rbn_set_black(parent);
+ rbn_set_black(other->_left);
+ RightRotate(root, parent);
+ node = root;
+ break;
+ }
+ }
+ }
+ if (node)
+ rbn_set_black(node);
+ }
+
+    // Deletes every node of the subtree rooted at `tree` (children first)
+    // and resets the caller's pointer to NULL.
+    void Tree::Destroy(Node *&tree) {
+        if (tree != nullptr) {
+            // Destroy() is a no-op on a null child, so no extra checks here.
+            Destroy(tree->_left);
+            Destroy(tree->_right);
+
+            delete tree;
+            tree = nullptr;
+        }
+    }
+
+    // Deletes all nodes and leaves the tree empty.
+    void Tree::Destroy() { Destroy(_root); }
+
+    // Debug helper: prints the subtree rooted at `tree` to stderr, one node
+    // per line as (offset,size,mhs:(left,right)), parents before children.
+    // `pair` is the hole of the parent and `dir` tells which child of that
+    // parent `tree` is; EDirection::NONE marks the root of the dump.
+    void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) {
+        if (tree == nullptr) {
+            return;
+        }
+
+        if (dir != EDirection::NONE) {
+            const char *side =
+                (dir == EDirection::RIGHT) ? "right child" : "left child";
+            fprintf(stderr,
+                    "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64
+                    "))(%c) is %" PRIu64 "'s %s\n",
+                    rbn_offset(tree).ToInt(),
+                    rbn_size(tree).ToInt(),
+                    rbn_left_mhs(tree),
+                    rbn_right_mhs(tree),
+                    rbn_is_red(tree) ? 'R' : 'B',
+                    pair._offset.ToInt(),
+                    side);
+        } else {
+            fprintf(stderr,
+                    "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64
+                    "))(B) is root\n",
+                    rbn_offset(tree).ToInt(),
+                    rbn_size(tree).ToInt(),
+                    rbn_left_mhs(tree),
+                    rbn_right_mhs(tree));
+        }
+
+        Dump(tree->_left, tree->_hole, EDirection::LEFT);
+        Dump(tree->_right, tree->_hole, EDirection::RIGHT);
+    }
+
+    // Number of bytes of node's hole that remain usable once its start has
+    // been rounded up to _align; 0 when the aligned start lies past the end
+    // of the hole.
+    uint64_t Tree::EffectiveSize(Node *node) {
+        const OUUInt64 hole_end = rbn_offset(node) + rbn_size(node);
+        const OUUInt64 first_usable(align(rbn_offset(node).ToInt(), _align));
+        return (first_usable > hole_end)
+                   ? 0
+                   : (hole_end - first_usable).ToInt();
+    }
+
+    // Prints the whole tree to stderr; does nothing for an empty tree.
+    void Tree::Dump() {
+        if (_root == nullptr) {
+            return;
+        }
+        // EDirection::NONE (== 0) marks the node as the root of the dump.
+        Dump(_root, _root->_hole, EDirection::NONE);
+    }
+
+    // InOrderVisitor callback used by ValidateBalance(). `extra` points to a
+    // uint64_t *[2] holding {&min_depth, &max_depth}. Checks that each child
+    // points back to `node` as its parent, and records the smallest and the
+    // largest depth at which a node with a missing child is seen.
+    static void vis_bal_f(void *extra, Node *node, uint64_t depth) {
+        uint64_t **p = (uint64_t **)extra;
+        uint64_t min = *p[0];
+        uint64_t max = *p[1];
+        if (node->_left) {
+            Node *left = node->_left;
+            invariant(node == left->_parent);
+        }
+
+        if (node->_right) {
+            Node *right = node->_right;
+            invariant(node == right->_parent);
+        }
+
+        if (!node->_left || !node->_right) {
+            // The two bounds are independent: a single depth can both lower
+            // the minimum and raise the maximum (e.g. the first such node
+            // visited), so these must not be chained with `else`.
+            if (min > depth) {
+                *p[0] = depth;
+            }
+            if (max < depth) {
+                *p[1] = depth;
+            }
+        }
+    }
+
+    // Sanity check of the tree shape: verifies parent links (through
+    // vis_bal_f) and asserts that the deepest node with a missing child is
+    // at most about twice as deep as the shallowest one. No-op on an empty
+    // tree.
+    void Tree::ValidateBalance() {
+        if (_root == nullptr) {
+            return;
+        }
+        uint64_t min_depth = 0xffffffffffffffff;
+        uint64_t max_depth = 0;
+        uint64_t *depth_bounds[2] = {&min_depth, &max_depth};
+        InOrderVisitor(vis_bal_f, (void *)depth_bounds);
+        invariant((min_depth + 1) * 2 >= max_depth + 1);
+    }
+
+    // InOrderVisitor callback used by ValidateInOrder(). `extra` points to a
+    // cursor into an array of expected pairs: the visited node must have the
+    // offset of the current pair, after which the cursor advances by one.
+    static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) {
+        Node::BlockPair **p = (Node::BlockPair **)extra;
+
+        invariant_notnull(*p);
+        invariant((*p)->_offset == node->_hole._offset);
+
+        ++(*p);
+    }
+
+ // Validates that an in-order walk of the tree yields exactly the offsets of
+ // `pairs`, in order. `pairs` must hold at least as many entries as the tree
+ // has nodes; only the offsets are compared (see vis_cmp_f).
+ void Tree::ValidateInOrder(Node::BlockPair *pairs) {
+ InOrderVisitor(vis_cmp_f, &pairs);
+ }
+
+    // Recomputes the max hole size of the subtree rooted at `node` from the
+    // bottom up and asserts that both labels stored in every node match the
+    // recomputed values. On a mismatch the offending subtree is dumped
+    // before the invariant fires. Returns the subtree's max hole size (0 for
+    // an empty subtree).
+    uint64_t Tree::ValidateMhs(Node *node) {
+        if (node == nullptr) {
+            return 0;
+        }
+
+        const uint64_t mhs_left = ValidateMhs(node->_left);
+        const uint64_t mhs_right = ValidateMhs(node->_right);
+
+        if (rbn_left_mhs(node) != mhs_left) {
+            printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left);
+            Dump(node, node->_hole, EDirection::NONE);
+        }
+        invariant(mhs_left == rbn_left_mhs(node));
+
+        if (rbn_right_mhs(node) != mhs_right) {
+            printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right);
+            Dump(node, node->_hole, EDirection::NONE);
+        }
+        invariant(mhs_right == rbn_right_mhs(node));
+
+        const uint64_t mhs_children = std::max(mhs_left, mhs_right);
+        return std::max(EffectiveSize(node), mhs_children);
+    }
+
+    // Validates the max-hole-size labels of the whole tree; no-op when the
+    // tree is empty.
+    void Tree::ValidateMhs() {
+        if (_root != nullptr) {
+            uint64_t mhs_left = ValidateMhs(_root->_left);
+            uint64_t mhs_right = ValidateMhs(_root->_right);
+            invariant(mhs_left == rbn_left_mhs(_root));
+            invariant(mhs_right == rbn_right_mhs(_root));
+        }
+    }
+
+} // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
new file mode 100644
index 00000000000..92f1e278e1a
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
@@ -0,0 +1,351 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <db.h>
+
+#include "portability/toku_pthread.h"
+#include "portability/toku_stdint.h"
+#include "portability/toku_stdlib.h"
+
+// RBTree(Red-black tree) with max hole sizes for subtrees.
+
+// This is a tentative data structure to improve the block allocation time
+// complexity from linear time to logarithmic time. Please note that this data
+// structure only supports first-fit for now. It is actually easier to do it
+// with best-fit (just sort by size).
+
+// RBTree is a classic data struct with O(log(n)) for insertion, deletion and
+// search. Many years have seen its efficiency.
+
+// a *hole* is the representation of an available BlockPair for allocation.
+// defined as (start_address, size) or (offset, size) interchangeably.
+
+// each node has a *label* to indicate a pair of the max hole sizes for its
+// subtree.
+
+// We are implementing a RBTree with max hole sizes for subtrees. It is a red
+// black tree that is sorted by the start_address but also labeled with the max
+// hole sizes of the subtrees.
+
+// [(6,3)] -> [(offset, size)], the hole
+// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label
+/* / \ */
+// [(0, 1)] [(10, 5)]
+// [{0, 2}] [{0, 0}]
+/* \ */
+// [(3, 2)]
+// [{0, 0}]
+// request of allocation size=2 goes from root to [(3,2)].
+
+// above example shows a simplified RBTree_max_holes.
+// it is easier to tell the search time is O(log(n)) as we can make a decision
+// on each descent until we get to the target.
+
+// the only question is if we can keep the maintenance cost low -- and i think
+// it is not a problem because an insertion/deletion is only going to update the
+// max_hole_sizes of the nodes along the path from the root to the node to be
+// deleted/inserted. The path can be cached and search is anyway O(log(n)).
+
+// unlike the typical rbtree, Tree has to handle the inserts and deletes
+// with more care: an allocation that triggers the delete might leave some
+// unused space, for which we can simply update the start_addr and size without
+// worrying about overlapping. A free might not only mean an insertion but also
+// *merging* with the adjacent holes.
+
+namespace MhsRbTree {
+
+// Offsets handled by the tree are plain 64-bit unsigned integers.
+#define offset_t uint64_t
+ // Node colours of the red-black tree.
+ enum class EColor { RED, BLACK };
+ // Which child of its parent a node is; Tree::Dump() uses NONE for the root.
+ enum class EDirection { NONE = 0, LEFT, RIGHT };
+
+    // I am a bit tired of fixing overflow/underflow, just quickly craft some
+    // int class that has an infinity-like max value and prevents overflow and
+    // underflow. If you got a file offset larger than MHS_MAX_VAL, it is not
+    // a problem here. :-/ - JYM
+    //
+    // Semantics:
+    //  - MHS_MAX_VAL is a sticky sentinel: adding anything to it, or
+    //    subtracting a finite value from it, leaves it unchanged.
+    //  - Ordering two sentinels against each other trips an invariant.
+    //  - Finite additions that would overflow and finite subtractions that
+    //    would underflow trip an invariant instead of wrapping around.
+    //  - The assignment operators return *this by reference (the canonical
+    //    form), so chained expressions act on the object, not on a copy.
+    class OUUInt64 {
+       public:
+        static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
+        OUUInt64() : _value(0) {}
+        // Intentionally implicit: callers mix OUUInt64 with plain integers.
+        OUUInt64(uint64_t s) : _value(s) {}
+        bool operator<(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value < r.ToInt();
+        }
+        bool operator>(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value > r.ToInt();
+        }
+        bool operator<=(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value <= r.ToInt();
+        }
+        bool operator>=(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value >= r.ToInt();
+        }
+        OUUInt64 operator+(const OUUInt64 &r) const {
+            if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) {
+                // the sentinel absorbs any addition
+                OUUInt64 tmp(MHS_MAX_VAL);
+                return tmp;
+            } else {
+                // detecting overflow
+                invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+                uint64_t plus = _value + r.ToInt();
+                OUUInt64 tmp(plus);
+                return tmp;
+            }
+        }
+        OUUInt64 operator-(const OUUInt64 &r) const {
+            invariant(r.ToInt() != MHS_MAX_VAL);
+            if (_value == MHS_MAX_VAL) {
+                // sentinel minus a finite value stays the sentinel
+                return *this;
+            } else {
+                // detecting underflow
+                invariant(_value >= r.ToInt());
+                uint64_t minus = _value - r.ToInt();
+                OUUInt64 tmp(minus);
+                return tmp;
+            }
+        }
+        OUUInt64 &operator-=(const OUUInt64 &r) {
+            if (_value != MHS_MAX_VAL) {
+                invariant(r.ToInt() != MHS_MAX_VAL);
+                invariant(_value >= r.ToInt());
+                _value -= r.ToInt();
+            }
+            return *this;
+        }
+        OUUInt64 &operator+=(const OUUInt64 &r) {
+            if (_value != MHS_MAX_VAL) {
+                if (r.ToInt() == MHS_MAX_VAL) {
+                    _value = MHS_MAX_VAL;
+                } else {
+                    invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+                    this->_value += r.ToInt();
+                }
+            }
+            return *this;
+        }
+        bool operator==(const OUUInt64 &r) const {
+            return _value == r.ToInt();
+        }
+        bool operator!=(const OUUInt64 &r) const {
+            return _value != r.ToInt();
+        }
+        OUUInt64 &operator=(const OUUInt64 &r) {
+            _value = r.ToInt();
+            return *this;
+        }
+        // Raw 64-bit value (MHS_MAX_VAL for the sentinel).
+        uint64_t ToInt() const { return _value; }
+
+       private:
+        uint64_t _value;
+    };
+
+    // One node of the tree: a free hole, the max-hole-size label of its two
+    // subtrees, its colour and the usual left/right/parent links.
+    class Node {
+       public:
+        // A block described by its starting offset and its size.
+        struct BlockPair {
+            OUUInt64 _offset;
+            OUUInt64 _size;
+
+            BlockPair() : _offset(0), _size(0) {}
+            BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+            BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
+
+            // Blocks are ordered by offset only; the size is ignored.
+            int operator<(const BlockPair &rhs) const {
+                return _offset < rhs._offset;
+            }
+            int operator<(const uint64_t &o) const { return _offset < o; }
+        };
+
+        // The label: max hole sizes of the left and of the right subtree.
+        struct Pair {
+            uint64_t _left;
+            uint64_t _right;
+            Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {}
+        };
+
+        EColor _color;
+        BlockPair _hole;
+        Pair _label;
+        Node *_left;
+        Node *_right;
+        Node *_parent;
+
+        // c: colour, h: hole, lb: label, l/r/p: left, right, parent links.
+        Node(EColor c, BlockPair h, Pair lb, Node *l, Node *r, Node *p)
+            : _color(c),
+              _hole(h),
+              _label(lb),
+              _left(l),
+              _right(r),
+              _parent(p) {}
+    };
+
+    // Red-black tree of free holes, keyed by hole offset. Every node carries
+    // a label with the max hole sizes of its left and right subtrees, which
+    // lets a first-fit search decide at each node which way to descend.
+    class Tree {
+       private:
+        Node *_root;
+        // allocation offsets are rounded up to a multiple of this value
+        uint64_t _align;
+
+       public:
+        Tree();
+        Tree(uint64_t);
+        ~Tree();
+
+        // debug traversals, printing node offsets to stderr
+        void PreOrder();
+        void InOrder();
+        void PostOrder();
+        // immutable operations
+        Node *SearchByOffset(uint64_t addr);
+        Node *SearchFirstFitBySize(uint64_t size);
+
+        Node *MinNode();
+        Node *MaxNode();
+
+        Node *Successor(Node *);
+        Node *Predecessor(Node *);
+
+        // mapped from tree_allocator::free_block
+        int Insert(Node::BlockPair pair);
+        // mapped from tree_allocator::alloc_block
+        uint64_t Remove(size_t size);
+        // mapped from tree_allocator::alloc_block_after
+
+        void RawRemove(uint64_t offset);
+        void Destroy();
+        // print the tree
+        void Dump();
+        // validation
+        // balance
+        void ValidateBalance();
+        void ValidateInOrder(Node::BlockPair *);
+        void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *);
+        void ValidateMhs();
+
+       private:
+        void PreOrder(Node *node) const;
+        void InOrder(Node *node) const;
+        void PostOrder(Node *node) const;
+        // NOTE(review): no definition of the next two overloads is visible
+        // in rbtree_mhs.cc - confirm whether they are still needed.
+        Node *SearchByOffset(Node *node, offset_t addr) const;
+        Node *SearchFirstFitBySize(Node *node, size_t size) const;
+
+        Node *MinNode(Node *node);
+        Node *MaxNode(Node *node);
+
+        // rotations to fix up. we will have to update the labels too.
+        void LeftRotate(Node *&root, Node *x);
+        void RightRotate(Node *&root, Node *y);
+
+        int Insert(Node *&root, Node::BlockPair pair);
+        int InsertFixup(Node *&root, Node *node);
+
+        void RawRemove(Node *&root, Node *node);
+        uint64_t Remove(Node *&root, Node *node, size_t size);
+        void RawRemoveFixup(Node *&root, Node *node, Node *parent);
+
+        void Destroy(Node *&tree);
+        void Dump(Node *tree, Node::BlockPair pair, EDirection dir);
+        void RecalculateMhs(Node *node);
+        void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *);
+        void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool);
+        Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size);
+
+        Node *SuccessorHelper(Node *y, Node *x);
+
+        Node *PredecessorHelper(Node *y, Node *x);
+
+        void InOrderVisitor(Node *,
+                            void (*f)(void *, Node *, uint64_t),
+                            void *,
+                            uint64_t);
+        uint64_t ValidateMhs(Node *);
+
+        // size of a node's hole after rounding its start up to _align
+        uint64_t EffectiveSize(Node *);
+// mixed with some macros.....
+// Accessors (usable as lvalues) and setters for Node fields. `r` is a
+// Node *; `c` / `p` is the value to store.
+#define rbn_parent(r) ((r)->_parent)
+#define rbn_color(r) ((r)->_color)
+#define rbn_is_red(r) ((r)->_color == EColor::RED)
+#define rbn_is_black(r) ((r)->_color == EColor::BLACK)
+#define rbn_set_black(r)             \
+    do {                             \
+        (r)->_color = EColor::BLACK; \
+    } while (0)
+#define rbn_set_red(r)             \
+    do {                           \
+        (r)->_color = EColor::RED; \
+    } while (0)
+#define rbn_set_parent(r, p) \
+    do {                     \
+        (r)->_parent = (p);  \
+    } while (0)
+#define rbn_set_color(r, c) \
+    do {                    \
+        (r)->_color = (c);  \
+    } while (0)
+// takes the new offset as a second argument, like the other setters (the
+// one-argument form expanded to an undeclared `c`)
+#define rbn_set_offset(r, c)      \
+    do {                          \
+        (r)->_hole._offset = (c); \
+    } while (0)
+#define rbn_set_size(r, c)      \
+    do {                        \
+        (r)->_hole._size = (c); \
+    } while (0)
+#define rbn_set_left_mhs(r, c)   \
+    do {                         \
+        (r)->_label._left = (c); \
+    } while (0)
+#define rbn_set_right_mhs(r, c)   \
+    do {                          \
+        (r)->_label._right = (c); \
+    } while (0)
+#define rbn_size(r) ((r)->_hole._size)
+#define rbn_offset(r) ((r)->_hole._offset)
+#define rbn_key(r) ((r)->_hole._offset)
+#define rbn_left_mhs(r) ((r)->_label._left)
+#define rbn_right_mhs(r) ((r)->_label._right)
+// max hole size of the whole subtree rooted at y: the larger of its two
+// labels and its own effective size (needs Tree::EffectiveSize in scope)
+#define mhs_of_subtree(y) \
+    (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y)))
+    };
+
+} // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
deleted file mode 100644
index 3670ef81cc2..00000000000
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include "ft/tests/test.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static const uint64_t alignment = 4096;
-
-static void test_first_vs_best_fit(void) {
- struct block_allocator::blockpair pairs[] = {
- block_allocator::blockpair(1 * alignment, 6 * alignment),
- // hole between 7x align -> 8x align
- block_allocator::blockpair(8 * alignment, 4 * alignment),
- // hole between 12x align -> 16x align
- block_allocator::blockpair(16 * alignment, 1 * alignment),
- block_allocator::blockpair(17 * alignment, 2 * alignment),
- // hole between 19 align -> 21x align
- block_allocator::blockpair(21 * alignment, 2 * alignment),
- };
- const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-
- block_allocator::blockpair *bp;
-
- // first fit
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
- assert(bp == &pairs[1]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
- assert(bp == nullptr);
-
- // best fit
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
- assert(bp == &pairs[3]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
- assert(bp == &pairs[1]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
- assert(bp == nullptr);
-}
-
-static void test_padded_fit(void) {
- struct block_allocator::blockpair pairs[] = {
- block_allocator::blockpair(1 * alignment, 1 * alignment),
- // 4096 byte hole after bp[0]
- block_allocator::blockpair(3 * alignment, 1 * alignment),
- // 8192 byte hole after bp[1]
- block_allocator::blockpair(6 * alignment, 1 * alignment),
- // 16384 byte hole after bp[2]
- block_allocator::blockpair(11 * alignment, 1 * alignment),
- // 32768 byte hole after bp[3]
- block_allocator::blockpair(17 * alignment, 1 * alignment),
- // 116kb hole after bp[4]
- block_allocator::blockpair(113 * alignment, 1 * alignment),
- // 256kb hole after bp[5]
- block_allocator::blockpair(371 * alignment, 1 * alignment),
- };
- const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-
- block_allocator::blockpair *bp;
-
- // padding for a 100 byte allocation will be < than standard alignment,
- // so it should fit in the first 4096 byte hole.
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
- assert(bp == &pairs[0]);
-
- // Even padded, a 12kb alloc will fit in a 16kb hole
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
- assert(bp == &pairs[2]);
-
- // would normally fit in the 116kb hole but the padding will bring it over
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
- assert(bp == &pairs[5]);
-
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
- assert(bp == &pairs[5]);
-}
-
-int test_main(int argc, const char *argv[]) {
- (void) argc;
- (void) argv;
-
- test_first_vs_best_fit();
- test_padded_fit();
-
- return 0;
-}
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
index d80ee83cbc9..3eff52b915d 100644
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
@@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
-static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
- ba->validate();
+static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) {
+ ba->Validate();
uint64_t actual_answer;
- const uint64_t heat = random() % 2;
- ba->alloc_block(512 * size, heat, &actual_answer);
- ba->validate();
+ ba->AllocBlock(512 * size, &actual_answer);
+ ba->Validate();
- assert(actual_answer%512==0);
- *answer = actual_answer/512;
+ invariant(actual_answer % 512 == 0);
+ *answer = actual_answer / 512;
}
-static void ba_free(block_allocator *ba, uint64_t offset) {
- ba->validate();
- ba->free_block(offset * 512);
- ba->validate();
+static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) {
+ ba->Validate();
+ ba->FreeBlock(offset * 512, 512 * size);
+ ba->Validate();
}
-static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order,
- uint64_t expected_offset, uint64_t expected_size) {
+static void ba_check_l(BlockAllocator *ba,
+ uint64_t blocknum_in_layout_order,
+ uint64_t expected_offset,
+ uint64_t expected_size) {
uint64_t actual_offset, actual_size;
- int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
- assert(r==0);
- assert(expected_offset*512 == actual_offset);
- assert(expected_size *512 == actual_size);
+ int r = ba->NthBlockInLayoutOrder(
+ blocknum_in_layout_order, &actual_offset, &actual_size);
+ invariant(r == 0);
+ invariant(expected_offset * 512 == actual_offset);
+ invariant(expected_size * 512 == actual_size);
}
-static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) {
+static void ba_check_none(BlockAllocator *ba,
+ uint64_t blocknum_in_layout_order) {
uint64_t actual_offset, actual_size;
- int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
- assert(r==-1);
+ int r = ba->NthBlockInLayoutOrder(
+ blocknum_in_layout_order, &actual_offset, &actual_size);
+ invariant(r == -1);
}
-
// Simple block allocator test
-static void test_ba0(block_allocator::allocation_strategy strategy) {
- block_allocator allocator;
- block_allocator *ba = &allocator;
- ba->create(100*512, 1*512);
- ba->set_strategy(strategy);
- assert(ba->allocated_limit()==100*512);
+static void test_ba0() {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
+ ba->Create(100 * 512, 1 * 512);
+ invariant(ba->AllocatedLimit() == 100 * 512);
uint64_t b2, b3, b4, b5, b6, b7;
- ba_alloc(ba, 100, &b2);
- ba_alloc(ba, 100, &b3);
- ba_alloc(ba, 100, &b4);
- ba_alloc(ba, 100, &b5);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b7);
- ba_free(ba, b2);
- ba_alloc(ba, 100, &b2);
- ba_free(ba, b4);
- ba_free(ba, b6);
+ ba_alloc(ba, 100, &b2);
+ ba_alloc(ba, 100, &b3);
+ ba_alloc(ba, 100, &b4);
+ ba_alloc(ba, 100, &b5);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b7);
+ ba_free(ba, b2, 100);
+ ba_alloc(ba, 100, &b2);
+ ba_free(ba, b4, 100);
+ ba_free(ba, b6, 100);
uint64_t b8, b9;
- ba_alloc(ba, 100, &b4);
- ba_free(ba, b2);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b8);
- ba_alloc(ba, 100, &b9);
- ba_free(ba, b6);
- ba_free(ba, b7);
- ba_free(ba, b8);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b7);
- ba_free(ba, b4);
- ba_alloc(ba, 100, &b4);
-
- ba->destroy();
+ ba_alloc(ba, 100, &b4);
+ ba_free(ba, b2, 100);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b8);
+ ba_alloc(ba, 100, &b9);
+ ba_free(ba, b6, 100);
+ ba_free(ba, b7, 100);
+ ba_free(ba, b8, 100);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b7);
+ ba_free(ba, b4, 100);
+ ba_alloc(ba, 100, &b4);
+
+ ba->Destroy();
}
// Manually to get coverage of all the code in the block allocator.
-static void
-test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
- block_allocator allocator;
- block_allocator *ba = &allocator;
- ba->create(0*512, 1*512);
- ba->set_strategy(strategy);
-
- int n_blocks=0;
+static void test_ba1(int n_initial) {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
+ ba->Create(0 * 512, 1 * 512);
+
+ int n_blocks = 0;
uint64_t blocks[1000];
for (int i = 0; i < 1000; i++) {
- if (i < n_initial || random() % 2 == 0) {
- if (n_blocks < 1000) {
- ba_alloc(ba, 1, &blocks[n_blocks]);
- //printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
- n_blocks++;
- }
- } else {
- if (n_blocks > 0) {
- int blocknum = random()%n_blocks;
- //printf("F[%d]%ld\n", blocknum, blocks[blocknum]);
- ba_free(ba, blocks[blocknum]);
- blocks[blocknum]=blocks[n_blocks-1];
- n_blocks--;
- }
- }
+ if (i < n_initial || random() % 2 == 0) {
+ if (n_blocks < 1000) {
+ ba_alloc(ba, 1, &blocks[n_blocks]);
+ // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
+ n_blocks++;
+ }
+ } else {
+ if (n_blocks > 0) {
+ int blocknum = random() % n_blocks;
+ // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]);
+ ba_free(ba, blocks[blocknum], 1);
+ blocks[blocknum] = blocks[n_blocks - 1];
+ n_blocks--;
+ }
+ }
}
-
- ba->destroy();
+
+ ba->Destroy();
}
-
+
// Check to see if it is first fit or best fit.
-static void
-test_ba2 (void)
-{
- block_allocator allocator;
- block_allocator *ba = &allocator;
+static void test_ba2(void) {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
uint64_t b[6];
enum { BSIZE = 1024 };
- ba->create(100*512, BSIZE*512);
- ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT);
- assert(ba->allocated_limit()==100*512);
-
- ba_check_l (ba, 0, 0, 100);
- ba_check_none (ba, 1);
-
- ba_alloc (ba, 100, &b[0]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_none (ba, 2);
-
- ba_alloc (ba, BSIZE + 100, &b[1]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_none (ba, 3);
-
- ba_alloc (ba, 100, &b[2]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_none (ba, 4);
-
- ba_alloc (ba, 100, &b[3]);
- ba_alloc (ba, 100, &b[4]);
- ba_alloc (ba, 100, &b[5]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
-
- ba_free (ba, 4*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 5*BSIZE, 100);
- ba_check_l (ba, 4, 6*BSIZE, 100);
- ba_check_l (ba, 5, 7*BSIZE, 100);
- ba_check_none (ba, 6);
+ ba->Create(100 * 512, BSIZE * 512);
+ invariant(ba->AllocatedLimit() == 100 * 512);
+
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_none(ba, 1);
+
+ ba_alloc(ba, 100, &b[0]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_none(ba, 2);
+
+ ba_alloc(ba, BSIZE + 100, &b[1]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_none(ba, 3);
+
+ ba_alloc(ba, 100, &b[2]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_none(ba, 4);
+
+ ba_alloc(ba, 100, &b[3]);
+ ba_alloc(ba, 100, &b[4]);
+ ba_alloc(ba, 100, &b[5]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
+
+ ba_free(ba, 4 * BSIZE, 100);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 5 * BSIZE, 100);
+ ba_check_l(ba, 4, 6 * BSIZE, 100);
+ ba_check_l(ba, 5, 7 * BSIZE, 100);
+ ba_check_none(ba, 6);
uint64_t b2;
ba_alloc(ba, 100, &b2);
- assert(b2==4*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
-
- ba_free (ba, BSIZE);
- ba_free (ba, 5*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 2, 4*BSIZE, 100);
- ba_check_l (ba, 3, 6*BSIZE, 100);
- ba_check_l (ba, 4, 7*BSIZE, 100);
- ba_check_none (ba, 5);
-
- // This alloc will allocate the first block after the reserve space in the case of first fit.
+ invariant(b2 == 4 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
+
+ ba_free(ba, BSIZE, 100);
+ ba_free(ba, 5 * BSIZE, 100);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 2, 4 * BSIZE, 100);
+ ba_check_l(ba, 3, 6 * BSIZE, 100);
+ ba_check_l(ba, 4, 7 * BSIZE, 100);
+ ba_check_none(ba, 5);
+
+ // This alloc will allocate the first block after the reserve space in the
+ // case of first fit.
uint64_t b3;
ba_alloc(ba, 100, &b3);
- assert(b3== BSIZE); // First fit.
+ invariant(b3 == BSIZE); // First fit.
// if (b3==5*BSIZE) then it is next fit.
// Now 5*BSIZE is free
uint64_t b5;
ba_alloc(ba, 100, &b5);
- assert(b5==5*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
+ invariant(b5 == 5 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
// Now all blocks are busy
uint64_t b6, b7, b8;
ba_alloc(ba, 100, &b6);
ba_alloc(ba, 100, &b7);
ba_alloc(ba, 100, &b8);
- assert(b6==8*BSIZE);
- assert(b7==9*BSIZE);
- assert(b8==10*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_l (ba, 7, 8*BSIZE, 100);
- ba_check_l (ba, 8, 9*BSIZE, 100);
- ba_check_l (ba, 9, 10*BSIZE, 100);
- ba_check_none (ba, 10);
-
- ba_free(ba, 9*BSIZE);
- ba_free(ba, 7*BSIZE);
+ invariant(b6 == 8 * BSIZE);
+ invariant(b7 == 9 * BSIZE);
+ invariant(b8 == 10 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_l(ba, 7, 8 * BSIZE, 100);
+ ba_check_l(ba, 8, 9 * BSIZE, 100);
+ ba_check_l(ba, 9, 10 * BSIZE, 100);
+ ba_check_none(ba, 10);
+
+ ba_free(ba, 9 * BSIZE, 100);
+ ba_free(ba, 7 * BSIZE, 100);
uint64_t b9;
ba_alloc(ba, 100, &b9);
- assert(b9==7*BSIZE);
+ invariant(b9 == 7 * BSIZE);
- ba_free(ba, 5*BSIZE);
- ba_free(ba, 2*BSIZE);
+ ba_free(ba, 5 * BSIZE, 100);
+ ba_free(ba, 2 * BSIZE, BSIZE + 100);
uint64_t b10, b11;
ba_alloc(ba, 100, &b10);
- assert(b10==2*BSIZE);
+ invariant(b10 == 2 * BSIZE);
ba_alloc(ba, 100, &b11);
- assert(b11==3*BSIZE);
+ invariant(b11 == 3 * BSIZE);
ba_alloc(ba, 100, &b11);
- assert(b11==5*BSIZE);
+ invariant(b11 == 5 * BSIZE);
- ba->destroy();
+ ba->Destroy();
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
- enum block_allocator::allocation_strategy strategies[] = {
- block_allocator::BA_STRATEGY_FIRST_FIT,
- block_allocator::BA_STRATEGY_BEST_FIT,
- block_allocator::BA_STRATEGY_PADDED_FIT,
- block_allocator::BA_STRATEGY_HEAT_ZONE,
- };
- for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
- test_ba0(strategies[i]);
- test_ba1(strategies[i], 0);
- test_ba1(strategies[i], 10);
- test_ba1(strategies[i], 20);
- }
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
+ test_ba0();
+ test_ba1(0);
+ test_ba1(10);
+ test_ba1(20);
test_ba2();
return 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
index a7c48ef709a..ee68ab3ef0b 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
@@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// #5978 is fixed. Here is what we do. We have four pairs with
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
-// We pin all four with expensive write locks. Then, on backgroud threads,
+// We pin all four with expensive write locks. Then, on background threads,
// we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and
// we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this
// enough times, and we should see a deadlock before the fix, and no deadlock
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
index be4bae898be..51cf70c3e76 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
@@ -77,7 +77,7 @@ flush (
//
// test the following things for simple cloning:
-// - verifies that after teh checkpoint ends, the PAIR is properly
+// - verifies that after the checkpoint ends, the PAIR is properly
// dirty or clean based on the second unpin
//
static void
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
index cb03a23e0fc..7abd2267a7e 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
@@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
-static int
-int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
- int64_t x = *(int64_t *) a->data;
- int64_t y = *(int64_t *) b->data;
-
- if (x<y) return -1;
- if (x>y) return 1;
+static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) {
+ int64_t x = *(int64_t *)a->data;
+ int64_t y = *(int64_t *)b->data;
+
+ if (x < y)
+ return -1;
+ if (x > y)
+ return 1;
return 0;
}
-static void
-test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
int r;
FT_CURSOR XMALLOC(cursor);
FTNODE dn = NULL;
PAIR_ATTR attr;
-
+
// first test that prefetching everything should work
- memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
- memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+ memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+ memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
cursor->left_is_neg_infty = true;
cursor->right_is_pos_infty = true;
cursor->disable_prefetching = false;
-
+
ftnode_fetch_extra bfe;
// quick test to see that we have the right behavior when we set
// disable_prefetching to true
cursor->disable_prefetching = true;
- bfe.create_for_prefetch( ft_h, cursor);
+ bfe.create_for_prefetch(ft_h, cursor);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
// now enable prefetching again
cursor->disable_prefetching = false;
-
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
uint64_t left_key = 150;
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
cursor->left_is_neg_infty = false;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
uint64_t right_key = 151;
toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
cursor->right_is_pos_infty = false;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
left_key = 100000;
right_key = 100000;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_free(ndd);
toku_ftnode_free(&dn);
left_key = 100;
right_key = 100;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
toku_free(cursor);
}
-static void
-test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
int r;
FT_CURSOR XMALLOC(cursor);
FTNODE dn = NULL;
FTNODE_DISK_DATA ndd = NULL;
PAIR_ATTR attr;
-
+
// first test that prefetching everything should work
- memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
- memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+ memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+ memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
cursor->left_is_neg_infty = true;
cursor->right_is_pos_infty = true;
-
+
uint64_t left_key = 150;
uint64_t right_key = 151;
DBT left, right;
@@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
ftnode_fetch_extra bfe;
bfe.create_for_subset_read(
- ft_h,
- NULL,
- &left,
- &right,
- false,
- false,
- false,
- false
- );
-
+ ft_h, NULL, &left, &right, false, false, false, false);
+
// fake the childnum to read
// set disable_prefetching ON
bfe.child_to_read = 2;
bfe.disable_prefetching = true;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
toku_ftnode_free(&dn);
toku_free(ndd);
// fake the childnum to read
bfe.child_to_read = 2;
bfe.disable_prefetching = false;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
toku_ftnode_free(&dn);
toku_free(ndd);
// fake the childnum to read
bfe.child_to_read = 0;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
toku_ftnode_free(&dn);
toku_free(ndd);
toku_free(cursor);
}
-
-static void
-test_prefetching(void) {
+static void test_prefetching(void) {
// struct ft_handle source_ft;
struct ftnode sn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -327,7 +342,7 @@ test_prefetching(void) {
uint64_t key1 = 100;
uint64_t key2 = 200;
-
+
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkeys[2];
toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1));
@@ -336,13 +351,13 @@ test_prefetching(void) {
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
BP_BLOCKNUM(&sn, 2).b = 40;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
- BP_STATE(&sn,2) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
+ BP_STATE(&sn, 2) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
set_BNC(&sn, 2, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -352,7 +367,7 @@ test_prefetching(void) {
CKERR(r);
// data in the buffers does not matter in this test
- //Cleanup:
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -363,41 +378,48 @@ test_prefetching(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(int64_key_cmp, nullptr);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
- test_prefetch_read(fd, ft, ft_h);
+ test_prefetch_read(fd, ft, ft_h);
test_subset_read(fd, ft, ft_h);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
@@ -405,11 +427,12 @@ test_prefetching(void) {
toku_free(ft);
toku_free(ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
test_prefetching();
return 0;
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
index ceef3772e2a..26a3dae673c 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
@@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/cursor.h"
-enum ftnode_verify_type {
- read_all=1,
- read_compressed,
- read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
char *CAST_FROM_VOIDP(s, a->data);
char *CAST_FROM_VOIDP(t, b->data);
return strcmp(s, t);
}
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ int keylen,
+ const char *val,
+ int vallen) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keylen,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char
memcpy(r->u.clean.val, val, vallen);
}
-
-static void
-le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
-{
+static void le_malloc(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ const char *val) {
int keylen = strlen(key) + 1;
int vallen = strlen(val) + 1;
le_add_to_bn(bn, idx, key, keylen, val, vallen);
}
-
-static void
-test1(int fd, FT ft_h, FTNODE *dn) {
+static void test1(int fd, FT ft_h, FTNODE *dn) {
int r;
ftnode_fetch_extra bfe_all;
bfe_all.create_for_full_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all);
bool is_leaf = ((*dn)->height == 0);
- assert(r==0);
+ invariant(r == 0);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and NOT get rid of anything
PAIR_ATTR attr;
- memset(&attr,0,sizeof(attr));
+ memset(&attr, 0, sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
- }
- else {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
}
PAIR_ATTR size;
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
- }
- else {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
- }
+ }
req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
(*dn)->dirty = 1;
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
@@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) {
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
toku_free(ndd);
toku_ftnode_free(dn);
}
-
-static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) {
+static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) {
return 0;
}
-static void
-test2(int fd, FT ft_h, FTNODE *dn) {
+static void test2(int fd, FT ft_h, FTNODE *dn) {
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
memset(&left, 0, sizeof(left));
memset(&right, 0, sizeof(right));
ft_search search;
-
+
ftnode_fetch_extra bfe_subset;
bfe_subset.create_for_subset_read(
ft_h,
- ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
+ ft_search_init(
+ &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
&left,
&right,
true,
true,
false,
- false
- );
+ false);
FTNODE_DISK_DATA ndd = NULL;
- int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
- assert(r==0);
+ int r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset);
+ invariant(r == 0);
bool is_leaf = ((*dn)->height == 0);
- // at this point, although both partitions are available, only the
+ // at this point, although both partitions are available, only the
// second basement node should have had its clock
// touched
- assert(BP_STATE(*dn, 0) == PT_AVAIL);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 0));
- assert(!BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 0));
+ invariant(!BP_SHOULD_EVICT(*dn, 1));
PAIR_ATTR attr;
- memset(&attr,0,sizeof(attr));
+ memset(&attr, 0, sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 1));
+ toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(*dn, 1) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
+ bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr);
- assert(BP_STATE(*dn, 0) == PT_AVAIL);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 0));
- assert(!BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 0));
+ invariant(!BP_SHOULD_EVICT(*dn, 1));
toku_free(ndd);
toku_ftnode_free(dn);
}
-static void
-test3_leaf(int fd, FT ft_h, FTNODE *dn) {
+static void test3_leaf(int fd, FT ft_h, FTNODE *dn) {
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
memset(&left, 0, sizeof(left));
memset(&right, 0, sizeof(right));
-
+
ftnode_fetch_extra bfe_min;
bfe_min.create_for_min_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
- int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
- assert(r==0);
+ int r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min);
+ invariant(r == 0);
//
// make sure we have a leaf
//
- assert((*dn)->height == 0);
+ invariant((*dn)->height == 0);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
toku_ftnode_free(dn);
toku_free(ndd);
}
-static void
-test_serialize_nonleaf(void) {
+static void test_serialize_nonleaf(void) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -265,11 +253,11 @@ test_serialize_nonleaf(void) {
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -281,11 +269,38 @@ test_serialize_nonleaf(void) {
toku::comparator cmp;
cmp.create(string_key_cmp, nullptr);
- toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
- toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
- toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
- //Cleanup:
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "a",
+ 2,
+ "aval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_0,
+ true,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "b",
+ 2,
+ "bval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ false,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 1),
+ "x",
+ 2,
+ "xval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_234,
+ true,
+ cmp);
+
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -297,35 +312,41 @@ test_serialize_nonleaf(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(string_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
test1(fd, ft_h, &dn);
test2(fd, ft_h, &dn);
@@ -333,22 +354,26 @@ test_serialize_nonleaf(void) {
toku_destroy_ftnode_internals(&sn);
toku_free(ndd);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
ft_h->cmp.destroy();
toku_free(ft_h);
toku_free(ft);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf(void) {
+static void test_serialize_leaf(void) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -364,8 +389,8 @@ test_serialize_leaf(void) {
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn());
le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval");
@@ -378,51 +403,59 @@ test_serialize_leaf(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
test1(fd, ft_h, &dn);
- test3_leaf(fd, ft_h,&dn);
+ test3_leaf(fd, ft_h, &dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
toku_free(ft);
toku_free(ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
initialize_dummymsn();
test_serialize_nonleaf();
test_serialize_leaf();
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
index 9828f49513c..d50488ae197 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
@@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include <sys/time.h>
#include "test.h"
-
-
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
const double USECS_PER_SEC = 1000000.0;
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ char *key,
+ int keylen,
+ char *val,
+ int vallen) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keylen,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va
memcpy(r->u.clean.val, val, vallen);
}
-static int
-long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
const long *CAST_FROM_VOIDP(x, a->data);
const long *CAST_FROM_VOIDP(y, b->data);
return (*x > *y) - (*x < *y);
}
-static void
-test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_leaf(int valsize,
+ int nelts,
+ double entropy,
+ int ser_runs,
+ int deser_runs) {
// struct ft_handle source_ft;
struct ftnode *sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
MALLOC_N(sn->n_children, sn->bp);
sn->pivotkeys.create_empty();
for (int i = 0; i < sn->n_children; ++i) {
- BP_STATE(sn,i) = PT_AVAIL;
+ BP_STATE(sn, i) = PT_AVAIL;
set_BLB(sn, i, toku_create_empty_bn());
}
int nperbn = nelts / sn->n_children;
@@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
k = ck * nperbn + i;
char buf[valsize];
int c;
- for (c = 0; c < valsize * entropy; ) {
- int *p = (int *) &buf[c];
+ for (c = 0; c < valsize * entropy;) {
+ int *p = (int *)&buf[c];
*p = rand();
c += sizeof(*p);
}
memset(&buf[c], 0, valsize - c);
le_add_to_bn(
- BLB_DATA(sn,ck),
- i,
- (char *)&k,
- sizeof k,
- buf,
- sizeof buf
- );
+ BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf);
}
if (ck < 7) {
DBT pivotkey;
- sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
+ sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)),
+ ck);
}
}
@@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(long_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
struct timeval total_start;
@@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
gettimeofday(&t[0], NULL);
ndd = NULL;
sn->dirty = 1;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
total_start.tv_sec += t[0].tv_sec;
total_start.tv_usec += t[0].tv_usec;
@@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
toku_free(ndd);
}
double dt;
- dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+ dt = (total_end.tv_sec - total_start.tv_sec) +
+ ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= ser_runs;
- printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
+ printf(
+ "serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
- //reset
+ // reset
total_start.tv_sec = total_start.tv_usec = 0;
total_end.tv_sec = total_end.tv_usec = 0;
@@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
- assert(r==0);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
total_start.tv_sec += t[0].tv_sec;
@@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
toku_ftnode_free(&dn);
toku_free(ndd2);
}
- dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+ dt = (total_end.tv_sec - total_start.tv_sec) +
+ ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= deser_runs;
- printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
- printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
- tokutime_to_seconds(bfe.io_time)*1000,
- tokutime_to_seconds(bfe.decompress_time)*1000,
- tokutime_to_seconds(bfe.deserialize_time)*1000,
- deser_runs
- );
+ printf(
+ "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
+ printf(
+ "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+ "(average of %d runs)\n",
+ tokutime_to_seconds(bfe.io_time) * 1000,
+ tokutime_to_seconds(bfe.decompress_time) * 1000,
+ tokutime_to_seconds(bfe.deserialize_time) * 1000,
+ deser_runs);
toku_ftnode_free(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
toku_free(ft);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_nonleaf(int valsize,
+ int nelts,
+ double entropy,
+ int ser_runs,
+ int deser_runs) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_BLOCKNUM(&sn, i).b = 30 + (i*5);
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_BLOCKNUM(&sn, i).b = 30 + (i * 5);
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BNC(&sn, i, toku_create_empty_nl());
}
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123);
@@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
k = ck * nperchild + i;
char buf[valsize];
int c;
- for (c = 0; c < valsize * entropy; ) {
- int *p = (int *) &buf[c];
+ for (c = 0; c < valsize * entropy;) {
+ int *p = (int *)&buf[c];
*p = rand();
c += sizeof(*p);
}
memset(&buf[c], 0, valsize - c);
- toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp);
+ toku_bnc_insert_msg(bnc,
+ &k,
+ sizeof k,
+ buf,
+ valsize,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ true,
+ cmp);
}
if (ck < 7) {
DBT pivotkey;
@@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
}
}
- //Cleanup:
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
cmp.destroy();
@@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(long_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
struct timeval t[2];
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
double dt;
- dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+ dt = (t[1].tv_sec - t[0].tv_sec) +
+ ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
dt *= 1000;
- printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
+ printf(
+ "serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
- assert(r==0);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
- dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+ dt = (t[1].tv_sec - t[0].tv_sec) +
+ ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
dt *= 1000;
- printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
- printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
- tokutime_to_seconds(bfe.io_time)*1000,
- tokutime_to_seconds(bfe.decompress_time)*1000,
- tokutime_to_seconds(bfe.deserialize_time)*1000,
- deser_runs
- );
+ printf(
+ "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
+ printf(
+ "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+ "(IGNORED RUNS=%d)\n",
+ tokutime_to_seconds(bfe.io_time) * 1000,
+ tokutime_to_seconds(bfe.decompress_time) * 1000,
+ tokutime_to_seconds(bfe.deserialize_time) * 1000,
+ deser_runs);
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
ft_h->cmp.destroy();
@@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
toku_free(ndd);
toku_free(ndd2);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
const int DEFAULT_RUNS = 5;
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
double entropy = 0.3;
if (argc != 3 && argc != 5) {
- fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
+ fprintf(stderr,
+ "Usage: %s <valsize> <nelts> [<serialize_runs> "
+ "<deserialize_runs>]\n",
+ argv[0]);
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
return 2;
}
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
index 332aaa0c170..0cddaf19651 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
@@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
#include "bndata.h"
-
-
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
-static size_t
-le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize)
-{
+static size_t le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ int keysize,
+ const char *val,
+ int valsize) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keysize,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha
}
class test_key_le_pair {
- public:
+ public:
uint32_t keylen;
- char* keyp;
+ char *keyp;
LEAFENTRY le;
test_key_le_pair() : keylen(), keyp(), le() {}
void init(const char *_keyp, const char *_val) {
init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1);
}
- void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) {
+ void init(const char *_keyp,
+ uint32_t _keylen,
+ const char *_val,
+ uint32_t _vallen) {
keylen = _keylen;
CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen)));
@@ -95,126 +92,144 @@ class test_key_le_pair {
}
};
-enum ftnode_verify_type {
- read_all=1,
- read_compressed,
- read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
char *CAST_FROM_VOIDP(s, a->data);
char *CAST_FROM_VOIDP(t, b->data);
return strcmp(s, t);
}
-static void
-setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) {
+static void setup_dn(enum ftnode_verify_type bft,
+ int fd,
+ FT ft_h,
+ FTNODE *dn,
+ FTNODE_DISK_DATA *ndd) {
int r;
if (bft == read_all) {
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
- assert(r==0);
- }
- else if (bft == read_compressed || bft == read_none) {
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+ invariant(r == 0);
+ } else if (bft == read_compressed || bft == read_none) {
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft_h);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
- assert(r==0);
- // assert all bp's are compressed or on disk.
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+ invariant(r == 0);
+ // invariant all bp's are compressed or on disk.
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED ||
+ BP_STATE(*dn, i) == PT_ON_DISK);
}
// if read_none, get rid of the compressed bp's
if (bft == read_none) {
if ((*dn)->height == 0) {
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- // assert all bp's are on disk
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
+ // invariant all bp's are on disk
for (int i = 0; i < (*dn)->n_children; i++) {
if ((*dn)->height == 0) {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
- assert(is_BNULL(*dn, i));
- }
- else {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(is_BNULL(*dn, i));
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
}
}
- }
- else {
+ } else {
// first decompress everything, and make sure
// that it is available
// then run partial eviction to get it compressed
PAIR_ATTR attr;
bfe.create_for_full_read(ft_h);
- assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+ invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
- assert(r==0);
- // assert all bp's are available
+ invariant(r == 0);
+ // invariant all bp's are available
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- // assert all bp's are still available, because we touched the clock
- assert(BP_STATE(*dn,i) == PT_AVAIL);
- // now assert all should be evicted
- assert(BP_SHOULD_EVICT(*dn, i));
+ // invariant all bp's are still available, because we touched
+ // the clock
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
+ // now invariant all should be evicted
+ invariant(BP_SHOULD_EVICT(*dn, i));
}
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
}
}
}
// now decompress them
bfe.create_for_full_read(ft_h);
- assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+ invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
PAIR_ATTR attr;
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
- assert(r==0);
- // assert all bp's are available
+ invariant(r == 0);
+ // invariant all bp's are available
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// continue on with test
- }
- else {
+ } else {
// if we get here, this is a test bug, NOT a bug in development code
- assert(false);
+ invariant(false);
}
}
-static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) {
+static void write_sn_to_disk(int fd,
+ FT_HANDLE ft,
+ FTNODE sn,
+ FTNODE_DISK_DATA *src_ndd,
+ bool do_clone) {
int r;
if (do_clone) {
- void* cloned_node_v = NULL;
+ void *cloned_node_v = NULL;
PAIR_ATTR attr;
long clone_size;
- toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
+ toku_ftnode_clone_callback(
+ sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v);
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
+ invariant(r == 0);
toku_ftnode_free(&cloned_node);
- }
- else {
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
- assert(r==0);
+ } else {
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
+ invariant(r == 0);
}
}
-static void
-test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft,
+ bool do_clone) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
-#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 })
-#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 })
+#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42})
+#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84})
sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK;
sn.flags = 0x11223344;
@@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn());
le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5);
le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5);
le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5);
- BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 });
+ BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73});
BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK;
FT_HANDLE XMALLOC(ft);
@@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
- //Want to use block #20
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children>=1);
- assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children >= 1);
+ invariant(dn->max_msn_applied_to_node_on_disk.msn ==
+ POSTSERIALIZE_MSN_ON_DISK.msn);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair elts[3];
elts[0].init("a", "aval");
@@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn);
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn ==
+ POSTSERIALIZE_MSN_ON_DISK.msn);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
- assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(elts[last_i].le));
+ invariant(memcmp(curr_le,
+ elts[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ elts[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
-
}
- assert(last_i == 3);
+ invariant(last_i == 3);
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
- const int keylens = 256*1024, vallens = 0;
+ const int keylens = 256 * 1024, vallens = 0;
const uint32_t nrows = 8;
- // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
for (uint32_t i = 0; i < nrows; ++i) { // one basement per row
char key[keylens], val[vallens];
- key[keylens-1] = '\0';
+ key[keylens - 1] = '\0';
char c = 'a' + i;
- memset(key, c, keylens-1);
- le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
- if (i < nrows-1) {
+ memset(key, c, keylens - 1);
+ le_add_to_bn(BLB_DATA(&sn, i),
+ 0,
+ (char *)&key,
+ sizeof(key),
+ (char *)&val,
+ sizeof(val));
+ if (i < nrows - 1) {
uint32_t keylen;
- void* curr_key;
+ void *curr_key;
BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key);
DBT pivotkey;
- sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i);
+ sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen),
+ i);
}
}
@@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone);
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
-
- assert(dn->blocknum.b==20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->blocknum.b == 20);
+
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
char key[keylens], val[vallens];
- key[keylens-1] = '\0';
+ key[keylens - 1] = '\0';
for (uint32_t i = 0; i < nrows; ++i) {
char c = 'a' + i;
- memset(key, c, keylens-1);
- les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ memset(key, c, keylens - 1);
+ les[i].init(
+ (char *)&key, sizeof(key), (char *)&val, sizeof(val));
}
}
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ les[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
}
- assert(last_i == nrows);
+ invariant(last_i == nrows);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
- const uint32_t nrows = 196*1024;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ const uint32_t nrows = 196 * 1024;
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
XMALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
- set_BLB(&sn, i, toku_create_empty_bn());
+ BP_STATE(&sn, i) = PT_AVAIL;
+ set_BLB(&sn, i, toku_create_empty_bn());
}
size_t total_size = 0;
for (uint32_t i = 0; i < nrows; ++i) {
uint32_t key = i;
uint32_t val = i;
- total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ total_size += le_add_to_bn(BLB_DATA(&sn, 0),
+ i,
+ (char *)&key,
+ sizeof(key),
+ (char *)&val,
+ sizeof(val));
}
FT_HANDLE XMALLOC(ft);
@@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -535,56 +602,66 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
int key = 0, val = 0;
for (uint32_t i = 0; i < nrows; ++i, key++, val++) {
- les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ les[i].init(
+ (char *)&key, sizeof(key), (char *)&val, sizeof(val));
}
}
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data);
- void* tmp = les[last_i].keyp;
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ uint32_t *CAST_FROM_VOIDP(pivot,
+ dn->pivotkeys.get_pivot(bn).data);
+ void *tmp = les[last_i].keyp;
uint32_t *CAST_FROM_VOIDP(item, tmp);
- assert(*pivot >= *item);
+ invariant(*pivot >= *item);
}
// TODO for later, get a key comparison here as well
last_i++;
}
// don't check soft_copy_is_up_to_date or seqinsert
- assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change
+ invariant(BLB_DATA(dn, bn)->get_disk_size() <
+ 128 * 1024); // BN_MAX_SIZE, apt to change
}
- assert(last_i == nrows);
+ invariant(last_i == nrows);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
const uint32_t nrows = 7;
const size_t key_size = 8;
- const size_t val_size = 512*1024;
- // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ const size_t val_size = 512 * 1024;
+ // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
sn.n_children = 1;
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
-
+
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
for (uint32_t i = 0; i < nrows; ++i) {
char key[key_size], val[val_size];
- key[key_size-1] = '\0';
- val[val_size-1] = '\0';
+ key[key_size - 1] = '\0';
+ val[val_size - 1] = '\0';
char c = 'a' + i;
- memset(key, c, key_size-1);
- memset(val, c, val_size-1);
- le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size);
+ memset(key, c, key_size - 1);
+ memset(val, c, val_size - 1);
+ le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size);
}
FT_HANDLE XMALLOC(ft);
@@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
char key[key_size], val[val_size];
- key[key_size-1] = '\0';
- val[val_size-1] = '\0';
+ key[key_size - 1] = '\0';
+ val[val_size - 1] = '\0';
for (uint32_t i = 0; i < nrows; ++i) {
char c = 'a' + i;
- memset(key, c, key_size-1);
- memset(val, c, val_size-1);
+ memset(key, c, key_size - 1);
+ memset(val, c, val_size - 1);
les[i].init(key, key_size, val, val_size);
}
}
const uint32_t npartitions = dn->n_children;
- assert(npartitions == nrows);
+ invariant(npartitions == nrows);
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ (char *)(les[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
// don't check soft_copy_is_up_to_date or seqinsert
}
- assert(last_i == 7);
+ invariant(last_i == 7);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_empty_basement_nodes(
+ enum ftnode_verify_type bft,
+ bool do_clone) {
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -760,7 +857,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
toku_fill_dbt(&pivotkeys[5], "x", 2);
sn.pivotkeys.create_from_dbts(pivotkeys, 6);
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
BLB_SEQINSERT(&sn, i) = 0;
}
@@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children>0);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children > 0);
{
test_key_le_pair elts[3];
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
elts[0].init("a", "aval");
elts[1].init("b", "bval");
@@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
- assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(elts[last_i].le));
+ invariant(memcmp(curr_le,
+ elts[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ (char *)(elts[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
-
}
- assert(last_i == 3);
+ invariant(last_i == 3);
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_multiple_empty_basement_nodes(
+ enum ftnode_verify_type bft,
+ bool do_clone) {
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
toku_fill_dbt(&pivotkeys[2], "A", 2);
sn.pivotkeys.create_from_dbts(pivotkeys, 3);
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
@@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children == 1);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children == 1);
{
const uint32_t npartitions = dn->n_children;
for (uint32_t i = 0; i < npartitions; ++i) {
- assert(dest_ndd[i].start > 0);
- assert(dest_ndd[i].size > 0);
+ invariant(dest_ndd[i].start > 0);
+ invariant(dest_ndd[i].size > 0);
if (i > 0) {
- assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size);
+ invariant(dest_ndd[i].start >=
+ dest_ndd[i - 1].start + dest_ndd[i - 1].size);
}
- assert(BLB_DATA(dn, i)->num_klpairs() == 0);
+ invariant(BLB_DATA(dn, i)->num_klpairs() == 0);
}
}
-
+
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
toku::comparator cmp;
cmp.create(string_key_cmp, nullptr);
- toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
- toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
- toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
- //Cleanup:
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "a",
+ 2,
+ "aval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_0,
+ true,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "b",
+ 2,
+ "bval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ false,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 1),
+ "x",
+ 2,
+ "xval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_234,
+ true,
+ cmp);
+
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(string_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 1);
- assert(dn->n_children==2);
- assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0);
- assert(dn->pivotkeys.get_pivot(0).size==6);
- assert(BP_BLOCKNUM(dn,0).b==30);
- assert(BP_BLOCKNUM(dn,1).b==35);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 1);
+ invariant(dn->n_children == 2);
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0);
+ invariant(dn->pivotkeys.get_pivot(0).size == 6);
+ invariant(BP_BLOCKNUM(dn, 0).b == 30);
+ invariant(BP_BLOCKNUM(dn, 1).b == 35);
message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer;
message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer;
message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer;
message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer;
- assert(src_msg_buffer1->equals(dest_msg_buffer1));
- assert(src_msg_buffer2->equals(dest_msg_buffer2));
+ invariant(src_msg_buffer1->equals(dest_msg_buffer1));
+ invariant(src_msg_buffer2->equals(dest_msg_buffer2));
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
@@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
initialize_dummymsn();
test_serialize_nonleaf(read_none, false);
@@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false);
- test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false);
+ test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+ false);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true);
- test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true);
+ test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+ true);
test_serialize_leaf_with_empty_basement_nodes(read_none, false);
test_serialize_leaf_with_empty_basement_nodes(read_all, false);
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
index 598a1cc7085..706bd94fbc3 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
@@ -164,17 +164,16 @@ static void test_read_what_was_written (void) {
int r;
const int NVALS=10000;
- if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
+ if (verbose) {
+ printf("test_read_what_was_written(): "); fflush(stdout);
+ }
unlink(fname);
-
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
- toku_cachetable_close(&ct);
-
-
+ toku_cachetable_close(&ct);
/* Now see if we can read an empty tree in. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
@@ -189,8 +188,6 @@ static void test_read_what_was_written (void) {
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
toku_cachetable_close(&ct);
-
-
/* Now see if we can read it in and get the value. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
index 53973794eae..aeb5a897c48 100644
--- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
@@ -109,7 +109,9 @@ static int run_test(void)
r = pqueue_pop(pq, &node); assert(r==0);
if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
if ( *(int*)(node->key->data) != i ) {
- if (verbose) printf("FAIL\n"); return -1;
+ if (verbose)
+ printf("FAIL\n");
+ return -1;
}
}
pqueue_free(pq);
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
index a78f787cdf2..f2004964862 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
@@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
invariant(do_garbage_collect);
- // It is definately worth doing when the above case is true
+ // It is definitely worth doing when the above case is true
// and there is more than one provisional entry.
ule.num_cuxrs = 1;
ule.num_puxrs = 2;
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
index 419af550545..71357a1e16a 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
@@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
-static void test_oldest_referenced_xid_gets_propogated(void) {
+static void test_oldest_referenced_xid_gets_propagated(void) {
int r;
CACHETABLE ct;
FT_HANDLE t;
@@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
toku_ft_flush_some_child(t->ft, node, &fa);
// pin the child, verify that oldest referenced xid was
- // propogated from parent to child during the flush
+ // propagated from parent to child during the flush
toku_pin_ftnode(
t->ft,
child_nonleaf_blocknum,
@@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
default_parse_args(argc, argv);
- test_oldest_referenced_xid_gets_propogated();
+ test_oldest_referenced_xid_gets_propagated();
return 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
index 8aded3898c1..ea4f9374dc3 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
@@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-#pragma once
-
-#include <db.h>
-
-#include "ft/serialize/block_allocator.h"
-
-// Block allocation strategy implementations
-
-class block_allocator_strategy {
-public:
- static struct block_allocator::blockpair *
- first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- best_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- padded_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- heat_zone(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t heat);
-};
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+static void test_insert_remove(void) {  // Drives MhsRbTree::Tree through Insert/Remove phases, validating the tree after each one.
+ uint64_t i;
+ MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+ verbose = 0;  // overrides whatever default_parse_args() set in test_main
+
+ tree->Insert({0, 100});  // presumably an {offset, size} pair -- confirm against rbtree_mhs.h
+
+ for (i = 0; i < 10; i++) {  // ten rounds of Remove(3) + Remove(2); returned values are ignored here
+ tree->Remove(3);
+ tree->Remove(2);
+ }
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ for (i = 0; i < 10; i++) {  // inserts {0,3}, {5,3}, ..., {45,3}
+ tree->Insert({5 * i, 3});
+ }
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ uint64_t offset = tree->Remove(2);  // Remove() yields an offset; the expected values are pinned by the invariants below
+ invariant(offset == 0);
+ offset = tree->Remove(10);
+ invariant(offset == 50);
+ offset = tree->Remove(3);
+ invariant(offset == 5);
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ tree->Insert({48, 2});
+ tree->Insert({50, 10});
+
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ tree->Insert({3, 7});
+ offset = tree->Remove(10);  // expected to come back at offset 2 after the {3, 7} insert above
+ invariant(offset == 2);
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+ tree->Dump();  // called unconditionally, independent of `verbose`
+ delete tree;
+}
+
+int test_main(int argc, const char *argv[]) {  // Test entry point: parses args, runs test_insert_remove(), returns 0.
+ default_parse_args(argc, argv);
+
+ test_insert_remove();
+ if (verbose)  // NOTE(review): test_insert_remove() assigns verbose = 0, so this message looks unreachable -- confirm
+ printf("test ok\n");
+ return 0;
+}
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
new file mode 100644
index 00000000000..85f29ce9813
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
@@ -0,0 +1,102 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+#define N 1000000
+std::vector<MhsRbTree::Node::BlockPair> input_vector;
+MhsRbTree::Node::BlockPair old_vector[N];
+
+static int myrandom(int i) { return std::rand() % i; }  // returns std::rand() % i; handed to std::random_shuffle as its generator; i must be non-zero
+
+static void generate_random_input() {  // Seeds rand() from the clock, then fills input_vector (shuffled) and old_vector (in index order).
+ std::srand(unsigned(std::time(0)));
+
+ // Fill all N slots (offsets 0..N-1): test_insert_remove reads input_vector[0..N-1], so starting at 1 left the last read past the end.
+ for (uint64_t i = 0; i < N; ++i) {
+ input_vector.push_back({i, 0});
+ old_vector[i] = {i, 0};
+ }
+ // Shuffle only input_vector; old_vector keeps ascending offsets and is what the caller passes to ValidateInOrder.
+ std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
+}
+
+static void test_insert_remove(void) {  // Inserts N shuffled block pairs, validates the tree, then removes them all via RawRemove.
+ int i;
+ MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+ verbose = 0;  // overrides whatever default_parse_args() set in test_main
+ generate_random_input();
+ if (verbose) {  // never taken as written: verbose was set to 0 just above
+ printf("\n we are going to insert the following block offsets\n");
+ for (i = 0; i < N; i++)
+ printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt());
+ }
+ for (i = 0; i < N; i++) {  // NOTE(review): indexes input_vector[0..N-1], so it must hold at least N entries
+ tree->Insert(input_vector[i]);
+ // tree->ValidateBalance();
+ }
+ tree->ValidateBalance();
+ MhsRbTree::Node::BlockPair *p_bps = &old_vector[0];  // unshuffled array, filled in index order by generate_random_input()
+ tree->ValidateInOrder(p_bps);  // presumably compares an in-order walk against p_bps -- confirm in rbtree_mhs.h
+ printf("min node of the tree:%" PRIu64 "\n",
+ rbn_offset(tree->MinNode()).ToInt());
+ printf("max node of the tree:%" PRIu64 "\n",
+ rbn_offset(tree->MaxNode()).ToInt());  // both printfs run unconditionally, independent of `verbose`
+
+ for (i = 0; i < N; i++) {  // removes in the same shuffled order used for insertion
+ // tree->ValidateBalance();
+ tree->RawRemove(input_vector[i]._offset.ToInt());
+ }
+
+ tree->Destroy();
+ delete tree;
+}
+
+int test_main(int argc, const char *argv[]) {  // Test entry point: parses args, runs test_insert_remove(), returns 0.
+ default_parse_args(argc, argv);
+
+ test_insert_remove();
+ if (verbose)  // NOTE(review): test_insert_remove() assigns verbose = 0, so this message looks unreachable -- confirm
+ printf("test ok\n");
+ return 0;
+}
diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc
index 407116b983c..90eee1e580a 100644
--- a/storage/tokudb/PerconaFT/ft/txn/roll.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc
@@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// functionality provided by roll.c is exposed by an autogenerated
// header file, logheader.h
//
-// this (poorly) explains the absense of "roll.h"
+// this (poorly) explains the absence of "roll.h"
// these flags control whether or not we send commit messages for
// various operations
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
index df830afd0df..c9464c3ed60 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
@@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
}
- // if we're commiting a child rollback, put its entries into the parent
+ // if we're committing a child rollback, put its entries into the parent
// by pinning both child and parent and then linking the child log entry
// list to the end of the parent log entry list.
if (txn_has_current_rollback_log(txn)) {
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
index 68c94c2ad11..08d7c8874e5 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
@@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) {
// flush an ununused log to disk, by allocating a size 0 blocknum in
// the blocktable
-static void
-toku_rollback_flush_unused_log(
- ROLLBACK_LOG_NODE log,
- BLOCKNUM logname,
- int fd,
- FT ft,
- bool write_me,
- bool keep_me,
- bool for_checkpoint,
- bool is_clone
- )
-{
+static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log,
+ BLOCKNUM logname,
+ int fd,
+ FT ft,
+ bool write_me,
+ bool keep_me,
+ bool for_checkpoint,
+ bool is_clone) {
if (write_me) {
DISKOFF offset;
- ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
+ ft->blocktable.realloc_on_disk(
+ logname, 0, &offset, ft, fd, for_checkpoint);
}
if (!keep_me && !is_clone) {
toku_free(log);
diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc
index ac393fbf179..e3dce6d27dd 100644
--- a/storage/tokudb/PerconaFT/ft/ule.cc
+++ b/storage/tokudb/PerconaFT/ft/ule.cc
@@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection(
// by new txns.
// 2.) There is only one committed entry, but the outermost
// provisional entry is older than the oldest known referenced
-// xid, so it must have commited. Therefor we can promote it to
-// committed and get rid of the old commited entry.
+// xid, so it must have committed. Therefore we can promote it to
+// committed and get rid of the old committed entry.
if (le->type != LE_MVCC) {
return false;
}
diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
index 9f84d9b03df..4793db63cc1 100644
--- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
@@ -14,12 +14,11 @@ set(tokuportability_srcs
)
add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs})
-target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC})
target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
add_library(tokuportability_static_conv STATIC ${tokuportability_srcs})
set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
+set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}")
maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv)
diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
index bc48e93937d..8e73c56a6c5 100644
--- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
+++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
@@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void)
const long pagesize = 4096;
const long n_pages = TWO_MB/pagesize;
+#ifdef __linux__
+ // On Linux mincore is defined as mincore(void *, size_t, unsigned char *)
unsigned char vec[n_pages];
+#else
+ // On BSD (OS X included) it is defined as mincore(void *, size_t, char *)
+ char vec[n_pages];
+#endif
{
int r = mincore(second, TWO_MB, vec);
if (r!=0 && errno==ENOMEM) {
diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
index 880f9a3a9bb..dbbea974a49 100644
--- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
+++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
@@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) {
if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata);
// check the data size
-#if __x86_64__
+#if defined(__x86_64__) || defined(__aarch64__)
assert(maxdata > (1ULL << 32));
#elif __i386__
assert(maxdata < (1ULL << 32));
diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in
index e1412cc9e14..1a34bf1ef45 100644
--- a/storage/tokudb/PerconaFT/portability/toku_config.h.in
+++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in
@@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#cmakedefine TOKU_DEBUG_PARANOID 1
#cmakedefine USE_VALGRIND 1
-
#cmakedefine HAVE_ALLOCA_H 1
#cmakedefine HAVE_ARPA_INET_H 1
#cmakedefine HAVE_BYTESWAP_H 1
diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h
index 11a3f3aa2b9..a1278ef0337 100644
--- a/storage/tokudb/PerconaFT/portability/toku_time.h
+++ b/storage/tokudb/PerconaFT/portability/toku_time.h
@@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default")
// Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC.
static inline tokutime_t toku_time_now(void) {
+#if defined(__x86_64__) || defined(__i386__)
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
+#elif defined (__aarch64__)
+ uint64_t result;
+ __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result));
+ return result;
+#else
+#error No timer implementation for this platform
+#endif
}
static inline uint64_t toku_current_time_microsec(void) {
diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h
index 48e62ee49b2..fdaa561e3d0 100644
--- a/storage/tokudb/PerconaFT/src/indexer-internal.h
+++ b/storage/tokudb/PerconaFT/src/indexer-internal.h
@@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include <toku_pthread.h>
// the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array.
-// the array is a resizeable array with max size "max_keys" and current size "current_keys".
+// the array is a resizable array with max size "max_keys" and current size "current_keys".
// the ordered set is used by the hotindex undo function to collect the commit keys.
struct indexer_commit_keys {
int max_keys; // max number of keys
diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
index 8d0b080b9fe..4c7f5336161 100644
--- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
+++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
@@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u
}
// inject "delete" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
static int
indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) {
int result = 0;
@@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
}
// inject "insert" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
static int
indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) {
int result = 0;
diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
index 20df13923e6..7cce68e6ff8 100644
--- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
+++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
@@ -1,3 +1,3 @@
-# commited insert
+# committed insert
key k1
insert committed 0 v100
diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
index 3f2f8d7455a..aaf77c503cc 100644
--- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
+++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
@@ -51,7 +51,7 @@ int DISALLOW_PUTS=0;
int COMPRESS=0;
enum {MAGIC=311};
-bool dup_row_at_end = false; // false: duplicate at the begining. true: duplicate at the end. The duplicated row is row 0.
+bool dup_row_at_end = false; // false: duplicate at the beginning. true: duplicate at the end. The duplicated row is row 0.
int dup_row_id = 0; // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning. Otherwise insert the row specified here.
//
diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
index a4dc0ea9236..2c905c5ff12 100644
--- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
+++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
@@ -156,7 +156,7 @@ do_args(int argc, char * const argv[]) {
choices[i] = -1;
}
- char c;
+ int c;
while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) {
switch(c) {
case 'v':
diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
index a2b48e443cd..48843a0bd32 100644
--- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
+++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
@@ -166,7 +166,7 @@ run_test (void) {
DB_BTREE_STAT64 s;
r = db->stat64(db, NULL, &s); CKERR(r);
- assert(s.bt_nkeys == 0);
+ assert(s.bt_nkeys == 1);
r = db->close(db, 0); CKERR(r);
@@ -176,7 +176,7 @@ run_test (void) {
r = txn->commit(txn, 0); CKERR(r);
r = db->stat64(db, NULL, &s); CKERR(r);
- assert(s.bt_nkeys == 0);
+ assert(s.bt_nkeys == 1);
}
// verify update callback overwrites the row
diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
index 8e5109cd2a9..f6111d4b67c 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
@@ -78,7 +78,7 @@ static void test_insert_many_gc(void) {
// from having an MVCC stack of size 'N'. At the time of this
// writing, we run full GC on leaf-inject when the leaf is
// 32mb or larger. A good invariant is that the max LE size
- // never grew larger than 35mb and that the max commited xr stack
+ // never grew larger than 35mb and that the max committed xr stack
// length never exceeded 35
const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE");
const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR");
diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
index aaafe284906..88140dd1731 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
@@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// This test is a micro stress test that does multithreaded updates on a fixed size table.
// There is also a thread that scans the table with bulk fetch, ensuring the sum is zero.
//
-// This test is targetted at stressing the locktree, hence the small table and many update threads.
+// This test is targeted at stressing the locktree, hence the small table and many update threads.
//
static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) {
diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
index fec454b8009..301eed1560e 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
@@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) {
continue;
}
}
- if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n");
+ if (verbose>0) printf("%s", __FILE__);
+ if (verbose>1) printf("\n");
for (i=1; i<100; i++)
test_txn_abort(i);
if (verbose>1) printf("%s OK\n", __FILE__);
diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h
index 462a2a3d861..2d6c84126e1 100644
--- a/storage/tokudb/PerconaFT/src/ydb-internal.h
+++ b/storage/tokudb/PerconaFT/src/ydb-internal.h
@@ -114,7 +114,7 @@ struct __toku_db_env_internal {
char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /)
char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /)
- char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /)
+ char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
fs_redzone_state fs_state;
uint64_t fs_seq; // how many times has fs_poller run?
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
index da833146088..7501b1bee01 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
@@ -1,10 +1,10 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-# Free Software Foundation, Inc.
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2009-04-27'
+timestamp='2016-06-22'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-04-27'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -27,16 +25,16 @@ timestamp='2009-04-27'
# the same distribution terms that you use for the rest of that program.
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
+# Originally written by Per Bothner. Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
#
# This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1.
#
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
me=`echo "$0" | sed -e 's,.*/,,'`
@@ -56,8 +54,9 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
# switched to ELF, *-*-netbsd* would select the old
# object file format. This provides both forward
@@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep __ELF__ >/dev/null
+ | grep -q __ELF__
then
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
# Return netbsd for either. FIX?
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
fi
;;
*)
- os=netbsd
+ os=netbsd
;;
esac
# The OS release
@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
;;
*5.*)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
;;
esac
# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- exit ;;
+ # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+ exitcode=$?
+ trap '' 0
+ exit $exitcode ;;
Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo s390-ibm-zvmoe
exit ;;
*:OS400:*:*)
- echo powerpc-ibm-os400
+ echo powerpc-ibm-os400
exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
@@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
+ i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+ echo i386-pc-auroraux${UNAME_RELEASE}
+ exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
eval $set_cc_for_build
SUN_ARCH="i386"
@@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# MiNT. But MiNT is downward compatible to TOS, so this should
# be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
+ exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-unknown-mint${UNAME_RELEASE}
+ exit ;;
m68k:machten:*:*)
echo m68k-apple-machten${UNAME_RELEASE}
exit ;;
@@ -477,8 +482,8 @@ EOF
echo m88k-motorola-sysv3
exit ;;
AViiON:dgux:*:*)
- # DG/UX returns AViiON for all architectures
- UNAME_PROCESSOR=`/usr/bin/uname -p`
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
then
if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -491,7 +496,7 @@ EOF
else
echo i586-dg-dgux${UNAME_RELEASE}
fi
- exit ;;
+ exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
echo m88k-dolphin-sysv3
exit ;;
@@ -548,7 +553,7 @@ EOF
echo rs6000-ibm-aix3.2
fi
exit ;;
- *:AIX:*:[456])
+ *:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
@@ -591,52 +596,52 @@ EOF
9000/[678][0-9][0-9])
if [ -x /usr/bin/getconf ]; then
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "${sc_cpu_version}" in
- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
- 532) # CPU_PA_RISC2_0
- case "${sc_kernel_bits}" in
- 32) HP_ARCH="hppa2.0n" ;;
- 64) HP_ARCH="hppa2.0w" ;;
+ sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+ case "${sc_cpu_version}" in
+ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+ 532) # CPU_PA_RISC2_0
+ case "${sc_kernel_bits}" in
+ 32) HP_ARCH="hppa2.0n" ;;
+ 64) HP_ARCH="hppa2.0w" ;;
'') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
- esac ;;
- esac
+ esac ;;
+ esac
fi
if [ "${HP_ARCH}" = "" ]; then
eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ sed 's/^ //' << EOF >$dummy.c
- #define _HPUX_SOURCE
- #include <stdlib.h>
- #include <unistd.h>
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
- int main ()
- {
- #if defined(_SC_KERNEL_BITS)
- long bits = sysconf(_SC_KERNEL_BITS);
- #endif
- long cpu = sysconf (_SC_CPU_VERSION);
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
- case CPU_PA_RISC2_0:
- #if defined(_SC_KERNEL_BITS)
- switch (bits)
- {
- case 64: puts ("hppa2.0w"); break;
- case 32: puts ("hppa2.0n"); break;
- default: puts ("hppa2.0"); break;
- } break;
- #else /* !defined(_SC_KERNEL_BITS) */
- puts ("hppa2.0"); break;
- #endif
- default: puts ("hppa1.0"); break;
- }
- exit (0);
- }
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
EOF
(CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -656,7 +661,7 @@ EOF
# => hppa64-hp-hpux11.23
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
- grep __LP64__ >/dev/null
+ grep -q __LP64__
then
HP_ARCH="hppa2.0w"
else
@@ -727,22 +732,22 @@ EOF
exit ;;
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
echo c1-convex-bsd
- exit ;;
+ exit ;;
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
if getsysinfo -f scalar_acc
then echo c32-convex-bsd
else echo c2-convex-bsd
fi
- exit ;;
+ exit ;;
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
echo c34-convex-bsd
- exit ;;
+ exit ;;
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
echo c38-convex-bsd
- exit ;;
+ exit ;;
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
echo c4-convex-bsd
- exit ;;
+ exit ;;
CRAY*Y-MP:*:*:*)
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
@@ -766,14 +771,14 @@ EOF
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
- echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
- echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -785,13 +790,12 @@ EOF
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ case ${UNAME_PROCESSOR} in
amd64)
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
*)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
esac
exit ;;
i*:CYGWIN*:*)
@@ -800,19 +804,22 @@ EOF
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
+ i*:MSYS*:*)
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
i*:windows32*:*)
- # uname -m includes "-pc" on this system.
- echo ${UNAME_MACHINE}-mingw32
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
exit ;;
i*:PW*:*)
echo ${UNAME_MACHINE}-pc-pw32
exit ;;
- *:Interix*:[3456]*)
- case ${UNAME_MACHINE} in
+ *:Interix*:*)
+ case ${UNAME_MACHINE} in
x86)
echo i586-pc-interix${UNAME_RELEASE}
exit ;;
- EM64T | authenticamd | genuineintel)
+ authenticamd | genuineintel | EM64T)
echo x86_64-unknown-interix${UNAME_RELEASE}
exit ;;
IA64)
@@ -822,6 +829,9 @@ EOF
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
echo i${UNAME_MACHINE}-pc-mks
exit ;;
+ 8664:Windows_NT:*)
+ echo x86_64-pc-mks
+ exit ;;
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
@@ -851,6 +861,27 @@ EOF
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
exit ;;
+ aarch64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ aarch64_be:Linux:*:*)
+ UNAME_MACHINE=aarch64_be
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ alpha:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ EV5) UNAME_MACHINE=alphaev5 ;;
+ EV56) UNAME_MACHINE=alphaev56 ;;
+ PCA56) UNAME_MACHINE=alphapca56 ;;
+ PCA57) UNAME_MACHINE=alphapca56 ;;
+ EV6) UNAME_MACHINE=alphaev6 ;;
+ EV67) UNAME_MACHINE=alphaev67 ;;
+ EV68*) UNAME_MACHINE=alphaev68 ;;
+ esac
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ exit ;;
arm*:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -858,20 +889,40 @@ EOF
then
echo ${UNAME_MACHINE}-unknown-linux-gnu
else
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ else
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+ fi
fi
exit ;;
avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
cris:Linux:*:*)
- echo cris-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
crisv32:Linux:*:*)
- echo crisv32-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
frv:Linux:*:*)
- echo frv-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ hexagon:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ i*86:Linux:*:*)
+ LIBC=gnu
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #ifdef __dietlibc__
+ LIBC=dietlibc
+ #endif
+EOF
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+ echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -882,78 +933,34 @@ EOF
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
- mips:Linux:*:*)
+ mips:Linux:*:* | mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
- #undef mips
- #undef mipsel
+ #undef ${UNAME_MACHINE}
+ #undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mipsel
+ CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips
+ CPU=${UNAME_MACHINE}
#else
CPU=
#endif
#endif
EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
- ;;
- mips64:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef mips64
- #undef mips64el
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mips64el
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips64
- #else
- CPU=
- #endif
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
or32:Linux:*:*)
- echo or32-unknown-linux-gnu
- exit ;;
- ppc:Linux:*:*)
- echo powerpc-unknown-linux-gnu
- exit ;;
- ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-gnu
- exit ;;
- alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
- EV5) UNAME_MACHINE=alphaev5 ;;
- EV56) UNAME_MACHINE=alphaev56 ;;
- PCA56) UNAME_MACHINE=alphapca56 ;;
- PCA57) UNAME_MACHINE=alphapca56 ;;
- EV6) UNAME_MACHINE=alphaev6 ;;
- EV67) UNAME_MACHINE=alphaev67 ;;
- EV68*) UNAME_MACHINE=alphaev68 ;;
- esac
- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
padre:Linux:*:*)
echo sparc-unknown-linux-gnu
exit ;;
+ parisc64:Linux:*:* | hppa64:Linux:*:*)
+ echo hppa64-unknown-linux-gnu
+ exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
@@ -962,14 +969,17 @@ EOF
*) echo hppa-unknown-linux-gnu ;;
esac
exit ;;
- parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-gnu
+ ppc64:Linux:*:*)
+ echo powerpc64-unknown-linux-gnu
+ exit ;;
+ ppc:Linux:*:*)
+ echo powerpc-unknown-linux-gnu
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux
exit ;;
sh64*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
sh*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -977,75 +987,18 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
+ tile*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;;
x86_64:Linux:*:*)
- echo x86_64-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
xtensa*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
- i*86:Linux:*:*)
- # The BFD linker knows what the default object file format is, so
- # first see if it will tell us. cd to the root directory to prevent
- # problems with other programs or directories called `ld' in the path.
- # Set LC_ALL=C to ensure ld outputs messages in English.
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
- | sed -ne '/supported targets:/!d
- s/[ ][ ]*/ /g
- s/.*supported targets: *//
- s/ .*//
- p'`
- case "$ld_supported_targets" in
- elf32-i386)
- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
- ;;
- a.out-i386-linux)
- echo "${UNAME_MACHINE}-pc-linux-gnuaout"
- exit ;;
- "")
- # Either a pre-BFD a.out linker (linux-gnuoldld) or
- # one that does not give us useful --help.
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
- exit ;;
- esac
- # Determine whether the default compiler is a.out or elf
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <features.h>
- #ifdef __ELF__
- # ifdef __GLIBC__
- # if __GLIBC__ >= 2
- LIBC=gnu
- # else
- LIBC=gnulibc1
- # endif
- # else
- LIBC=gnulibc1
- # endif
- #else
- #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- LIBC=gnu
- #else
- LIBC=gnuaout
- #endif
- #endif
- #ifdef __dietlibc__
- LIBC=dietlibc
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^LIBC/{
- s: ::g
- p
- }'`"
- test x"${LIBC}" != x && {
- echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
- exit
- }
- test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
- ;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
# earlier versions are messed up and put the nodename in both
@@ -1053,11 +1006,11 @@ EOF
echo i386-sequent-sysv4
exit ;;
i*86:UNIX_SV:4.2MP:2.*)
- # Unixware is an offshoot of SVR4, but it has its own version
- # number series starting with 2...
- # I am not positive that other SVR4 systems won't match this,
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
# I just have to hope. -- rms.
- # Use sysv4.2uw... so that sysv4* matches it.
+ # Use sysv4.2uw... so that sysv4* matches it.
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
exit ;;
i*86:OS/2:*:*)
@@ -1074,7 +1027,7 @@ EOF
i*86:syllable:*:*)
echo ${UNAME_MACHINE}-pc-syllable
exit ;;
- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
echo i386-unknown-lynxos${UNAME_RELEASE}
exit ;;
i*86:*DOS:*:*)
@@ -1089,7 +1042,7 @@ EOF
fi
exit ;;
i*86:*:5:[678]*)
- # UnixWare 7.x, OpenUNIX and OpenServer 6.
+ # UnixWare 7.x, OpenUNIX and OpenServer 6.
case `/bin/uname -X | grep "^Machine"` in
*486*) UNAME_MACHINE=i486 ;;
*Pentium) UNAME_MACHINE=i586 ;;
@@ -1117,13 +1070,13 @@ EOF
exit ;;
pc:*:*:*)
# Left here for compatibility:
- # uname -m prints for DJGPP always 'pc', but it prints nothing about
- # the processor, so we play safe by assuming i586.
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+ # the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
# prints for the "djgpp" host, or else GDB configury will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
- exit ;;
+ exit ;;
Intel:Mach:3*:*)
echo i386-pc-mach3
exit ;;
@@ -1158,8 +1111,8 @@ EOF
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4; exit; } ;;
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4; exit; } ;;
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
OS_REL='.3'
test -r /etc/.relid \
@@ -1182,7 +1135,7 @@ EOF
rs6000:LynxOS:2.*:*)
echo rs6000-unknown-lynxos${UNAME_RELEASE}
exit ;;
- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
echo powerpc-unknown-lynxos${UNAME_RELEASE}
exit ;;
SM[BE]S:UNIX_SV:*:*)
@@ -1202,10 +1155,10 @@ EOF
echo ns32k-sni-sysv
fi
exit ;;
- PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
- # says <Richard.M.Bartel@ccMail.Census.GOV>
- echo i586-unisys-sysv4
- exit ;;
+ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit ;;
*:UNIX_System_V:4*:FTX*)
# From Gerald Hewes <hewes@openmarket.com>.
# How about differentiating between stratus architectures? -djm
@@ -1231,11 +1184,11 @@ EOF
exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
if [ -d /usr/nec ]; then
- echo mips-nec-sysv${UNAME_RELEASE}
+ echo mips-nec-sysv${UNAME_RELEASE}
else
- echo mips-unknown-sysv${UNAME_RELEASE}
+ echo mips-unknown-sysv${UNAME_RELEASE}
fi
- exit ;;
+ exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
echo powerpc-be-beos
exit ;;
@@ -1275,6 +1228,16 @@ EOF
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in
+ i386)
+ eval $set_cc_for_build
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ UNAME_PROCESSOR="x86_64"
+ fi
+ fi ;;
unknown) UNAME_PROCESSOR=powerpc ;;
esac
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
@@ -1290,6 +1253,9 @@ EOF
*:QNX:*:4*)
echo i386-pc-qnx
exit ;;
+ NEO-?:NONSTOP_KERNEL:*:*)
+ echo neo-tandem-nsk${UNAME_RELEASE}
+ exit ;;
NSE-?:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE}
exit ;;
@@ -1335,13 +1301,13 @@ EOF
echo pdp10-unknown-its
exit ;;
SEI:*:*:SEIUX)
- echo mips-sei-seiux${UNAME_RELEASE}
+ echo mips-sei-seiux${UNAME_RELEASE}
exit ;;
*:DragonFly:*:*)
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
exit ;;
*:*VMS:*:*)
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
case "${UNAME_MACHINE}" in
A*) echo alpha-dec-vms ; exit ;;
I*) echo ia64-dec-vms ; exit ;;
@@ -1359,6 +1325,9 @@ EOF
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
+ x86_64:VMkernel:*:*)
+ echo ${UNAME_MACHINE}-unknown-esx
+ exit ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1381,11 +1350,11 @@ main ()
#include <sys/param.h>
printf ("m68k-sony-newsos%s\n",
#ifdef NEWSOS4
- "4"
+ "4"
#else
- ""
+ ""
#endif
- ); exit (0);
+ ); exit (0);
#endif
#endif
diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
index af82b4357d2..f11b9f350d7 100644
--- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
@@ -1,6 +1,6 @@
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO)
-set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify ba_replay)
+set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify)
foreach(tool ${tools})
add_executable(${tool} ${tool}.cc)
add_dependencies(${tool} install_tdb_h)
@@ -14,4 +14,3 @@ target_link_libraries(ftverify m)
install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
-
diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc
deleted file mode 100644
index cade7e5dfaf..00000000000
--- a/storage/tokudb/PerconaFT/tools/ba_replay.cc
+++ /dev/null
@@ -1,629 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-// Replay a block allocator trace against different strategies and compare
-// the results
-
-#include <db.h>
-
-#include <getopt.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <map>
-#include <set>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include <portability/memory.h>
-#include <portability/toku_assert.h>
-#include <portability/toku_stdlib.h>
-
-#include "ft/serialize/block_allocator.h"
-
-using std::map;
-using std::set;
-using std::string;
-using std::vector;
-
-static int verbose = false;
-
-static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) {
- if (!pred) {
- fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line);
- abort();
- }
-}
-
-static char *trim_whitespace(char *line) {
- // skip leading whitespace
- while (isspace(*line)) {
- line++;
- }
- return line;
-}
-
-static int64_t parse_number(char **ptr, int line_num, int base) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- char *new_ptr;
- int64_t n = strtoll(line, &new_ptr, base);
- ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num);
- ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num);
- *ptr = new_ptr;
- return n;
-}
-
-static uint64_t parse_uint64(char **ptr, int line_num) {
- int64_t n = parse_number(ptr, line_num, 10);
- // we happen to know that the uint64's we deal with will
- // take less than 63 bits (they come from pointers)
- return static_cast<uint64_t>(n);
-}
-
-static string parse_token(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- // parse the first token, which represents the traced function
- char token[64];
- int r = sscanf(*ptr, "%64s", token);
- ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num);
- *ptr += strlen(token);
- return string(token);
-}
-
-static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- uint64_t offset, size;
- int bytes_read;
- int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read);
- ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num);
- *ptr += bytes_read;
- return block_allocator::blockpair(offset, size);
-}
-
-static char *strip_newline(char *line, bool *found) {
- char *ptr = strchr(line, '\n');
- if (ptr != nullptr) {
- if (found != nullptr) {
- *found = true;
- }
- *ptr = '\0';
- }
- return line;
-}
-
-static char *read_trace_line(FILE *file) {
- const int buf_size = 4096;
- char buf[buf_size];
- std::stringstream ss;
- while (true) {
- if (fgets(buf, buf_size, file) == nullptr) {
- break;
- }
- bool has_newline = false;
- ss << strip_newline(buf, &has_newline);
- if (has_newline) {
- // end of the line, we're done out
- break;
- }
- }
- std::string s = ss.str();
- return s.size() ? toku_strdup(s.c_str()) : nullptr;
-}
-
-static vector<string> canonicalize_trace_from(FILE *file) {
- // new trace, canonicalized from a raw trace
- vector<string> canonicalized_trace;
-
- // raw allocator id -> canonical allocator id
- //
- // keeps track of allocators that were created as part of the trace,
- // and therefore will be part of the canonicalized trace.
- uint64_t allocator_id_seq_num = 0;
- map<uint64_t, uint64_t> allocator_ids;
-
- // allocated offset -> allocation seq num
- //
- uint64_t allocation_seq_num = 0;
- static const uint64_t ASN_NONE = (uint64_t) -1;
- typedef map<uint64_t, uint64_t> offset_seq_map;
-
- // raw allocator id -> offset_seq_map that tracks its allocations
- map<uint64_t, offset_seq_map> offset_to_seq_num_maps;
-
- int line_num = 0;
- char *line;
- while ((line = read_trace_line(file)) != nullptr) {
- line_num++;
- char *ptr = line;
-
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 16);
-
- std::stringstream ss;
- if (fn.find("ba_trace_create") != string::npos) {
- ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num);
- ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs",
- "corrupted trace: bad fn", line, line_num);
-
- // we only convert the allocator_id to an allocator_id_seq_num
- // in the canonical trace and leave the rest of the line as-is.
- allocator_ids[allocator_id] = allocator_id_seq_num;
- ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl;
- allocator_id_seq_num++;
-
- // First, read passed the reserve / alignment values.
- (void) parse_uint64(&ptr, line_num);
- (void) parse_uint64(&ptr, line_num);
- if (fn == "ba_trace_create_from_blockpairs") {
- // For each blockpair created by this traceline, add its offset to the offset seq map
- // with asn ASN_NONE so that later canonicalizations of `free' know whether to write
- // down the asn or the raw offset.
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- (*map)[bp.offset] = ASN_NONE;
- }
- }
- } else {
- ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num);
- uint64_t canonical_allocator_id = allocator_ids[allocator_id];
-
- // this is the map that tracks allocations for this allocator
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
-
- if (fn == "ba_trace_alloc") {
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
-
- // remember that an allocation at `offset' has the current alloc seq num
- (*map)[offset] = allocation_seq_num;
-
- // translate `offset = alloc(size)' to `asn = alloc(size)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
- allocation_seq_num++;
- } else if (fn == "ba_trace_free") {
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num);
-
- // get the alloc seq num for an allcation that occurred at `offset'
- const uint64_t asn = (*map)[offset];
- map->erase(offset);
-
- // if there's an asn, then a corresponding ba_trace_alloc occurred and we should
- // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs
- // and we write the original offset.
- if (asn != ASN_NONE) {
- ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl;
- } else {
- ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl;
- }
- } else if (fn == "ba_trace_destroy") {
- // Remove this allocator from both maps
- allocator_ids.erase(allocator_id);
- offset_to_seq_num_maps.erase(allocator_id);
-
- // translate `destroy(ptr_id) to destroy(canonical_id)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl;
- } else {
- ba_replay_assert(false, "corrupted trace: bad fn", line, line_num);
- }
- }
- canonicalized_trace.push_back(ss.str());
-
- toku_free(line);
- }
-
- if (allocator_ids.size() != 0) {
- fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running");
- }
-
- return canonicalized_trace;
-}
-
-struct streaming_variance_calculator {
- int64_t n_samples;
- int64_t mean;
- int64_t variance;
-
- // math credit: AoCP, Donald Knuth, '62
- void add_sample(int64_t x) {
- n_samples++;
- if (n_samples == 1) {
- mean = x;
- variance = 0;
- } else {
- int64_t old_mean = mean;
- mean = old_mean + ((x - old_mean) / n_samples);
- variance = (((n_samples - 1) * variance) +
- ((x - old_mean) * (x - mean))) / n_samples;
- }
- }
-};
-
-struct canonical_trace_stats {
- uint64_t n_lines_replayed;
-
- uint64_t n_create;
- uint64_t n_create_from_blockpairs;
- uint64_t n_alloc_hot;
- uint64_t n_alloc_cold;
- uint64_t n_free;
- uint64_t n_destroy;
-
- struct streaming_variance_calculator alloc_hot_bytes;
- struct streaming_variance_calculator alloc_cold_bytes;
-
- canonical_trace_stats() {
- memset(this, 0, sizeof(*this));
- }
-};
-
-struct fragmentation_report {
- TOKU_DB_FRAGMENTATION_S beginning;
- TOKU_DB_FRAGMENTATION_S end;
- fragmentation_report() {
- memset(this, 0, sizeof(*this));
- }
- void merge(const struct fragmentation_report &src_report) {
- for (int i = 0; i < 2; i++) {
- TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end;
- const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end;
- dst->file_size_bytes += src->file_size_bytes;
- dst->data_bytes += src->data_bytes;
- dst->data_blocks += src->data_blocks;
- dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional;
- dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional;
- dst->unused_bytes += src->unused_bytes;
- dst->unused_blocks += src->unused_blocks;
- dst->largest_unused_block += src->largest_unused_block;
- }
- }
-};
-
-static void replay_canonicalized_trace(const vector<string> &canonicalized_trace,
- block_allocator::allocation_strategy strategy,
- map<uint64_t, struct fragmentation_report> *reports,
- struct canonical_trace_stats *stats) {
- // maps an allocator id to its block allocator
- map<uint64_t, block_allocator *> allocator_map;
-
- // maps allocation seq num to allocated offset
- map<uint64_t, uint64_t> seq_num_to_offset;
-
- for (vector<string>::const_iterator it = canonicalized_trace.begin();
- it != canonicalized_trace.end(); it++) {
- const int line_num = stats->n_lines_replayed++;
-
- char *line = toku_strdup(it->c_str());
- line = strip_newline(line, nullptr);
-
- char *ptr = trim_whitespace(line);
-
- // canonical allocator id is in base 10, not 16
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 10);
-
- if (fn.find("ba_trace_create") != string::npos) {
- const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num);
- const uint64_t alignment = parse_uint64(&ptr, line_num);
- ba_replay_assert(allocator_map.count(allocator_id) == 0,
- "corrupted canonical trace: double create", line, line_num);
-
- block_allocator *ba = new block_allocator();
- if (fn == "ba_trace_create") {
- ba->create(reserve_at_beginning, alignment);
- stats->n_create++;
- } else {
- ba_replay_assert(fn == "ba_trace_create_from_blockpairs",
- "corrupted canonical trace: bad create fn", line, line_num);
- vector<block_allocator::blockpair> pairs;
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- pairs.push_back(bp);
- }
- ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size());
- stats->n_create_from_blockpairs++;
- }
- ba->set_strategy(strategy);
-
- TOKU_DB_FRAGMENTATION_S report;
- ba->get_statistics(&report);
- (*reports)[allocator_id].beginning = report;
- allocator_map[allocator_id] = ba;
- } else {
- ba_replay_assert(allocator_map.count(allocator_id) > 0,
- "corrupted canonical trace: no such allocator", line, line_num);
-
- block_allocator *ba = allocator_map[allocator_id];
- if (fn == "ba_trace_alloc") {
- // replay an `alloc' whose result will be associated with a certain asn
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 0,
- "corrupted canonical trace: double alloc (asn in use)", line, line_num);
-
- uint64_t offset;
- ba->alloc_block(size, heat, &offset);
- seq_num_to_offset[asn] = offset;
- heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++;
- heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size);
- } else if (fn == "ba_trace_free_asn") {
- // replay a `free' on a block whose offset is the result of an alloc with an asn
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 1,
- "corrupted canonical trace: double free (asn unused)", line, line_num);
-
- const uint64_t offset = seq_num_to_offset[asn];
- ba->free_block(offset);
- seq_num_to_offset.erase(asn);
- stats->n_free++;
- } else if (fn == "ba_trace_free_offset") {
- // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba->free_block(offset);
- stats->n_free++;
- } else if (fn == "ba_trace_destroy") {
- TOKU_DB_FRAGMENTATION_S report;
- ba->get_statistics(&report);
- ba->destroy();
- (*reports)[allocator_id].end = report;
- allocator_map.erase(allocator_id);
- stats->n_destroy++;
- } else {
- ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num);
- }
- }
-
- toku_free(line);
- }
-}
-
-static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) {
- switch (strategy) {
- case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT:
- return "first-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
- return "best-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
- return "heat-zone";
- case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT:
- return "padded-fit";
- default:
- abort();
- }
-}
-
-static block_allocator::allocation_strategy cstring_to_strategy(const char *str) {
- if (strcmp(str, "first-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT;
- }
- if (strcmp(str, "best-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT;
- }
- if (strcmp(str, "heat-zone") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE;
- }
- if (strcmp(str, "padded-fit") != 0) {
- fprintf(stderr, "bad strategy string: %s\n", str);
- abort();
- }
- return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT;
-}
-
-static void print_result_verbose(uint64_t allocator_id,
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- if (report.end.data_bytes + report.end.unused_bytes +
- report.beginning.data_bytes + report.beginning.unused_bytes
- < 32UL * 1024 * 1024) {
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
- return;
- }
-
- printf(" allocator_id: %20" PRId64 "\n", allocator_id);
- printf(" strategy: %20s\n", strategy_to_cstring(strategy));
-
- for (int i = 0; i < 2; i++) {
- const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end;
- printf("%s\n", i == 0 ? "BEFORE" : "AFTER");
-
- uint64_t total_bytes = r->data_bytes + r->unused_bytes;
- uint64_t total_blocks = r->data_blocks + r->unused_blocks;
-
- // byte statistics
- printf(" total bytes: %20" PRId64 "\n", total_bytes);
- printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes,
- static_cast<double>(r->data_bytes) / total_bytes);
- printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes,
- static_cast<double>(r->unused_bytes) / total_bytes);
-
- // block statistics
- printf(" total blocks: %20" PRId64 "\n", total_blocks);
- printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks,
- static_cast<double>(r->data_blocks) / total_blocks);
- printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks,
- static_cast<double>(r->unused_blocks) / total_blocks);
-
- // misc
- printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block);
- }
-}
-
-static void print_result(uint64_t allocator_id,
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning;
- const TOKU_DB_FRAGMENTATION_S *end = &report.end;
-
- uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes;
- uint64_t total_end_bytes = end->data_bytes + end->unused_bytes;
- if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) {
- if (verbose) {
- printf("\n");
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
- }
- return;
- }
- printf("\n");
- if (verbose) {
- print_result_verbose(allocator_id, strategy, report);
- } else {
- printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n",
- strategy_to_cstring(strategy), allocator_id,
- static_cast<double>(report.end.data_bytes) / total_end_bytes,
- static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes);
- }
-}
-
-static int only_aggregate_reports;
-
-static struct option getopt_options[] = {
- { "verbose", no_argument, &verbose, 1 },
- { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 },
- { "include-strategy", required_argument, nullptr, 'i' },
- { "exclude-strategy", required_argument, nullptr, 'x' },
- { nullptr, 0, nullptr, 0 },
-};
-
-int main(int argc, char *argv[]) {
- int opt;
- set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies;
- while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) {
- switch (opt) {
- case 0:
- break;
- case 'i':
- candidate_strategies.insert(cstring_to_strategy(optarg));
- break;
- case 'x':
- excluded_strategies.insert(cstring_to_strategy(optarg));
- break;
- case '?':
- default:
- abort();
- };
- }
- // Default to everything if nothing was explicitly included.
- if (candidate_strategies.empty()) {
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
- }
- // ..but remove anything that was explicitly excluded
- for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin();
- it != excluded_strategies.end(); it++) {
- candidate_strategies.erase(*it);
- }
-
- // Run the real trace
- //
- // First, read the raw trace from stdin
- vector<string> canonicalized_trace = canonicalize_trace_from(stdin);
-
- if (!only_aggregate_reports) {
- printf("\n");
- printf("Individual reports, by allocator:\n");
- }
-
- struct canonical_trace_stats stats;
- map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy;
- for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
- it != candidate_strategies.end(); it++) {
- const block_allocator::allocation_strategy strategy(*it);
-
- // replay the canonicalized trace against the current strategy.
- //
- // we provided the allocator map so we can gather statistics later
- struct canonical_trace_stats dummy_stats;
- map<uint64_t, struct fragmentation_report> reports;
- replay_canonicalized_trace(canonicalized_trace, strategy, &reports,
- // Only need to gather canonical trace stats once
- it == candidate_strategies.begin() ? &stats : &dummy_stats);
-
- struct fragmentation_report aggregate_report;
- memset(&aggregate_report, 0, sizeof(aggregate_report));
- for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin();
- rp != reports.end(); rp++) {
- const struct fragmentation_report &report = rp->second;
- aggregate_report.merge(report);
- if (!only_aggregate_reports) {
- print_result(rp->first, strategy, report);
- }
- }
- reports_by_strategy[strategy] = aggregate_report;
- }
-
- printf("\n");
- printf("Aggregate reports, by strategy:\n");
-
- for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin();
- it != reports_by_strategy.end(); it++) {
- print_result(0, it->first, it->second);
- }
-
- printf("\n");
- printf("Overall trace stats:\n");
- printf("\n");
- printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed);
- printf(" n_create: %15" PRIu64 "\n", stats.n_create);
- printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs);
- printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot);
- printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold);
- printf(" n_free: %15" PRIu64 "\n", stats.n_free);
- printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy);
- printf("\n");
- printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean);
- printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance));
- printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean);
- printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance));
- printf("\n");
-
- return 0;
-}
diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc
index 5920be8deda..2324249ba00 100644
--- a/storage/tokudb/PerconaFT/tools/ftverify.cc
+++ b/storage/tokudb/PerconaFT/tools/ftverify.cc
@@ -148,7 +148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
}
}
{
- toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
r1 = deserialize_ft_from_fd_into_rbuf(
fd,
header_1_off,
diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
index 23ef72218ac..f6d777b4161 100644
--- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc
+++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
@@ -192,6 +192,7 @@ static void dump_header(FT ft) {
dump_descriptor(&ft->descriptor);
printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows);
printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes);
+ printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows);
}
static int64_t getRootNode(FT ft) {
diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
index 48ff28e89af..76b1d9c713e 100644
--- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
+++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
@@ -110,7 +110,7 @@ test2 (void) {
static void
test3 (void)
-// Compare the simple version to the highly optimized verison.
+// Compare the simple version to the highly optimized version.
{
const int datalen = 1000;
char data[datalen];
diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc
index 672ae32f80a..7e9e6100c6e 100644
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -382,17 +382,17 @@ void TOKUDB_SHARE::update_row_count(
pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100);
if (_row_delta_activity >= pct_of_rows_changed_to_trigger) {
char msg[200];
- snprintf(
- msg,
- sizeof(msg),
- "TokuDB: Auto %s background analysis for %s, delta_activity "
- "%llu is greater than %llu percent of %llu rows.",
- tokudb::sysvars::analyze_in_background(thd) > 0 ?
- "scheduling" : "running",
- full_table_name(),
- _row_delta_activity,
- auto_threshold,
- (ulonglong)(_rows));
+ snprintf(msg,
+ sizeof(msg),
+ "TokuDB: Auto %s analysis for %s, delta_activity %llu is "
+ "greater than %llu percent of %llu rows.",
+ tokudb::sysvars::analyze_in_background(thd) > 0
+ ? "scheduling background"
+ : "running foreground",
+ full_table_name(),
+ _row_delta_activity,
+ auto_threshold,
+ (ulonglong)(_rows));
// analyze_standard will unlock _mutex regardless of success/failure
int ret = analyze_standard(thd, NULL);
@@ -4097,7 +4097,7 @@ int ha_tokudb::write_row(uchar * record) {
goto cleanup;
}
if (curr_num_DBs == 1) {
- error = insert_row_to_main_dictionary(record,&prim_key, &row, txn);
+ error = insert_row_to_main_dictionary(record, &prim_key, &row, txn);
if (error) { goto cleanup; }
} else {
error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd);
@@ -6130,7 +6130,7 @@ int ha_tokudb::info(uint flag) {
// we should always have a primary key
assert_always(share->file != NULL);
- error = estimate_num_rows(share->file,&num_rows, txn);
+ error = estimate_num_rows(share->file, &num_rows, txn);
if (error == 0) {
share->set_row_count(num_rows, false);
stats.records = num_rows;
diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc
index db3d6c112d4..6d8e7173c8d 100644
--- a/storage/tokudb/ha_tokudb_admin.cc
+++ b/storage/tokudb/ha_tokudb_admin.cc
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -43,13 +43,11 @@ public:
virtual ~recount_rows_t();
virtual const char* key();
-
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status);
+ virtual const char* database();
+ virtual const char* table();
+ virtual const char* type();
+ virtual const char* parameters();
+ virtual const char* status();
protected:
virtual void on_run();
@@ -64,6 +62,8 @@ private:
ulonglong _throttle;
// for recount rows status reporting
+ char _parameters[256];
+ char _status[1024];
int _result;
ulonglong _recount_start; // in microseconds
ulonglong _total_elapsed_time; // in microseconds
@@ -78,7 +78,6 @@ private:
uint64_t deleted,
void* extra);
int analyze_recount_rows_progress(uint64_t count, uint64_t deleted);
- void get_analyze_status(char*);
};
void* recount_rows_t::operator new(size_t sz) {
@@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t(
}
_throttle = tokudb::sysvars::analyze_throttle(thd);
+
+ snprintf(_parameters,
+ sizeof(_parameters),
+ "TOKUDB_ANALYZE_THROTTLE=%llu;",
+ _throttle);
+ _status[0] = '\0';
}
recount_rows_t::~recount_rows_t() {
}
void recount_rows_t::on_run() {
+ const char* orig_proc_info = NULL;
+ if (_thd)
+ orig_proc_info = tokudb_thd_get_proc_info(_thd);
_recount_start = tokudb::time::microsec();
_total_elapsed_time = 0;
@@ -171,6 +179,8 @@ void recount_rows_t::on_run() {
_result,
_share->row_count());
error:
+ if(_thd)
+ tokudb_thd_set_proc_info(_thd, orig_proc_info);
return;
}
void recount_rows_t::on_destroy() {
@@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() {
const char* recount_rows_t::key() {
return _share->full_table_name();
}
-void recount_rows_t::status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) {
-
- strcpy(database, _share->database_name());
- strcpy(table, _share->table_name());
- strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS");
- sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle);
- get_analyze_status(status);
+const char* recount_rows_t::database() {
+ return _share->database_name();
+}
+const char* recount_rows_t::table() {
+ return _share->table_name();
+}
+const char* recount_rows_t::type() {
+ static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS";
+ return type;
+}
+const char* recount_rows_t::parameters() {
+ return _parameters;
+}
+const char* recount_rows_t::status() {
+ return _status;
}
int recount_rows_t::analyze_recount_rows_progress(
uint64_t count,
@@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress(
return ER_ABORTING_CONNECTION;
}
+ // rebuild status
+ // Without locking there would be a race here: _status is passed to
+ // tokudb_thd_set_proc_info and it is also used for the status column
+ // in i_s.background_job_status.
+ // If someone happens to be querying/building the i_s table
+ // at the exact same time that the status is being rebuilt here,
+ // the i_s table could get some garbage status.
+ // Holding the job manager lock while rewriting _status is a little
+ // heavy handed but it works: it prevents us from changing the status
+ // while someone might be immediately observing us, and it prevents
+ // someone from observing us while we change the status.
+ tokudb::background::_job_manager->lock();
+ snprintf(_status,
+ sizeof(_status),
+ "recount_rows %s.%s counted %llu rows and %llu deleted "
+ "in %llu seconds.",
+ _share->database_name(),
+ _share->table_name(),
+ _rows,
+ _deleted_rows,
+ _total_elapsed_time / tokudb::time::MICROSECONDS);
+ tokudb::background::_job_manager->unlock();
+
// report
- if (_thd) {
- char status[256];
- get_analyze_status(status);
- thd_proc_info(_thd, status);
- }
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, _status);
// throttle
// given the throttle value, lets calculate the maximum number of rows
@@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress(
}
return 0;
}
-void recount_rows_t::get_analyze_status(char* msg) {
- sprintf(
- msg,
- "recount_rows %s.%s counted %llu rows and %llu deleted in %llu "
- "seconds.",
- _share->database_name(),
- _share->table_name(),
- _rows,
- _deleted_rows,
- _total_elapsed_time / tokudb::time::MICROSECONDS);
-}
-
class standard_t : public tokudb::background::job_manager_t::job_t {
public:
@@ -261,13 +282,11 @@ public:
virtual ~standard_t();
virtual const char* key(void);
-
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status);
+ virtual const char* database();
+ virtual const char* table();
+ virtual const char* type();
+ virtual const char* parameters();
+ virtual const char* status();
protected:
virtual void on_run();
@@ -284,6 +303,8 @@ private:
double _delete_fraction;
// for analyze status reporting, may also use other state
+ char _parameters[256];
+ char _status[1024];
int _result;
ulonglong _analyze_start; // in microseconds
ulonglong _total_elapsed_time; // in microseconds
@@ -305,7 +326,6 @@ private:
uint64_t deleted_rows);
bool analyze_standard_cursor_callback(uint64_t deleted_rows);
- void get_analyze_status(char*);
int analyze_key_progress();
int analyze_key(uint64_t* rec_per_key_part);
};
@@ -351,6 +371,16 @@ standard_t::standard_t(
_time_limit =
tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS;
_delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd);
+
+ snprintf(_parameters,
+ sizeof(_parameters),
+ "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
+ "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
+ _delete_fraction,
+ _time_limit / tokudb::time::MICROSECONDS,
+ _throttle);
+
+ _status[0] = '\0';
}
standard_t::~standard_t() {
}
@@ -358,6 +388,10 @@ void standard_t::on_run() {
DB_BTREE_STAT64 stat64;
uint64_t rec_per_key_part[_share->_max_key_parts];
uint64_t total_key_parts = 0;
+ const char* orig_proc_info = NULL;
+ if (_thd)
+ orig_proc_info = tokudb_thd_get_proc_info(_thd);
+
_analyze_start = tokudb::time::microsec();
_half_time = _time_limit > 0 ? _time_limit/2 : 0;
@@ -395,7 +429,7 @@ void standard_t::on_run() {
_result = HA_ADMIN_FAILED;
}
if (_thd && (_result == HA_ADMIN_FAILED ||
- (double)_deleted_rows >
+ static_cast<double>(_deleted_rows) >
_delete_fraction * (_rows + _deleted_rows))) {
char name[256]; int namelen;
@@ -460,8 +494,9 @@ cleanup:
}
error:
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, orig_proc_info);
return;
-
}
void standard_t::on_destroy() {
_share->lock();
@@ -472,24 +507,21 @@ void standard_t::on_destroy() {
const char* standard_t::key() {
return _share->full_table_name();
}
-void standard_t::status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) {
-
- strcpy(database, _share->database_name());
- strcpy(table, _share->table_name());
- strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD");
- sprintf(
- params,
- "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
- "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
- _delete_fraction,
- _time_limit / tokudb::time::MICROSECONDS,
- _throttle);
- get_analyze_status(status);
+const char* standard_t::database() {
+ return _share->database_name();
+}
+const char* standard_t::table() {
+ return _share->table_name();
+}
+const char* standard_t::type() {
+ static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD";
+ return type;
+}
+const char* standard_t::parameters() {
+ return _parameters;
+}
+const char* standard_t::status() {
+ return _status;
}
bool standard_t::analyze_standard_cursor_callback(
void* extra,
@@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) {
_ticks += deleted_rows;
return analyze_key_progress() != 0;
}
-void standard_t::get_analyze_status(char* msg) {
- static const char* scan_direction_str[] = {
- "not scanning",
- "scanning forward",
- "scanning backward",
- "scan unknown"
- };
-
- const char* scan_direction = NULL;
- switch (_scan_direction) {
- case 0: scan_direction = scan_direction_str[0]; break;
- case DB_NEXT: scan_direction = scan_direction_str[1]; break;
- case DB_PREV: scan_direction = scan_direction_str[2]; break;
- default: scan_direction = scan_direction_str[3]; break;
- }
-
- float progress_rows = 0.0;
- if (_share->row_count() > 0)
- progress_rows = (float) _rows / (float) _share->row_count();
- float progress_time = 0.0;
- if (_time_limit > 0)
- progress_time = (float) _key_elapsed_time / (float) _time_limit;
- sprintf(
- msg,
- "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, "
- "%s",
- _share->database_name(),
- _share->table_name(),
- _share->_key_descriptors[_current_key]._name,
- _current_key,
- _share->_keys,
- progress_rows * 100.0,
- progress_time * 100.0,
- scan_direction);
-}
int standard_t::analyze_key_progress(void) {
if (_ticks > 1000) {
_ticks = 0;
@@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) {
if ((_thd && thd_killed(_thd)) || cancelled()) {
// client killed
return ER_ABORTING_CONNECTION;
- } else if(_time_limit > 0 &&
- (uint64_t)_key_elapsed_time > _time_limit) {
+ } else if (_time_limit > 0 &&
+ static_cast<uint64_t>(_key_elapsed_time) > _time_limit) {
// time limit reached
return ETIME;
}
- // report
- if (_thd) {
- char status[256];
- get_analyze_status(status);
- thd_proc_info(_thd, status);
+ // rebuild status
+ // Without locking there would be a race here: _status is passed to
+ // tokudb_thd_set_proc_info and it is also used for the status column
+ // in i_s.background_job_status.
+ // If someone happens to be querying/building the i_s table
+ // at the exact same time that the status is being rebuilt here,
+ // the i_s table could get some garbage status.
+ // Holding the job manager lock around the snprintf below is a little
+ // heavy handed but it works: it prevents us from changing the status
+ // while someone might be immediately observing us, and it prevents
+ // someone from observing us while we change the status.
+ static const char* scan_direction_str[] = {"not scanning",
+ "scanning forward",
+ "scanning backward",
+ "scan unknown"};
+
+ const char* scan_direction = NULL;
+ switch (_scan_direction) {
+ case 0:
+ scan_direction = scan_direction_str[0];
+ break;
+ case DB_NEXT:
+ scan_direction = scan_direction_str[1];
+ break;
+ case DB_PREV:
+ scan_direction = scan_direction_str[2];
+ break;
+ default:
+ scan_direction = scan_direction_str[3];
+ break;
}
+ float progress_rows = 0.0;
+ if (_share->row_count() > 0)
+ progress_rows = static_cast<float>(_rows) /
+ static_cast<float>(_share->row_count());
+ float progress_time = 0.0;
+ if (_time_limit > 0)
+ progress_time = static_cast<float>(_key_elapsed_time) /
+ static_cast<float>(_time_limit);
+ tokudb::background::_job_manager->lock();
+ snprintf(
+ _status,
+ sizeof(_status),
+ "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% "
+ "time, %s",
+ _share->database_name(),
+ _share->table_name(),
+ _share->_key_descriptors[_current_key]._name,
+ _current_key,
+ _share->_keys,
+ progress_rows * 100.0,
+ progress_time * 100.0,
+ scan_direction);
+ tokudb::background::_job_manager->unlock();
+
+ // report
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, _status);
+
// throttle
// given the throttle value, lets calculate the maximum number of rows
// we should have seen so far in a .1 sec resolution
@@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) {
assert_always(close_error == 0);
done:
+ // in case we timed out (bunch of deleted records) without hitting a
+ // single row
+ if (_rows == 0)
+ _rows = 1;
+
// return cardinality
for (uint64_t i = 0; i < num_key_parts; i++) {
rec_per_key_part[i] = _rows / unique_rows[i];
@@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
assert_always(thd != NULL);
- const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
int result = HA_ADMIN_OK;
tokudb::analyze::recount_rows_t* job
@@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
result = HA_ADMIN_FAILED;
}
- thd_proc_info(thd, orig_proc_info);
-
TOKUDB_HANDLER_DBUG_RETURN(result);
}
@@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
TOKUDB_HANDLER_DBUG_RETURN(result);
}
- const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
-
tokudb::analyze::standard_t* job
= new tokudb::analyze::standard_t(txn == NULL ? false : true, thd,
this, txn);
@@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
lock();
- thd_proc_info(thd, orig_proc_info);
-
TOKUDB_HANDLER_DBUG_RETURN(result);
}
diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h
index b7726a746ad..1b33e0a53e4 100644
--- a/storage/tokudb/hatoku_defines.h
+++ b/storage/tokudb/hatoku_defines.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -234,9 +234,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// mysql 5.6.15 removed the test macro, so we define our own
#define tokudb_test(e) ((e) ? 1 : 0)
-inline const char* tokudb_thd_get_proc_info(const THD *thd) {
+inline const char* tokudb_thd_get_proc_info(const THD* thd) {
return thd->proc_info;
}
+inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) {
+ thd_proc_info(thd, proc_info);
+}
// uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead
inline uint tokudb_uint3korr(const uchar *a) {
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
deleted file mode 100644
index ccfffb53976..00000000000
--- a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
+++ /dev/null
@@ -1,51 +0,0 @@
-include/master-slave.inc
-[connection master]
-CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB;
-CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB;
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t1 VALUES (10);
-INSERT INTO t1 VALUES (NULL),(NULL),(NULL);
-INSERT INTO t2 VALUES (5,0);
-INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID());
-SET FOREIGN_KEY_CHECKS=1;
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b c
-5 0
-6 11
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b c
-5 0
-6 11
-SET TIMESTAMP=1000000000;
-CREATE TABLE t3 ( a INT UNIQUE );
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t3 VALUES (1),(1);
-ERROR 23000: Duplicate entry '1' for key 'a'
-SET FOREIGN_KEY_CHECKS=0;
-DROP TABLE IF EXISTS t1,t2,t3;
-SET FOREIGN_KEY_CHECKS=1;
-create table t1 (b int primary key) engine = TokuDB;
-create table t2 (a int primary key, b int, foreign key (b) references t1(b))
-engine = TokuDB;
-insert into t1 set b=1;
-insert into t2 set a=1, b=1;
-set foreign_key_checks=0;
-delete from t1;
-must sync w/o a problem (could not with the buggy code)
-select count(*) from t1 /* must be zero */;
-count(*)
-0
-drop table t2,t1;
-include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
deleted file mode 100644
index 120ad0d5c1e..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
+++ /dev/null
@@ -1,3 +0,0 @@
--- source include/have_tokudb.inc
-let $engine_type=TokuDB;
--- source extra/rpl_tests/rpl_foreign_key.test
diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
index 5769ee74071..8b53f89efa3 100644
--- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
+++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
@@ -25,7 +25,7 @@ TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_statu
`scheduler` varchar(32) NOT NULL DEFAULT '',
`scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`started_time` datetime DEFAULT NULL,
- `status` varchar(256) DEFAULT NULL
+ `status` varchar(1024) DEFAULT NULL
) ENGINE=MEMORY DEFAULT CHARSET=utf8
create table t1 (a int not null auto_increment, b int, c int, primary key(a), key kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c));
insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
index 6100d9aeec2..8b6df4966f4 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
@@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`;
create table foo (a int, b int);
create table bar (a int, key(a));
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
index e1acea13ed7..53c1037b051 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
@@ -15,33 +15,11 @@ create table bar (a int);
alter table foo drop column a;
alter table bar add column b int, add column c int;
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
index 17a124249da..0421b8e9d26 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
@@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB;
alter table foo drop index b;
alter table bar add index (a);
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
index 42dbb30058a..4c40339be5a 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
@@ -7,17 +7,7 @@ set default_storage_engine='tokudb';
# capture the datadir
let $MYSQLD_DATADIR= `SELECT @@datadir`;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# remove all tokudb file in the datadir
system mkdir $MYSQLD_DATADIR/save;
system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test;
# install 6.6.8 tokudb test files
system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2;
@@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map;
# check that the test dir is empty
list_files $MYSQLD_DATADIR/test *.frm;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# restore saved datadir
system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
index 3903c2cef9f..0340b960fa5 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
@@ -6,17 +6,7 @@ set default_storage_engine='tokudb';
# capture the datadir
let $MYSQLD_DATADIR= `SELECT @@datadir`;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# remove all tokudb file in the datadir
system mkdir $MYSQLD_DATADIR/save;
system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test;
# install 6.6.8 tokudb test files
system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
create table tc (id int, x int, primary key(id), key(x));
@@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map;
# check that the test dir is empty
list_files $MYSQLD_DATADIR/test *.frm;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# restore saved datadir
system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc
index d8ef54a5972..e019e41c788 100644
--- a/storage/tokudb/tokudb_background.cc
+++ b/storage/tokudb/tokudb_background.cc
@@ -8,7 +8,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -68,7 +68,8 @@ void job_manager_t::destroy() {
while (_background_jobs.size()) {
_mutex.lock();
job_t* job = _background_jobs.front();
- cancel(job);
+ if (!job->cancelled())
+ cancel(job);
_background_jobs.pop_front();
delete job;
_mutex.unlock();
@@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) {
it != _background_jobs.end(); it++) {
job_t* job = *it;
- if (!job->cancelled() &&
- strcmp(job->key(), key) == 0) {
-
+ if (!job->cancelled() && strcmp(job->key(), key) == 0) {
cancel(job);
-
ret = true;
}
}
@@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) {
}
void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
- char database[256], table[256], type[256], params[256], status[256];
-
_mutex.lock();
for (jobs_t::const_iterator it = _background_jobs.begin();
@@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
it++) {
job_t* job = *it;
if (!job->cancelled()) {
- database[0] = table[0] = type[0] = params[0] = status[0] = '\0';
- job->status(database, table, type, params, status);
- callback(
- job->id(),
- database,
- table,
- type,
- params,
- status,
- job->user_scheduled(),
- job->scheduled_time(),
- job->started_time(),
- extra);
+ callback(job, extra);
}
}
@@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) {
}
void job_manager_t::cancel(job_t* job) {
assert_debug(_mutex.is_owned_by_me());
+ assert_always(!job->cancelled());
job->cancel();
}
job_manager_t* _job_manager = NULL;
diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h
index 3786701fd0f..29991ab325d 100644
--- a/storage/tokudb/tokudb_background.h
+++ b/storage/tokudb/tokudb_background.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -58,13 +58,20 @@ public:
// (or jobs) usually used to find jobs to cancel
virtual const char* key() = 0;
- // method to get info for information schema, 255 chars per buffer
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) = 0;
+ // method to obtain the database name the job is scheduled on
+ virtual const char* database() = 0;
+
+ // method to obtain the table name the job is scheduled on
+ virtual const char* table() = 0;
+
+ // method to obtain the type of job
+ virtual const char* type() = 0;
+
+ // method to obtain a stringized list of job parameters
+ virtual const char* parameters() = 0;
+
+ // method to obtain a string identifying the current status of the job
+ virtual const char* status() = 0;
inline bool running() const;
@@ -99,17 +106,7 @@ public:
};
// pfn for iterate callback
- typedef void (*pfn_iterate_t)(
- uint64_t,
- const char*,
- const char*,
- const char*,
- const char*,
- const char*,
- bool,
- time_t,
- time_t,
- void*);
+ typedef void (*pfn_iterate_t)(class job_t*, void*);
public:
void* operator new(size_t sz);
@@ -144,6 +141,11 @@ public:
// data passed when the job was scheduled
void iterate_jobs(pfn_iterate_t callback, void* extra) const;
+ // lock the background job manager (bjm); this prevents anyone from running,
+ // cancelling or iterating jobs in the bjm.
+ inline void lock();
+ inline void unlock();
+
private:
static void* thread_func(void* v);
@@ -170,6 +172,15 @@ extern job_manager_t* _job_manager;
bool initialize();
bool destroy();
+inline void job_manager_t::lock() {
+ assert_debug(!_mutex.is_owned_by_me());
+ _mutex.lock();
+}
+inline void job_manager_t::unlock() {
+ assert_debug(_mutex.is_owned_by_me());
+ _mutex.unlock();
+}
+
inline void job_manager_t::job_t::run() {
if (!_cancelled) {
_running = true;
diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc
index e69a7899b45..b3d77eef2d9 100644
--- a/storage/tokudb/tokudb_information_schema.cc
+++ b/storage/tokudb/tokudb_information_schema.cc
@@ -1085,7 +1085,7 @@ ST_FIELD_INFO background_job_status_field_info[] = {
{"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE },
{"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE },
{"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE },
- {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
+ {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
{NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE}
};
@@ -1095,15 +1095,7 @@ struct background_job_status_extra {
};
void background_job_status_callback(
- uint64_t id,
- const char* database_name,
- const char* table_name,
- const char* type,
- const char* params,
- const char* status,
- bool user_scheduled,
- time_t scheduled_time,
- time_t started_time,
+ tokudb::background::job_manager_t::job_t* job,
void* extra) {
background_job_status_extra* e =
@@ -1111,24 +1103,33 @@ void background_job_status_callback(
THD* thd = e->thd;
TABLE* table = e->table;
+ const char* tmp = NULL;
- table->field[0]->store(id, false);
- table->field[1]->store(
- database_name,
- strlen(database_name),
- system_charset_info);
- table->field[2]->store(table_name, strlen(table_name), system_charset_info);
- table->field[3]->store(type, strlen(type), system_charset_info);
- table->field[4]->store(params, strlen(params), system_charset_info);
- if (user_scheduled)
+ table->field[0]->store(job->id(), false);
+
+ tmp = job->database();
+ table->field[1]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->table();
+ table->field[2]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->type();
+ table->field[3]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->parameters();
+ table->field[4]->store(tmp, strlen(tmp), system_charset_info);
+
+ if (job->user_scheduled())
table->field[5]->store("USER", strlen("USER"), system_charset_info);
else
table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info);
- field_store_time_t(table->field[6], scheduled_time);
- field_store_time_t(table->field[7], started_time);
- if (status[0] != '\0') {
- table->field[8]->store(status, strlen(status), system_charset_info);
+ field_store_time_t(table->field[6], job->scheduled_time());
+ field_store_time_t(table->field[7], job->started_time());
+
+ tmp = job->status();
+ if (tmp && tmp[0] != '\0') {
+ table->field[8]->store(tmp, strlen(tmp), system_charset_info);
table->field[8]->set_notnull();
} else {
table->field[8]->store(NULL, 0, system_charset_info);
diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc
index c2a70cce7aa..bce81f95ead 100644
--- a/storage/xtradb/btr/btr0btr.cc
+++ b/storage/xtradb/btr/btr0btr.cc
@@ -80,7 +80,7 @@ btr_corruption_report(
buf_block_get_zip_size(block),
BUF_PAGE_PRINT_NO_CRASH);
}
- buf_page_print(buf_block_get_frame_fast(block), 0, 0);
+ buf_page_print(buf_nonnull_block_get_frame(block), 0, 0);
}
#ifndef UNIV_HOTBACKUP
@@ -827,11 +827,12 @@ btr_height_get(
/* S latches the page */
root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+ ut_ad(root_block); // The index must not be corrupted
if (root_block) {
- height = btr_page_get_level(buf_block_get_frame_fast(root_block), mtr);
-
+ height = btr_page_get_level(buf_nonnull_block_get_frame(root_block),
+ mtr);
/* Release the S latch on the root page. */
mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
#ifdef UNIV_SYNC_DEBUG
@@ -2912,7 +2913,7 @@ btr_attach_half_pages(
}
/* Get the level of the split pages */
- level = btr_page_get_level(buf_block_get_frame_fast(block), mtr);
+ level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr);
ut_ad(level
== btr_page_get_level(buf_block_get_frame(new_block), mtr));
@@ -4289,8 +4290,10 @@ btr_discard_page(
/* Decide the page which will inherit the locks */
- left_page_no = btr_page_get_prev(buf_block_get_frame_fast(block), mtr);
- right_page_no = btr_page_get_next(buf_block_get_frame_fast(block), mtr);
+ left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block),
+ mtr);
+ right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block),
+ mtr);
if (left_page_no != FIL_NULL) {
merge_block = btr_block_get(space, zip_size, left_page_no,
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index a5ce3f3f983..873edec62b4 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -305,6 +305,8 @@ buf_flush_init_flush_rbt(void)
buf_flush_list_mutex_enter(buf_pool);
+ ut_ad(buf_pool->flush_rbt == NULL);
+
/* Create red black tree for speedy insertions in flush list. */
buf_pool->flush_rbt = rbt_create(
sizeof(buf_page_t*), buf_flush_block_cmp);
diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc
index f21fd560235..c13d4583fef 100644
--- a/storage/xtradb/dict/dict0stats.cc
+++ b/storage/xtradb/dict/dict0stats.cc
@@ -736,7 +736,7 @@ dict_stats_copy(
if (dst_idx->type & DICT_FTS) {
continue;
}
- dict_stats_empty_index(dst_idx);
+ dict_stats_empty_index(dst_idx, true);
} else {
continue;
}
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 81f26b27662..93df92e6e63 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -1787,6 +1787,9 @@ fil_close_all_files(void)
{
fil_space_t* space;
+ // Must check both flags as it's possible for this to be called during
+ // server startup with srv_track_changed_pages == true but
+ // srv_redo_log_thread_started == false
if (srv_track_changed_pages && srv_redo_log_thread_started)
os_event_wait(srv_redo_log_tracked_event);
@@ -1826,6 +1829,9 @@ fil_close_log_files(
{
fil_space_t* space;
+ // Must check both flags as it's possible for this to be called during
+ // server startup with srv_track_changed_pages == true but
+ // srv_redo_log_thread_started == false
if (srv_track_changed_pages && srv_redo_log_thread_started)
os_event_wait(srv_redo_log_tracked_event);
diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc
index 5e008b37b8d..0507be04412 100644
--- a/storage/xtradb/fts/fts0fts.cc
+++ b/storage/xtradb/fts/fts0fts.cc
@@ -265,13 +265,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
+@param[in] has_dict whether the dict operation lock is held
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait);
+ bool wait,
+ bool has_dict);
/****************************************************************//**
Release all resources help by the words rb tree e.g., the node ilist. */
@@ -3567,7 +3569,7 @@ fts_add_doc_by_id(
DBUG_EXECUTE_IF(
"fts_instrument_sync_debug",
- fts_sync(cache->sync, true, true);
+ fts_sync(cache->sync, true, true, false);
);
DEBUG_SYNC_C("fts_instrument_sync_request");
@@ -4379,13 +4381,11 @@ fts_sync_index(
}
/** Check if index cache has been synced completely
-@param[in,out] sync sync state
@param[in,out] index_cache index cache
@return true if index is synced, otherwise false. */
static
bool
fts_sync_index_check(
- fts_sync_t* sync,
fts_index_cache_t* index_cache)
{
const ib_rbt_node_t* rbt_node;
@@ -4408,14 +4408,36 @@ fts_sync_index_check(
return(true);
}
-/*********************************************************************//**
-Commit the SYNC, change state of processed doc ids etc.
+/** Reset the synced flag in the index cache on rollback
+@param[in,out] index_cache index cache */
+static
+void
+fts_sync_index_reset(
+ fts_index_cache_t* index_cache)
+{
+ const ib_rbt_node_t* rbt_node;
+
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
+
+ fts_node->synced = false;
+ }
+}
+
+/** Commit the SYNC, change state of processed doc ids etc.
+@param[in,out] sync sync state
@return DB_SUCCESS if all OK */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_commit(
-/*============*/
- fts_sync_t* sync) /*!< in: sync state */
+ fts_sync_t* sync)
{
dberr_t error;
trx_t* trx = sync->trx;
@@ -4468,6 +4490,8 @@ fts_sync_commit(
(double) n_nodes/ (double) elapsed_time);
}
+ /* Avoid assertion in trx_free(). */
+ trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
return(error);
@@ -4490,6 +4514,10 @@ fts_sync_rollback(
index_cache = static_cast<fts_index_cache_t*>(
ib_vector_get(cache->indexes, i));
+ /* Reset synced flag so nodes will not be skipped
+ in the next sync, see fts_sync_write_words(). */
+ fts_sync_index_reset(index_cache);
+
for (j = 0; fts_index_selector[j].value; ++j) {
if (index_cache->ins_graph[j] != NULL) {
@@ -4515,6 +4543,9 @@ fts_sync_rollback(
rw_lock_x_unlock(&cache->lock);
fts_sql_rollback(trx);
+
+ /* Avoid assertion in trx_free(). */
+ trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
}
@@ -4523,13 +4554,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
+@param[in] has_dict whether the dict operation lock is held
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait)
+ bool wait,
+ bool has_dict)
{
ulint i;
dberr_t error = DB_SUCCESS;
@@ -4558,6 +4591,12 @@ fts_sync(
DEBUG_SYNC_C("fts_sync_begin");
fts_sync_begin(sync);
+ /* When syncing in the background, we hold the dict operation lock
+ to prevent DDL such as DROP INDEX. */
+ if (has_dict) {
+ sync->trx->dict_operation_lock_mode = RW_S_LATCH;
+ }
+
begin_sync:
if (cache->total_size > fts_max_cache_size) {
/* Avoid the case: sync never finish when
@@ -4598,7 +4637,7 @@ begin_sync:
ib_vector_get(cache->indexes, i));
if (index_cache->index->to_be_dropped
- || fts_sync_index_check(sync, index_cache)) {
+ || fts_sync_index_check(index_cache)) {
continue;
}
@@ -4613,6 +4652,7 @@ end_sync:
}
rw_lock_x_lock(&cache->lock);
+ sync->interrupted = false;
sync->in_progress = false;
os_event_set(sync->event);
rw_lock_x_unlock(&cache->lock);
@@ -4636,20 +4676,23 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
@param[in] unlock_cache whether unlock cache when write node
@param[in] wait whether wait for existing sync to finish
+@param[in] has_dict whether the dict operation lock is held
@return DB_SUCCESS on success, error code on failure. */
UNIV_INTERN
dberr_t
fts_sync_table(
dict_table_t* table,
bool unlock_cache,
- bool wait)
+ bool wait,
+ bool has_dict)
{
dberr_t err = DB_SUCCESS;
ut_ad(table->fts);
if (!dict_table_is_discarded(table) && table->fts->cache) {
- err = fts_sync(table->fts->cache->sync, unlock_cache, wait);
+ err = fts_sync(table->fts->cache->sync,
+ unlock_cache, wait, has_dict);
}
return(err);
diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc
index d9f2532578e..ea937c20752 100644
--- a/storage/xtradb/fts/fts0opt.cc
+++ b/storage/xtradb/fts/fts0opt.cc
@@ -2986,7 +2986,7 @@ fts_optimize_sync_table(
if (table) {
if (dict_table_has_fts_index(table) && table->fts->cache) {
- fts_sync_table(table, true, false);
+ fts_sync_table(table, true, false, true);
}
dict_table_close(table, FALSE, FALSE);
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 14870659b0e..320b900d019 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -864,6 +864,19 @@ innobase_is_fake_change(
THD* thd) __attribute__((unused)); /*!< in: MySQL thread handle of the user for
whom the transaction is being committed */
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param path Path to the table
+@param f_key_list[out] The list of foreign keys
+
+@return error code or zero for success */
+static
+int
+innobase_get_parent_fk_list(
+ THD* thd,
+ const char* path,
+ List<FOREIGN_KEY_INFO>* f_key_list);
/******************************************************************//**
Maps a MySQL trx isolation level code to the InnoDB isolation level code
@@ -8398,6 +8411,7 @@ dberr_t
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
{
+ DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
dberr_t error = DB_SUCCESS;
ut_ad(!srv_read_only_mode);
@@ -8437,6 +8451,8 @@ ha_innobase::innobase_lock_autoinc(void)
/* Fall through to old style locking. */
case AUTOINC_OLD_STYLE_LOCKING:
+ DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
+ ut_ad(0););
error = row_lock_table_autoinc_for_mysql(prebuilt);
if (error == DB_SUCCESS) {
@@ -8450,7 +8466,7 @@ ha_innobase::innobase_lock_autoinc(void)
ut_error;
}
- return(error);
+ DBUG_RETURN(error);
}
/********************************************************************//**
@@ -14469,7 +14485,7 @@ ha_innobase::optimize(
if (innodb_optimize_fulltext_only) {
if (prebuilt->table->fts && prebuilt->table->fts->cache
&& !dict_table_is_discarded(prebuilt->table)) {
- fts_sync_table(prebuilt->table, false, true);
+ fts_sync_table(prebuilt->table, false, true, false);
fts_optimize_table(prebuilt->table);
}
return(HA_ADMIN_OK);
@@ -14686,7 +14702,14 @@ ha_innobase::check(
prebuilt->select_lock_type = LOCK_NONE;
- if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+ bool check_result
+ = row_check_index_for_mysql(prebuilt, index, &n_rows);
+ DBUG_EXECUTE_IF(
+ "dict_set_index_corrupted",
+ if (!(index->type & DICT_CLUSTERED)) {
+ check_result = false;
+ });
+ if (!check_result) {
innobase_format_name(
index_name, sizeof index_name,
index->name, TRUE);
@@ -15013,6 +15036,75 @@ get_foreign_key_info(
return(pf_key_info);
}
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param table The referenced table
+@param f_key_list[out] The list of foreign keys */
+static
+void
+fill_foreign_key_list(THD* thd,
+ const dict_table_t* table,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ for (dict_foreign_set::iterator it = table->referenced_set.begin();
+ it != table->referenced_set.end(); ++it) {
+
+ dict_foreign_t* foreign = *it;
+
+ FOREIGN_KEY_INFO* pf_key_info
+ = get_foreign_key_info(thd, foreign);
+ if (pf_key_info) {
+ f_key_list->push_back(pf_key_info);
+ }
+ }
+}
+
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param path Path to the table
+@param f_key_list[out] The list of foreign keys
+
+@return error code or zero for success */
+static
+int
+innobase_get_parent_fk_list(
+ THD* thd,
+ const char* path,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+{
+ ut_a(strlen(path) <= FN_REFLEN);
+ char norm_name[FN_REFLEN + 1];
+ normalize_table_name(norm_name, path);
+
+ trx_t* parent_trx = check_trx_exists(thd);
+ parent_trx->op_info = "getting list of referencing foreign keys";
+ trx_search_latch_release_if_reserved(parent_trx);
+
+ mutex_enter(&dict_sys->mutex);
+
+ dict_table_t* table
+ = dict_table_open_on_name(norm_name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
+ if (!table) {
+ mutex_exit(&dict_sys->mutex);
+ return(HA_ERR_NO_SUCH_TABLE);
+ }
+
+ fill_foreign_key_list(thd, table, f_key_list);
+
+ dict_table_close(table, TRUE, FALSE);
+
+ mutex_exit(&dict_sys->mutex);
+ parent_trx->op_info = "";
+ return(0);
+}
+
/*******************************************************************//**
Gets the list of foreign keys in this table.
@return always 0, that is, always succeeds */
@@ -15065,9 +15157,6 @@ ha_innobase::get_parent_foreign_key_list(
THD* thd, /*!< in: user thread handle */
List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
{
- FOREIGN_KEY_INFO* pf_key_info;
- dict_foreign_t* foreign;
-
ut_a(prebuilt != NULL);
update_thd(ha_thd());
@@ -15076,20 +15165,7 @@ ha_innobase::get_parent_foreign_key_list(
trx_search_latch_release_if_reserved(prebuilt->trx);
mutex_enter(&(dict_sys->mutex));
-
- for (dict_foreign_set::iterator it
- = prebuilt->table->referenced_set.begin();
- it != prebuilt->table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- pf_key_info = get_foreign_key_info(thd, foreign);
- if (pf_key_info) {
- f_key_list->push_back(pf_key_info);
- }
- }
-
+ fill_foreign_key_list(thd, prebuilt->table, f_key_list);
mutex_exit(&(dict_sys->mutex));
prebuilt->trx->op_info = "";
@@ -18892,7 +18968,6 @@ innodb_track_changed_pages_validate(
for update function */
struct st_mysql_value* value) /*!< in: incoming bool */
{
- static bool enabled_on_startup = false;
long long intbuf = 0;
if (value->val_int(value, &intbuf)) {
@@ -18900,8 +18975,7 @@ innodb_track_changed_pages_validate(
return 1;
}
- if (srv_track_changed_pages || enabled_on_startup) {
- enabled_on_startup = true;
+ if (srv_redo_log_thread_started) {
*reinterpret_cast<ulong*>(save)
= static_cast<ulong>(intbuf);
return 0;
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index d0e26f1352c..d96ff377b4a 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates.
Copyrigth (c) 2014, 2016, MariaDB Corporation
This program is free software; you can redistribute it and/or modify it under
@@ -2935,15 +2935,26 @@ i_s_fts_deleted_generic_fill(
DBUG_RETURN(0);
}
- deleted = fts_doc_ids_create();
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ DBUG_RETURN(0);
+ } else if (!dict_table_has_fts_index(user_table)) {
+ dict_table_close(user_table, FALSE, FALSE);
+
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
+ deleted = fts_doc_ids_create();
+
trx = trx_allocate_for_background();
trx->op_info = "Select for FTS DELETE TABLE";
@@ -2971,6 +2982,8 @@ i_s_fts_deleted_generic_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3342,6 +3355,12 @@ i_s_fts_index_cache_fill(
DBUG_RETURN(0);
}
+ if (user_table->fts == NULL || user_table->fts->cache == NULL) {
+ dict_table_close(user_table, FALSE, FALSE);
+
+ DBUG_RETURN(0);
+ }
+
cache = user_table->fts->cache;
ut_a(cache);
@@ -3775,10 +3794,15 @@ i_s_fts_index_table_fill(
DBUG_RETURN(0);
}
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
+
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3791,6 +3815,8 @@ i_s_fts_index_table_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3925,14 +3951,21 @@ i_s_fts_config_fill(
fields = table->field;
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
+
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
} else if (!dict_table_has_fts_index(user_table)) {
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3988,6 +4021,8 @@ i_s_fts_config_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index f599997be02..6924481af49 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1084,10 +1084,20 @@ buf_block_get_frame(
/*================*/
const buf_block_t* block) /*!< in: pointer to the control block */
MY_ATTRIBUTE((pure));
-# define buf_block_get_frame_fast(block) buf_block_get_frame(block)
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ MY_ATTRIBUTE((pure));
+
#else /* UNIV_DEBUG */
# define buf_block_get_frame(block) (block ? (block)->frame : 0)
-# define buf_block_get_frame_fast(block) (block)->frame
+# define buf_nonnull_block_get_frame(block) ((block)->frame)
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Gets the space id of a block.
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index 7b1c66f2a05..20721b28ef2 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -744,6 +744,19 @@ buf_block_get_frame(
SRV_CORRUPT_TABLE_CHECK(block, return(0););
+ return(buf_nonnull_block_get_frame(block));
+}
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+/*========================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
switch (buf_block_get_state(block)) {
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
@@ -768,6 +781,7 @@ buf_block_get_frame(
ok:
return((buf_frame_t*) block->frame);
}
+
#endif /* UNIV_DEBUG */
/*********************************************************************//**
diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h
index 68d4d333245..87b5787d416 100644
--- a/storage/xtradb/include/fts0fts.h
+++ b/storage/xtradb/include/fts0fts.h
@@ -840,13 +840,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
@param[in] unlock_cache whether unlock cache when write node
@param[in] wait whether wait for existing sync to finish
+@param[in] has_dict whether the dict operation lock is held
@return DB_SUCCESS on success, error code on failure. */
UNIV_INTERN
dberr_t
fts_sync_table(
dict_table_t* table,
bool unlock_cache,
- bool wait);
+ bool wait,
+ bool has_dict);
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index d95adf00814..f60cfde1264 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -225,8 +225,10 @@ extern os_event_t srv_checkpoint_completed_event;
log tracking iteration */
extern os_event_t srv_redo_log_tracked_event;
-/** srv_redo_log_follow_thread spawn flag */
-extern bool srv_redo_log_thread_started;
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
+extern bool srv_redo_log_thread_started;
/* If the last data file is auto-extended, we add this many pages to it
at a time */
@@ -344,6 +346,10 @@ extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
+
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
extern my_bool srv_track_changed_pages;
extern ulonglong srv_max_bitmap_file_size;
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 5320776c042..a42b8b8bc25 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 31
+#define INNODB_VERSION_BUGFIX 32
#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 77.0
+#define PERCONA_INNODB_VERSION 78.1
#endif
/* Enable UNIV_LOG_ARCHIVE in XtraDB */
diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc
index 0b5d27b8fd1..411fed91ac5 100644
--- a/storage/xtradb/log/log0log.cc
+++ b/storage/xtradb/log/log0log.cc
@@ -3752,7 +3752,7 @@ loop:
/* Wake the log tracking thread which will then immediatelly
quit because of srv_shutdown_state value */
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
os_event_reset(srv_redo_log_tracked_event);
os_event_set(srv_checkpoint_completed_event);
}
@@ -3831,7 +3831,7 @@ loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Signal the log following thread to quit */
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
os_event_reset(srv_redo_log_tracked_event);
os_event_set(srv_checkpoint_completed_event);
}
diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc
index 63f1ef39568..167d46e2ae8 100644
--- a/storage/xtradb/log/log0online.cc
+++ b/storage/xtradb/log/log0online.cc
@@ -1788,20 +1788,20 @@ log_online_purge_changed_page_bitmaps(
lsn = LSN_MAX;
}
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
/* User requests might happen with both enabled and disabled
tracking */
mutex_enter(&log_bmp_sys->mutex);
}
if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) {
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
mutex_exit(&log_bmp_sys->mutex);
}
return TRUE;
}
- if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) {
+ if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) {
/* If we have to delete the current output file, close it
first. */
os_file_close(log_bmp_sys->out.file);
@@ -1834,7 +1834,7 @@ log_online_purge_changed_page_bitmaps(
}
}
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
if (lsn > log_bmp_sys->end_lsn) {
lsn_t new_file_lsn;
if (lsn == LSN_MAX) {
@@ -1845,9 +1845,7 @@ log_online_purge_changed_page_bitmaps(
new_file_lsn = log_bmp_sys->end_lsn;
}
if (!log_online_rotate_bitmap_file(new_file_lsn)) {
- /* If file create failed, signal the log
- tracking thread to quit next time it wakes
- up. */
+ /* If file create failed, stop log tracking */
srv_track_changed_pages = FALSE;
}
}
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
index 759687e3fe5..092c2ed88dc 100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@ -392,12 +392,6 @@ recv_sys_init(
}
#ifndef UNIV_HOTBACKUP
- /* Initialize red-black tree for fast insertions into the
- flush_list during recovery process.
- As this initialization is done while holding the buffer pool
- mutex we perform it before acquiring recv_sys->mutex. */
- buf_flush_init_flush_rbt();
-
mutex_enter(&(recv_sys->mutex));
recv_sys->heap = mem_heap_create_typed(256,
@@ -490,9 +484,6 @@ recv_sys_debug_free(void)
recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex));
-
- /* Free up the flush_rbt. */
- buf_flush_free_flush_rbt();
}
# endif /* UNIV_LOG_DEBUG */
@@ -3140,6 +3131,11 @@ recv_recovery_from_checkpoint_start_func(
byte* log_hdr_buf_base = reinterpret_cast<byte *>
(alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
dberr_t err;
+
+ /* Initialize red-black tree for fast insertions into the
+ flush_list during recovery process. */
+ buf_flush_init_flush_rbt();
+
ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
log_hdr_buf = static_cast<byte *>
@@ -3568,6 +3564,9 @@ recv_recovery_from_checkpoint_finish(void)
#ifndef UNIV_LOG_DEBUG
recv_sys_debug_free();
#endif
+ /* Free up the flush_rbt. */
+ buf_flush_free_flush_rbt();
+
/* Roll back any recovered data dictionary transactions, so
that the data dictionary tables will be free of any locks.
The data dictionary latch should guarantee that there is at
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
index f5967ede3e7..3d7a5d2ef5d 100644
--- a/storage/xtradb/row/row0merge.cc
+++ b/storage/xtradb/row/row0merge.cc
@@ -2177,7 +2177,7 @@ wait_again:
/* Sync fts cache for other fts indexes to keep all
fts indexes consistent in sync_doc_id. */
err = fts_sync_table(const_cast<dict_table_t*>(new_table),
- false, true);
+ false, true, false);
if (err == DB_SUCCESS) {
fts_update_next_doc_id(
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
index 1e0d21d4a9e..7c2e549e188 100644
--- a/storage/xtradb/srv/srv0mon.cc
+++ b/storage/xtradb/srv/srv0mon.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2013, 2016, MariaDB Corporation.
@@ -1499,7 +1499,10 @@ srv_mon_set_module_control(
module */
set_current_module = FALSE;
} else if (module_id == MONITOR_ALL_COUNTER) {
- continue;
+ if (!(innodb_counter_info[ix].monitor_type
+ & MONITOR_GROUP_MODULE)) {
+ continue;
+ }
} else {
/* Hitting the next module, stop */
break;
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
index a836442eb70..f9c75ffe576 100644
--- a/storage/xtradb/srv/srv0srv.cc
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -219,6 +219,9 @@ UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */
UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024;
@@ -848,6 +851,9 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event;
UNIV_INTERN os_event_t srv_redo_log_tracked_event;
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
UNIV_INTERN bool srv_redo_log_thread_started = false;
/*********************************************************************//**
@@ -2546,13 +2552,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)(
os_event_wait(srv_checkpoint_completed_event);
os_event_reset(srv_checkpoint_completed_event);
-#ifdef UNIV_DEBUG
- if (!srv_track_changed_pages) {
- continue;
- }
-#endif
-
- if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
+ if (srv_track_changed_pages
+ && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
if (!log_online_follow_redo_log()) {
/* TODO: sync with I_S log tracking status? */
ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index cae85f38c12..f2bcf69bbc6 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1,5 +1,5 @@
/* Copyright (c) 2003, 2013, Oracle and/or its affiliates
- Copyright (c) 2009, 2014, SkySQL Ab.
+ Copyright (c) 2009, 2016, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index e4eb2832dff..900e2d3500a 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2009, 2013, Monty Program Ab
+ Copyright (c) 2009, 2016, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh
index e5cdbfd3ce8..54f9ff55e3b 100644
--- a/support-files/mysql.server.sh
+++ b/support-files/mysql.server.sh
@@ -308,7 +308,7 @@ case "$mode" in
then
# Give extra arguments to mysqld with the my.cnf file. This script
# may be overwritten at next upgrade.
- $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null 2>&1 &
+ $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null &
wait_for_ready; return_value=$?
# Make lock for RedHat / SuSE
diff --git a/tests/async_queries.c b/tests/async_queries.c
index 76e884e6a69..a8889fc8d5a 100644
--- a/tests/async_queries.c
+++ b/tests/async_queries.c
@@ -425,7 +425,7 @@ main(int argc, char *argv[])
event_dispatch();
- free(sds);
+ my_free(sds);
mysql_library_end();
diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt
index 0535a486d57..1682bae6986 100644
--- a/win/packaging/CMakeLists.txt
+++ b/win/packaging/CMakeLists.txt
@@ -24,10 +24,13 @@ ENDIF()
SET(MANUFACTURER "MariaDB Corporation Ab")
-FIND_PATH(WIX_DIR heat.exe
- "$ENV{ProgramFiles}/WiX Toolset v3.9/bin"
- "$ENV{ProgramFiles}/WiX Toolset v3.10/bin"
-)
+SET(WIX_BIN_PATHS)
+FOREACH(WIX_VER 3.9 3.10 3.11)
+ LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin")
+ LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin")
+ENDFOREACH()
+
+FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS})
SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB")
IF(CMAKE_SIZEOF_VOID_P EQUAL 4)
SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3")
diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in
index c2ab648a6db..1f847a39695 100644
--- a/win/packaging/create_msi.cmake.in
+++ b/win/packaging/create_msi.cmake.in
@@ -434,6 +434,7 @@ EXECUTE_PROCESS(
IF(SIGNCODE)
EXECUTE_PROCESS(
COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS}
+ /d ${CPACK_PACKAGE_FILE_NAME}.msi
${CPACK_PACKAGE_FILE_NAME}.msi
)
ENDIF()