| field | value | date |
|---|---|---|
| author | Sergei Golubchik <serg@mariadb.org> | 2016-09-28 17:55:28 +0200 |
| committer | Sergei Golubchik <serg@mariadb.org> | 2016-09-28 17:55:28 +0200 |
| commit | 66d9696596edbc20ad36bf3d5bffb5595e8235c3 (patch) | |
| tree | bbef37c9a90b63d25bee59386cac04298a13846f | |
| parent | 66a58f46e937cdc3d7e0529b52ad8b658d9b2cd4 (diff) | |
| parent | 23af6f5942e7235a7c14a36cb8dd0d2796b5ef37 (diff) | |
| download | mariadb-git-66d9696596edbc20ad36bf3d5bffb5595e8235c3.tar.gz | |
Merge branch '10.0' into 10.1
160 files changed, 4948 insertions, 4270 deletions
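
For browsing the full 160-file change, the same merge can be pulled up with plain git. A minimal sketch, assuming the public GitHub mirror of the MariaDB server tree (the clone URL is an assumption; only the commit and parent hashes are taken from the header above):

```sh
# Assumed public mirror; substitute whatever remote you normally use.
git clone https://github.com/MariaDB/server.git mariadb-server
cd mariadb-server

# Commit metadata plus the per-file diffstat of this merge.
git show --stat 66d9696596edbc20ad36bf3d5bffb5595e8235c3

# Full change relative to the first parent (the 10.1 head, listed first above).
git diff 66a58f46e937cdc3d7e0529b52ad8b658d9b2cd4 66d9696596edbc20ad36bf3d5bffb5595e8235c3
```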
@@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016)
 Acronis http://acronis.com (2016)
 Nexedi https://www.nexedi.com (2016)
 Automattic https://automattic.com (2014 - 2016)
+Tencent Game DBA http://tencentdba.com/about (2016)
 Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016)
 Virtuozzo https://virtuozzo.com (2016)
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index 0e0a121dbb8..703e7424159 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -230,6 +230,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
   "perl(mtr_io.pl)"
   "perl(mtr_match)"
   "perl(mtr_misc.pl)"
+  "perl(mtr_gcov.pl)"
+  "perl(mtr_gprof.pl)"
+  "perl(mtr_process.pl)"
   "perl(mtr_report)"
   "perl(mtr_results)"
   "perl(mtr_unique)")
diff --git a/include/my_global.h b/include/my_global.h
index f5af8083cdc..bca03bfc4d6 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -880,8 +880,7 @@ typedef long long my_ptrdiff_t;
   and related routines are refactored.
 */
-#define my_offsetof(TYPE, MEMBER) \
-  ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
+#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)
 #define NullS (char *) 0
diff --git a/include/my_sys.h b/include/my_sys.h
index 36530eb94e9..a89480d3fcc 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2010, 2013, Monty Program Ab.
+   Copyright (c) 2010, 2016, Monty Program Ab.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -279,7 +279,7 @@ extern my_bool my_use_symdir;
 extern ulong my_default_record_cache_size;
 extern my_bool my_disable_locking, my_disable_async_io,
        my_disable_flush_key_blocks, my_disable_symlinks;
-extern my_bool my_disable_sync;
+extern my_bool my_disable_sync, my_disable_copystat_in_redel;
 extern char wild_many,wild_one,wild_prefix;
 extern const char *charsets_dir;
 extern my_bool timed_mutexes;
diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test
index 6b3da087f01..2e093aacb0d 100644
--- a/mysql-test/extra/binlog_tests/database.test
+++ b/mysql-test/extra/binlog_tests/database.test
@@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';
 # Use '/' instead of '\' in the error message. On windows platform, dir is
 # formed with '\'.
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
+--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/
 --error 1010
 DROP DATABASE testing_1;
 let $wait_binlog_event= DROP TABLE IF EXIST;
diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc
index c50a45a9923..03afa49d323 100644
--- a/mysql-test/include/index_merge2.inc
+++ b/mysql-test/include/index_merge2.inc
@@ -341,6 +341,7 @@ while ($1)
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
 # to test the bug, the following must use "sort_union":
 --replace_column 9 REF
diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm
index 0e90967ef95..f9f7b3d8d4b 100644
--- a/mysql-test/lib/My/CoreDump.pm
+++ b/mysql-test/lib/My/CoreDump.pm
@@ -261,11 +261,7 @@ sub show {
   # On Windows, rely on cdb to be there...
   if (IS_WINDOWS)
   {
-    # Starting cdb is unsafe when used with --parallel > 1 option
-    if ( $parallel < 2 )
-    {
-      _cdb($core_name);
-    }
+    _cdb($core_name);
     return;
   }
diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm
index 2be903abf42..d758b81c1c7 100644
--- a/mysql-test/lib/mtr_cases.pm
+++ b/mysql-test/lib/mtr_cases.pm
@@ -58,8 +58,6 @@ use My::Test;
 use My::Find;
 use My::Suite;
-require "mtr_misc.pl";
-
 # locate plugin suites, depending on whether it's a build tree or installed
 my @plugin_suitedirs;
 my $plugin_suitedir_regex;
@@ -1096,7 +1094,7 @@ sub get_tags_from_file($$) {
   $file_to_tags{$file}= $tags;
   $file_to_master_opts{$file}= $master_opts;
   $file_to_slave_opts{$file}= $slave_opts;
-  $file_combinations{$file}= [ uniq(@combinations) ];
+  $file_combinations{$file}= [ ::uniq(@combinations) ];
   $file_in_overlay{$file} = 1 if $in_overlay;
   return @{$tags};
 }
diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm
index 9ab82c454ed..97ace54f0fb 100644
--- a/mysql-test/lib/mtr_report.pm
+++ b/mysql-test/lib/mtr_report.pm
@@ -34,7 +34,6 @@ use mtr_match;
 use My::Platform;
 use POSIX qw[ _exit ];
 use IO::Handle qw[ flush ];
-require "mtr_io.pl";
 use mtr_results;
 
 my $tot_real_time= 0;
@@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
   my $timer_str= "";
   if ( $timer and -f "$::opt_vardir/log/timer" )
   {
-    $timer_str= mtr_fromfile("$::opt_vardir/log/timer");
+    $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
     $tinfo->{timer}= $timer_str;
     resfile_test_info('duration', $timer_str) if $::opt_resfile;
   }
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index c6a71b91f69..2bd89f5ae49 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -102,11 +102,11 @@ use mtr_results;
 use IO::Socket::INET;
 use IO::Select;
 
-require "lib/mtr_process.pl";
-require "lib/mtr_io.pl";
-require "lib/mtr_gcov.pl";
-require "lib/mtr_gprof.pl";
-require "lib/mtr_misc.pl";
+require "mtr_process.pl";
+require "mtr_io.pl";
+require "mtr_gcov.pl";
+require "mtr_gprof.pl";
+require "mtr_misc.pl";
 
 $SIG{INT}= sub { mtr_error("Got ^C signal"); };
 $SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result
index 918ceaa496f..f3f5e227d3a 100644
--- a/mysql-test/r/contributors.result
+++ b/mysql-test/r/contributors.result
@@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation
 Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation
 Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation
 Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation
+Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation
 Google USA Sponsoring encryption, parallel replication and GTID
 Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
 Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index a0a8072265c..5797a030e73 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -1662,6 +1662,11 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
 SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
 CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
 1
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+c
+0000003F0000003F0000003F0000003F
+Warnings:
+Warning 1300 Invalid utf32 character string:
'\xFF\xFF\x00\x00' # # End of 5.5 tests # diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result index 77c74fbc041..2803107b97e 100644 --- a/mysql-test/r/group_min_max_innodb.result +++ b/mysql-test/r/group_min_max_innodb.result @@ -286,3 +286,19 @@ F 28 28 F 29 29 F 30 30 DROP TABLE t0,t1,t2; +# +# MDEV-MariaDB daemon leaks memory with specific query +# +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, +`language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', +`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; +translation_resources serialized_c +NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +NULL 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +drop table t1,t2; diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result index 5202c79f3c7..5bf56e213ab 100644 --- a/mysql-test/r/index_merge_innodb.result +++ b/mysql-test/r/index_merge_innodb.result @@ -311,6 +311,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result index a857e2a21ff..b3f78c12af9 100644 --- a/mysql-test/r/index_merge_myisam.result +++ b/mysql-test/r/index_merge_myisam.result @@ -1146,6 +1146,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result index 804313af701..36e196497e5 100644 --- a/mysql-test/r/merge.result +++ b/mysql-test/r/merge.result @@ -3835,6 
+3835,23 @@ test.m1 repair error Corrupt # Clean-up. drop tables m1, t1, t4; drop view t3; +# +# MDEV-10424 - Assertion `ticket == __null' failed in +# MDL_request::set_type +# +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; End of 5.5 tests # # Additional coverage for refactoring which is made as part diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result index ca6a5cf876a..6c21f4225a0 100644 --- a/mysql-test/r/ps.result +++ b/mysql-test/r/ps.result @@ -4077,4 +4077,35 @@ id value deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; -# End of 10.0 tests +# +# MDEV-8833: Crash of server on prepared statement with +# conversion to semi-join +# +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +deallocate prepare stmt; +drop table t1,t2,t3,t4; +# End of 5.5 tests diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result index 10aa2f2f393..c970f2ff896 100644 --- a/mysql-test/r/type_uint.result +++ b/mysql-test/r/type_uint.result @@ -14,6 +14,25 @@ this 0 4294967295 drop table t1; +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +coalesce(a,b) coalesce(b,a) +1 2 +18446744073709551615 16777215 +create table t2 as select a from t1 union select b from t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `a` bigint(20) unsigned DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t2; +a +1 +18446744073709551615 +2 +16777215 +drop table t1, t2; # # Start of 10.0 tests # diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result index d80c451c841..7d6133adb74 100644 --- a/mysql-test/suite/innodb/r/innodb_bug54044.result +++ b/mysql-test/suite/innodb/r/innodb_bug54044.result @@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` ( `IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE table_54044; -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +CREATE TABLE tmp ENGINE = INNODB +AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; SHOW CREATE TABLE tmp; Table Create Table tmp CREATE TABLE `tmp` ( diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result new file mode 100644 index 00000000000..79a24f7e455 --- /dev/null +++ b/mysql-test/suite/innodb/r/system_tables.result @@ -0,0 +1,8 @@ +alter 
table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +starttime +2008-08-12 02:43:00 +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test index aa19c51018c..61a09375ae1 100644 --- a/mysql-test/suite/innodb/t/innodb_bug54044.test +++ b/mysql-test/suite/innodb/t/innodb_bug54044.test @@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB SHOW CREATE TABLE table_54044; DROP TABLE table_54044; -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +# This 'create table' should pass since it uses a Field_string of size 0. + +CREATE TABLE tmp ENGINE = INNODB + AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; SHOW CREATE TABLE tmp; DROP TABLE tmp; @@ -23,4 +26,3 @@ FLUSH TABLES; --error 1005 CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1; DROP TABLE t1; - diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test new file mode 100644 index 00000000000..90cb8c59fbd --- /dev/null +++ b/mysql-test/suite/innodb/t/system_tables.test @@ -0,0 +1,12 @@ +--source include/have_innodb.inc + +# +# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash +# +alter table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +--source include/restart_mysqld.inc +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result deleted file mode 100644 index c8fa1cc2b24..00000000000 --- a/mysql-test/suite/perfschema/r/aggregate.result +++ /dev/null @@ -1,121 +0,0 @@ -"General cleanup" -set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval; -set @@global.aria_checkpoint_interval= 0; -drop table if exists t1; -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -create table t1 ( -id INT PRIMARY KEY, -b CHAR(100) DEFAULT 'initial value') -ENGINE=MyISAM; -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); -update performance_schema.setup_instruments SET enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -set @dump_all=FALSE; -"Verifying file aggregate consistency" -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) 
-GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -"Verifying waits aggregate consistency (instance)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT) -"Verifying waits aggregate consistency (thread)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN 
performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT) -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -drop table test.t1; -set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save; diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test deleted file mode 100644 index fe30a7b8697..00000000000 --- a/mysql-test/suite/perfschema/t/aggregate.test +++ /dev/null @@ -1,197 +0,0 @@ -# Tests for PERFORMANCE_SCHEMA -# Verify that statistics aggregated by different criteria are consistent. - ---source include/not_embedded.inc ---source include/have_perfschema.inc - ---echo "General cleanup" - -# MDEV-7187 - test fails sporadically in buildbot -set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval; -set @@global.aria_checkpoint_interval= 0; - ---disable_warnings -drop table if exists t1; ---enable_warnings - -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; - -# Cleanup statistics -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; - -# Start recording data -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - - -create table t1 ( - id INT PRIMARY KEY, - b CHAR(100) DEFAULT 'initial value') - ENGINE=MyISAM; - -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); - -# Stop recording data, so the select below don't add noise. -update performance_schema.setup_instruments SET enabled = 'NO'; -# Disable all consumers, for long standing waits -update performance_schema.setup_consumers set enabled = 'NO'; - -# Helper to debug -set @dump_all=FALSE; - -# Note that in general: -# - COUNT/SUM/MAX(file_summary_by_event_name) >= -# COUNT/SUM/MAX(file_summary_by_instance). -# - MIN(file_summary_by_event_name) <= -# MIN(file_summary_by_instance). -# There will be equality only when file instances are not removed, -# aka when a file is not deleted from the file system, -# because doing so removes a row in file_summary_by_instance. 
- -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_instance) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_instance) -# There will be equality only when an instrument instance -# is not removed, which is next to impossible to predictably guarantee -# in the server. -# For example, a MyISAM table removed from the table cache -# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock. -# Another example, a thread terminating will cause a mysql_mutex_destroy -# on sql/LOCK_delete -# Both cause a row to be deleted from events_waits_summary_by_instance. - -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_thread_by_event_name) -# There will be equality only when no thread is removed, -# that is if no thread disconnects, or no sub thread (for example insert -# delayed) ever completes. -# A thread completing will cause rows in -# events_waits_summary_by_thread_by_event_name to be removed. - ---echo "Verifying file aggregate consistency" - -# Since the code generating the load in this test does: -# - create table -# - insert -# - does not cause temporary tables to be used -# we can test for equality here for file aggregates. - -# If any of these queries returns data, the test failed. - -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (instance)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < 
SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (thread)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; - - -# Cleanup - -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - -drop table test.t1; - -set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save; - diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 83b88ed0480..ceb75176b43 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result index 83b88ed0480..ceb75176b43 100644 --- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result +++ b/mysql-test/suite/plugins/r/thread_pool_server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 
@@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test index a67850a66dd..dae1651dc93 100644 --- a/mysql-test/suite/rpl/t/rpl_drop_db.test +++ b/mysql-test/suite/rpl/t/rpl_drop_db.test @@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1); select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt'; create table mysqltest1.t2 (n int); create table mysqltest1.t3 (n int); ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; @@ -30,7 +30,7 @@ while ($1) } --enable_query_log ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 4bff8a867f1..190b9f3fb50 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); +# +# potential signedness issue +# +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; + --echo # --echo # End of 5.5 tests --echo # diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test index 6967f847147..91e0bd3279f 100644 --- a/mysql-test/t/group_min_max_innodb.test +++ b/mysql-test/t/group_min_max_innodb.test @@ -230,3 +230,16 @@ eval EXPLAIN $query; eval $query; DROP TABLE t0,t1,t2; + +--echo # +--echo # MDEV-MariaDB daemon leaks memory with specific query +--echo # + +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, + `language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', + `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; +drop table t1,t2; diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test index 519094d6350..e9d69b446d5 100644 --- a/mysql-test/t/merge.test +++ b/mysql-test/t/merge.test @@ -2881,6 +2881,19 @@ drop tables m1, t1, t4; drop view t3; +--echo # +--echo # MDEV-10424 - Assertion `ticket == __null' failed in +--echo # MDL_request::set_type +--echo # +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; + + --echo End of 5.5 tests diff --git 
a/mysql-test/t/ps.test b/mysql-test/t/ps.test index 3881d522bbf..67f6f021434 100644 --- a/mysql-test/t/ps.test +++ b/mysql-test/t/ps.test @@ -3670,5 +3670,32 @@ deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; +--echo # +--echo # MDEV-8833: Crash of server on prepared statement with +--echo # conversion to semi-join +--echo # + +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); + +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); + +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); + +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); + +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +EXECUTE stmt; +deallocate prepare stmt; +drop table t1,t2,t3,t4; + ---echo # End of 10.0 tests +--echo # End of 5.5 tests diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test index 3a949c5c47a..84fca993d09 100644 --- a/mysql-test/t/type_uint.test +++ b/mysql-test/t/type_uint.test @@ -16,6 +16,13 @@ drop table t1; # End of 4.1 tests +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +create table t2 as select a from t1 union select b from t1; +show create table t2; +select * from t2; +drop table t1, t2; --echo # --echo # Start of 10.0 tests diff --git a/mysys/my_redel.c b/mysys/my_redel.c index 61e61b40791..976fc5a18c3 100644 --- a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -1,5 +1,5 @@ -/* - Copyright (c) 2000, 2010, Oracle and/or its affiliates +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates + Copyright (c) 2009, 2016, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name, DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu", org_name,tmp_name,MyFlags)); - if (my_copystat(org_name,tmp_name,MyFlags) < 0) + if (!my_disable_copystat_in_redel && + my_copystat(org_name,tmp_name,MyFlags) < 0) goto end; if (MyFlags & MY_REDEL_MAKE_BACKUP) { diff --git a/mysys/my_static.c b/mysys/my_static.c index 4aca78e30a9..9236c1395fb 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -98,3 +98,4 @@ my_bool my_disable_sync=0; my_bool my_disable_async_io=0; my_bool my_disable_flush_key_blocks=0; my_bool my_disable_symlinks=0; +my_bool my_disable_copystat_in_redel=0; diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index b84f2b94806..d48b6c37728 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit, char locinfo_ini_value[sizeof(struct connection_info)+4]; static MYSQL_THDVAR_STR(loc_info, - PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC, - "Auxiliary info.", NULL, NULL, - locinfo_ini_value); + PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC, + "Internal info", NULL, NULL, locinfo_ini_value); static const char *syslog_facility_names[]= { diff --git a/sql/contributors.h b/sql/contributors.h index f52d3243453..0359ec54022 100644 --- a/sql/contributors.h +++ b/sql/contributors.h @@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= { {"Auttomattic", "https://automattic.com", 
"Bronze Sponsor of the MariaDB Foundation"}, {"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"}, {"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"}, + {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"}, /* Sponsors of important features */ {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"}, diff --git a/sql/field.cc b/sql/field.cc index ae815187019..b909d14ec8f 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR diff --git a/sql/item.cc b/sql/item.cc index a9c17ef620c..47635b14f46 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2673,9 +2673,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref) if (context) { Name_resolution_context *ctx= new Name_resolution_context(); - ctx->outer_context= NULL; // We don't build a complete name resolver - ctx->table_list= NULL; // We rely on first_name_resolution_table instead + if (context->select_lex == new_parent) + { + /* + This field was pushed in then pulled out + (for example left part of IN) + */ + ctx->outer_context= context->outer_context; + } + else if (context->outer_context) + { + /* just pull to the upper context */ + ctx->outer_context= context->outer_context->outer_context; + } + else + { + /* No upper context (merging Derived/VIEW where context chain ends) */ + ctx->outer_context= NULL; + } + ctx->table_list= context->first_name_resolution_table; ctx->select_lex= new_parent; + if (context->select_lex == NULL) + ctx->select_lex= NULL; ctx->first_name_resolution_table= context->first_name_resolution_table; ctx->last_name_resolution_table= context->last_name_resolution_table; ctx->error_processor= context->error_processor; diff --git a/sql/log.cc b/sql/log.cc index be24bcd718a..45ab5c8827b 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3102,7 +3102,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time, if (! write_error) { write_error= 1; - sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, error); + sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, tmp_errno); } } } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 41f6def8e08..8da8273083c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4096,6 +4096,7 @@ static int init_common_variables() max_system_variables.pseudo_thread_id= (ulong)~0; server_start_time= flush_status_time= my_time(0); + my_disable_copystat_in_redel= 1; global_rpl_filter= new Rpl_filter; binlog_filter= new Rpl_filter; diff --git a/sql/net_serv.cc b/sql/net_serv.cc index ef7a46a7109..da3c5646e84 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -1,5 +1,5 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2010, 2014, SkySQL Ab. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. 
+ Copyright (c) 2012, 2016, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/sql/parse_file.h b/sql/parse_file.h index e4756e6c8af..87917dbd71b 100644 --- a/sql/parse_file.h +++ b/sql/parse_file.h @@ -42,9 +42,9 @@ enum file_opt_type { struct File_option { - LEX_STRING name; /**< Name of the option */ - int offset; /**< offset to base address of value */ - file_opt_type type; /**< Option type */ + LEX_STRING name; /**< Name of the option */ + my_ptrdiff_t offset; /**< offset to base address of value */ + file_opt_type type; /**< Option type */ }; diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc index 9dd3e532d1e..f72eb676743 100644 --- a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig) struct tm tm; #ifdef HAVE_STACKTRACE THD *thd; -#endif /* This flag remembers if the query pointer was found invalid. We will try and print the query at the end of the signal handler, in case we're wrong. */ bool print_invalid_query_pointer= false; +#endif if (segfaulted) { @@ -276,6 +276,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) "\"mlockall\" bugs.\n"); } +#ifdef HAVE_STACKTRACE if (print_invalid_query_pointer) { my_safe_printf_stderr( @@ -285,6 +286,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length())); my_safe_printf_stderr("\n\n"); } +#endif #ifdef HAVE_WRITE_CORE if (test_flags & TEST_CORE_ON_SIGNAL) diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index b974075b442..1f4426f2043 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -466,7 +466,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } thd->prepare_derived_at_open= FALSE; - table->next_global= save_next_global; + /* + MERGE engine may adjust table->next_global chain, thus we have to + append save_next_global after merge children. 
+ */ + if (save_next_global) + { + TABLE_LIST *table_list_iterator= table; + while (table_list_iterator->next_global) + table_list_iterator= table_list_iterator->next_global; + table_list_iterator->next_global= save_next_global; + save_next_global->prev_global= &table_list_iterator->next_global; + } + table->next_local= save_next_local; thd->open_options&= ~extra_open_options; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 9ea5b20dce6..b4a3cc27d2c 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -9267,6 +9267,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list, */ lex->reset_n_backup_query_tables_list(&query_tables_list_backup); thd->reset_n_backup_open_tables_state(backup); + thd->lex->sql_command= SQLCOM_SELECT; if (open_and_lock_tables(thd, table_list, FALSE, MYSQL_OPEN_IGNORE_FLUSH | diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 430191cee5d..4143d2cc419 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5828,9 +5828,11 @@ int THD::decide_logging_format(TABLE_LIST *tables) { static const char *prelocked_mode_name[] = { "NON_PRELOCKED", + "LOCK_TABLES", "PRELOCKED", "PRELOCKED_UNDER_LOCK_TABLES", }; + compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last); DBUG_PRINT("debug", ("prelocked_mode: %s", prelocked_mode_name[locked_tables_mode])); } diff --git a/sql/sql_class.h b/sql/sql_class.h index b22dc8142d8..da885c3dbac 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1262,7 +1262,8 @@ enum enum_locked_tables_mode LTM_NONE= 0, LTM_LOCK_TABLES, LTM_PRELOCKED, - LTM_PRELOCKED_UNDER_LOCK_TABLES + LTM_PRELOCKED_UNDER_LOCK_TABLES, + LTM_always_last }; @@ -4610,6 +4611,11 @@ public: save_copy_field_end= copy_field_end= NULL; } } + void free_copy_field_data() + { + for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++) + ptr->tmp.free(); + } }; class select_union :public select_result_interceptor diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 60248f3fef4..e7286960599 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -2849,6 +2849,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, } +static size_t var_storage_size(int flags) +{ + switch (flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: return sizeof(my_bool); + case PLUGIN_VAR_INT: return sizeof(int); + case PLUGIN_VAR_LONG: return sizeof(long); + case PLUGIN_VAR_ENUM: return sizeof(long); + case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong); + case PLUGIN_VAR_SET: return sizeof(ulonglong); + case PLUGIN_VAR_STR: return sizeof(char*); + case PLUGIN_VAR_DOUBLE: return sizeof(double); + default: DBUG_ASSERT(0); return 0; + } +} + + /* returns a bookmark for thd-local variables, creating if neccessary. returns null for non thd-local variables. 
@@ -2857,39 +2873,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, static st_bookmark *register_var(const char *plugin, const char *name, int flags) { - uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size; + uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size; st_bookmark *result; char *varname, *p; - if (!(flags & PLUGIN_VAR_THDLOCAL)) - return NULL; - - switch (flags & PLUGIN_VAR_TYPEMASK) { - case PLUGIN_VAR_BOOL: - size= sizeof(my_bool); - break; - case PLUGIN_VAR_INT: - size= sizeof(int); - break; - case PLUGIN_VAR_LONG: - case PLUGIN_VAR_ENUM: - size= sizeof(long); - break; - case PLUGIN_VAR_LONGLONG: - case PLUGIN_VAR_SET: - size= sizeof(ulonglong); - break; - case PLUGIN_VAR_STR: - size= sizeof(char*); - break; - case PLUGIN_VAR_DOUBLE: - size= sizeof(double); - break; - default: - DBUG_ASSERT(0); - return NULL; - }; + DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL); + size= var_storage_size(flags); varname= ((char*) my_alloca(length)); strxmov(varname + 1, plugin, "_", name, NullS); for (p= varname + 1; *p; p++) @@ -2983,25 +2973,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock) */ for (idx= 0; idx < bookmark_hash.records; idx++) { - sys_var_pluginvar *pi; - sys_var *var; st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx); if (v->version <= thd->variables.dynamic_variables_version) continue; /* already in thd->variables */ - if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) || - !(pi= var->cast_pluginvar()) || - v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags)) - continue; - /* Here we do anything special that may be required of the data types */ - if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && - pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC) + if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + v->key[0] & BOOKMARK_MEMALLOC) { - int offset= ((thdvar_str_t *)(pi->plugin_var))->offset; - char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset); + char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset); if (*pp) *pp= my_strdup(*pp, MYF(MY_WME|MY_FAE)); } @@ -3448,69 +3430,58 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var) return false; } -bool sys_var_pluginvar::global_update(THD *thd, set_var *var) +static const void *var_def_ptr(st_mysql_sys_var *pv) { - DBUG_ASSERT(!is_readonly()); - mysql_mutex_assert_owner(&LOCK_global_system_variables); - - void *tgt= real_value_ptr(thd, OPT_GLOBAL); - const void *src= &var->save_result; - - if (!var->value) - { - switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { + switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { case PLUGIN_VAR_INT: - src= &((sysvar_uint_t*) plugin_var)->def_val; - break; + return &((sysvar_uint_t*) pv)->def_val; case PLUGIN_VAR_LONG: - src= &((sysvar_ulong_t*) plugin_var)->def_val; - break; + return &((sysvar_ulong_t*) pv)->def_val; case PLUGIN_VAR_LONGLONG: - src= &((sysvar_ulonglong_t*) plugin_var)->def_val; - break; + return &((sysvar_ulonglong_t*) pv)->def_val; case PLUGIN_VAR_ENUM: - src= &((sysvar_enum_t*) plugin_var)->def_val; - break; + return &((sysvar_enum_t*) pv)->def_val; case PLUGIN_VAR_SET: - src= &((sysvar_set_t*) plugin_var)->def_val; - break; + return &((sysvar_set_t*) pv)->def_val; case PLUGIN_VAR_BOOL: - src= &((sysvar_bool_t*) plugin_var)->def_val; - break; + return &((sysvar_bool_t*) pv)->def_val; case PLUGIN_VAR_STR: - src= &((sysvar_str_t*) plugin_var)->def_val; - break; 
+ return &((sysvar_str_t*) pv)->def_val; case PLUGIN_VAR_DOUBLE: - src= &((sysvar_double_t*) plugin_var)->def_val; - break; + return &((sysvar_double_t*) pv)->def_val; case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_uint_t*) plugin_var)->def_val; - break; + return &((thdvar_uint_t*) pv)->def_val; case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulong_t*) plugin_var)->def_val; - break; + return &((thdvar_ulong_t*) pv)->def_val; case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulonglong_t*) plugin_var)->def_val; - break; + return &((thdvar_ulonglong_t*) pv)->def_val; case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_enum_t*) plugin_var)->def_val; - break; + return &((thdvar_enum_t*) pv)->def_val; case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_set_t*) plugin_var)->def_val; - break; + return &((thdvar_set_t*) pv)->def_val; case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_bool_t*) plugin_var)->def_val; - break; + return &((thdvar_bool_t*) pv)->def_val; case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_str_t*) plugin_var)->def_val; - break; + return &((thdvar_str_t*) pv)->def_val; case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_double_t*) plugin_var)->def_val; - break; + return &((thdvar_double_t*) pv)->def_val; default: DBUG_ASSERT(0); + return NULL; } - } +} + + +bool sys_var_pluginvar::global_update(THD *thd, set_var *var) +{ + DBUG_ASSERT(!is_readonly()); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + + void *tgt= real_value_ptr(thd, OPT_GLOBAL); + const void *src= &var->save_result; + + if (!var->value) + src= var_def_ptr(plugin_var); plugin_var->update(thd, plugin_var, tgt, src); return false; @@ -3863,7 +3834,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp, *(int*)(opt + 1)= offset= v->offset; if (opt->flags & PLUGIN_VAR_NOCMDOPT) + { + char *val= global_system_variables.dynamic_variables_ptr + offset; + if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) && + (opt->flags & PLUGIN_VAR_MEMALLOC)) + { + char *def_val= *(char**)var_def_ptr(opt); + *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL; + } + else + memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags)); continue; + } optname= (char*) memdup_root(mem_root, v->key + 1, (optnamelen= v->name_len) + 1); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 5dc50c92104..239e5b6b5d2 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9172,9 +9172,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) We need to destruct the copy_field (allocated in create_tmp_table()) before setting it to 0 if the join is not "reusable". */ - if (!tmp_join || tmp_join != this) - tmp_table_param.cleanup(); - tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + if (!tmp_join || tmp_join != this) + tmp_table_param.cleanup(); + else + { + /* + Free data buffered in copy_fields, but keep data pointed by copy_field + around for next iteration (possibly stored in save_copy_fields). + + It would be logically simpler to not clear copy_field + below, but as we have loops that runs over copy_field to + copy_field_end that should not be done anymore, it's simpler to + just clear the pointers. + + Another option would be to just clear copy_field_end and not run + the loops if this is not set or to have tmp_table_param.cleanup() + to run cleanup on save_copy_field if copy_field is not set. 
+ */ + tmp_table_param.free_copy_field_data(); + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + } first_record= sort_and_group=0; send_records= (ha_rows) 0; @@ -11890,7 +11907,7 @@ void JOIN::join_free() /** Free resources of given join. - @param fill true if we should free all resources, call with full==1 + @param full true if we should free all resources, call with full==1 should be last, before it this function can be called with full==0 @@ -12010,7 +12027,7 @@ void JOIN::cleanup(bool full) /* If we have tmp_join and 'this' JOIN is not tmp_join and tmp_table_param.copy_field's of them are equal then we have to remove - pointer to tmp_table_param.copy_field from tmp_join, because it qill + pointer to tmp_table_param.copy_field from tmp_join, because it will be removed in tmp_table_param.cleanup(). */ if (tmp_join && @@ -16114,6 +16131,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, case Item::VARBIN_ITEM: case Item::CACHE_ITEM: case Item::EXPR_CACHE_ITEM: + case Item::PARAM_ITEM: if (make_copy_field) { DBUG_ASSERT(((Item_result_field*)item)->result_field); @@ -22917,7 +22935,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, err: if (copy) delete [] param->copy_field; // This is never 0 - param->copy_field=0; + param->copy_field= 0; err2: DBUG_RETURN(TRUE); } diff --git a/sql/table_cache.cc b/sql/table_cache.cc index 2dd368a1945..16a47b37417 100644 --- a/sql/table_cache.cc +++ b/sql/table_cache.cc @@ -778,6 +778,8 @@ void tdc_release_share(TABLE_SHARE *share) mysql_mutex_lock(&share->tdc->LOCK_table_share); if (--share->tdc->ref_count) { + if (!share->is_view) + mysql_cond_broadcast(&share->tdc->COND_release); mysql_mutex_unlock(&share->tdc->LOCK_table_share); mysql_mutex_unlock(&LOCK_unused_shares); DBUG_VOID_RETURN; diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index f21fd560235..c13d4583fef 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -736,7 +736,7 @@ dict_stats_copy( if (dst_idx->type & DICT_FTS) { continue; } - dict_stats_empty_index(dst_idx); + dict_stats_empty_index(dst_idx, true); } else { continue; } diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 1457417d5dc..61b6f5408cf 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. 
*/ UNIV_INTERN char* fts_internal_tbl_name = NULL; +UNIV_INTERN char* fts_internal_tbl_name2 = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -6570,6 +6571,36 @@ fts_check_corrupt_index( return(0); } +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len) +{ + fts_aux_table_t aux_table; + char* parent_table_name = NULL; + + if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { + dict_table_t* parent_table; + + parent_table = dict_table_open_on_id( + aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (parent_table != NULL) { + parent_table_name = mem_strdupl( + parent_table->name, + strlen(parent_table->name)); + + dict_table_close(parent_table, TRUE, FALSE); + } + } + + return(parent_table_name); +} + /** Check the validity of the parent table. @param[in] aux_table auxiliary table @return true if it is a valid table or false if it is not */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index fd15092d96c..7ba54a1c360 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16916,7 +16916,12 @@ innodb_internal_table_update( my_free(old); } - fts_internal_tbl_name = *(char**) var_ptr; + fts_internal_tbl_name2 = *(char**) var_ptr; + if (fts_internal_tbl_name2 == NULL) { + fts_internal_tbl_name = const_cast<char*>("default"); + } else { + fts_internal_tbl_name = fts_internal_tbl_name2; + } } /****************************************************************//** @@ -19148,7 +19153,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, "Whether to disable OS system file cache for sort I/O", NULL, NULL, FALSE); -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, +static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 5385c7ab920..0ccc9bebf29 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -210,7 +210,10 @@ innobase_need_rebuild( const Alter_inplace_info* ha_alter_info, const TABLE* altered_table) { - if (ha_alter_info->handler_flags + Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = + ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); + + if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT @@ -3985,7 +3988,7 @@ err_exit: } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || (ha_alter_info->handler_flags + || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table))) { @@ -4159,7 +4162,7 @@ ok_exit: DBUG_RETURN(false); } - if (ha_alter_info->handler_flags + if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table)) { goto ok_exit; diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index d1e6e3ed808..a73446440aa 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ 
-3959,6 +3959,8 @@ i_s_fts_config_fill( DBUG_RETURN(0); } + DEBUG_SYNC_C("i_s_fts_config_fille_check"); + fields = table->field; /* Prevent DDL to drop fts aux tables. */ diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 87b5787d416..3e2f359bbeb 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -375,6 +375,7 @@ extern bool fts_need_sync; /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; +extern char* fts_internal_tbl_name2; #define fts_que_graph_free(graph) \ do { \ @@ -823,6 +824,15 @@ void fts_drop_orphaned_tables(void); /*==========================*/ +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len); + /******************************************************************//** Since we do a horizontal split on the index table, we need to drop all the split tables. diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 8557f74f756..81190c3ad2e 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -45,7 +45,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 32 +#define INNODB_VERSION_BUGFIX 33 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 5e32663ad32..12d4a59da6b 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -621,7 +621,7 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 4 + old_pk_size; + mrec_size = 6 + old_pk_size; /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, @@ -651,8 +651,8 @@ row_log_table_delete( *b++ = static_cast<byte>(old_pk_extra_size); /* Log the size of external prefix we saved */ - mach_write_to_2(b, ext_size); - b += 2; + mach_write_to_4(b, ext_size); + b += 4; rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, @@ -2276,14 +2276,14 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */ - if (mrec + 4 >= mrec_end) { + /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ + if (mrec + 6 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_2(mrec); - mrec += 2; + ext_size = mach_read_from_4(mrec); + mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. 
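The two row0log.cc hunks above have to stay in lockstep: row_log_table_delete now writes the saved external-prefix size with mach_write_to_4 where it used to use mach_write_to_2, and row_log_table_apply_op reads it back with mach_read_from_4, which is why the minimum ROW_T_DELETE record grows from 4 to 6 bytes (1 for extra_size, 4 for ext_size, at least 1 of payload). A minimal sketch of that agreed layout, not taken from the patch itself (the variable names simply mirror the hunks; mach_write_to_4/mach_read_from_4 are the existing InnoDB byte-order helpers):

    /* writer side (row_log_table_delete) */
    *b++ = static_cast<byte>(old_pk_extra_size);  /* 1 byte, asserted < 0x100   */
    mach_write_to_4(b, ext_size);                 /* widened from 2 to 4 bytes  */
    b += 4;
    /* ... old PK record follows (at least 1 byte) */

    /* reader side (row_log_table_apply_op, ROW_T_DELETE) */
    if (mrec + 6 >= mrec_end) {                   /* 1 + 4 + at least 1         */
        return(NULL);
    }
    extra_size = *mrec++;
    ext_size   = mach_read_from_4(mrec);
    mrec += 4;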
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index b2c96a7ed7b..35011247105 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2734,6 +2734,10 @@ loop: return(n_tables + n_tables_dropped); } + DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", + os_thread_sleep(5000000); + ); + table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -2744,6 +2748,16 @@ loop: goto already_dropped; } + if (!table->to_be_dropped) { + /* There is a scenario: the old table is dropped + just after it's added into drop list, and new + table with the same name is created, then we try + to drop the new table in background. */ + dict_table_close(table, FALSE, FALSE); + + goto already_dropped; + } + ut_a(!table->can_be_evicted); dict_table_close(table, FALSE, FALSE); @@ -4075,6 +4089,13 @@ row_drop_table_for_mysql( } } + + DBUG_EXECUTE_IF("row_drop_table_add_to_background", + row_add_table_to_background_drop_list(table->name); + err = DB_SUCCESS; + goto funct_exit; + ); + /* TODO: could we replace the counter n_foreign_key_checks_running with lock checks on the table? Acquire here an exclusive lock on the table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that @@ -4700,6 +4721,19 @@ loop: row_mysql_lock_data_dictionary(trx); while ((table_name = dict_get_first_table_name_in_db(name))) { + /* Drop parent table if it is a fts aux table, to + avoid accessing dropped fts aux tables in information + scheam when parent table still exists. + Note: Drop parent table will drop fts aux tables. */ + char* parent_table_name; + parent_table_name = fts_get_parent_table_name( + table_name, strlen(table_name)); + + if (parent_table_name != NULL) { + mem_free(table_name); + table_name = parent_table_name; + } + ut_a(memcmp(table_name, name, namelen) == 0); table = dict_table_open_on_name( diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc index 7c85431c57c..50d91842344 100644 --- a/storage/perfschema/ha_perfschema.cc +++ b/storage/perfschema/ha_perfschema.cc @@ -225,7 +225,7 @@ maria_declare_plugin(perfschema) 0x0001, pfs_status_vars, NULL, - "5.6.32", + "5.6.33", MariaDB_PLUGIN_MATURITY_STABLE } maria_declare_plugin_end; diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 765e6733a98..53a4a675bbf 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(TOKUDB_VERSION 5.6.31-77.0) +SET(TOKUDB_VERSION 5.6.32-78.1) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_VERSION VERSION_LESS "2.8.9") MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB") diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 4b62703480f..576f902f6ae 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -367,8 +367,8 @@ static void print_db_env_struct (void) { "int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */", "int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */", "int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */", - "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. 
*/", - "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */", + "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */", + "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */", "int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */", "int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index 77f6d8f67b7..cce12d575bf 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -71,6 +71,7 @@ set_cflags_if_supported( -Wno-pointer-bool-conversion -fno-rtti -fno-exceptions + -Wno-error=nonnull-compare ) ## set_cflags_if_supported_named("-Weffc++" -Weffcpp) diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt index 11091073ac2..6696c26ecc0 100644 --- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt @@ -55,8 +55,8 @@ set(FT_SOURCES msg_buffer node pivotkeys + serialize/rbtree_mhs serialize/block_allocator - serialize/block_allocator_strategy serialize/block_table serialize/compress serialize/ft_node-serialize diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc index fb456ea6a18..e6452f60cfc 100644 --- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc +++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc @@ -496,7 +496,7 @@ handle_split_of_child( // We never set the rightmost blocknum to be the root. // Instead, we wait for the root to split and let promotion initialize the rightmost - // blocknum to be the first non-root leaf node on the right extreme to recieve an insert. + // blocknum to be the first non-root leaf node on the right extreme to receive an insert. 
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum); invariant(ft->h->root_blocknum.b != rightmost_blocknum.b); if (childa->blocknum.b == rightmost_blocknum.b) { @@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // It is possible after reading in the entire child, // that we now know that the child is not reactive // if so, we can unpin parent right now - // we wont be splitting/merging child + // we won't be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 8f61bc67339..f131668889e 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -598,15 +598,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) { } } -void toku_ftnode_clone_callback( - void* value_data, - void** cloned_value_data, - long* clone_size, - PAIR_ATTR* new_attr, - bool for_checkpoint, - void* write_extraargs - ) -{ +void toku_ftnode_clone_callback(void *value_data, + void **cloned_value_data, + long *clone_size, + PAIR_ATTR *new_attr, + bool for_checkpoint, + void *write_extraargs) { FTNODE node = static_cast<FTNODE>(value_data); toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast<FT>(write_extraargs); @@ -618,13 +615,16 @@ void toku_ftnode_clone_callback( toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } - cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; - cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; + cloned_node->oldest_referenced_xid_known = + node->oldest_referenced_xid_known; + cloned_node->max_msn_applied_to_node_on_disk = + node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; - cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; + cloned_node->layout_version_read_from_disk = + node->layout_version_read_from_disk; cloned_node->build_id = node->build_id; cloned_node->height = node->height; cloned_node->dirty = node->dirty; @@ -649,38 +649,39 @@ void toku_ftnode_clone_callback( // set new pair attr if necessary if (node->height == 0) { *new_attr = make_ftnode_pair_attr(node); - } - else { + for (int i = 0; i < node->n_children; i++) { + BLB(node, i)->logical_rows_delta = 0; + BLB(cloned_node, i)->logical_rows_delta = 0; + } + } else { new_attr->is_valid = false; } *clone_size = ftnode_memory_size(cloned_node); *cloned_value_data = cloned_node; } -void toku_ftnode_flush_callback( - CACHEFILE UU(cachefile), - int fd, - BLOCKNUM blocknum, - void *ftnode_v, - void** disk_data, - void *extraargs, - PAIR_ATTR size __attribute__((unused)), - PAIR_ATTR* new_size, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - FT ft = (FT) extraargs; - FTNODE ftnode = (FTNODE) ftnode_v; - FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; +void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), + int fd, + BLOCKNUM blocknum, + void *ftnode_v, + void **disk_data, + void *extraargs, + PAIR_ATTR size __attribute__((unused)), + PAIR_ATTR *new_size, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { + FT ft = (FT)extraargs; + FTNODE ftnode = 
(FTNODE)ftnode_v; + FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data; assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { - // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() + // cloned nodes already had their stale messages moved, see + // toku_ftnode_clone_callback() toku_move_ftnode_messages_to_stale(ft, ftnode); } else if (height == 0) { toku_ftnode_leaf_run_gc(ft, ftnode); @@ -688,7 +689,8 @@ void toku_ftnode_flush_callback( toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); + int r = toku_serialize_ftnode_to( + fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -703,20 +705,22 @@ void toku_ftnode_flush_callback( FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size); } toku_free(*disk_data); - } - else { + } else { if (ftnode->height == 0) { for (int i = 0; i < ftnode->n_children; i++) { - if (BP_STATE(ftnode,i) == PT_AVAIL) { + if (BP_STATE(ftnode, i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + if (!ftnode->dirty) + toku_ft_adjust_logical_row_count( + ft, -bn->logical_rows_delta); } } } } toku_ftnode_free(&ftnode); - } - else { + } else { *new_size = make_ftnode_pair_attr(ftnode); } } @@ -845,10 +849,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr } // callback for partially evicting a node -int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, - void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) { - FTNODE node = (FTNODE) ftnode_pv; - FT ft = (FT) write_extraargs; +int toku_ftnode_pe_callback(void *ftnode_pv, + PAIR_ATTR old_attr, + void *write_extraargs, + void (*finalize)(PAIR_ATTR new_attr, void *extra), + void *finalize_extra) { + FTNODE node = (FTNODE)ftnode_pv; + FT ft = (FT)write_extraargs; int num_partial_evictions = 0; // Hold things we intend to destroy here. @@ -866,7 +873,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext } // Don't partially evict nodes whose partitions can't be read back // from disk individually - if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { + if (node->layout_version_read_from_disk < + FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { goto exit; } // @@ -874,77 +882,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext // if (node->height > 0) { for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { NONLEAF_CHILDINFO bnc = BNC(node, i); if (ft_compress_buffers_before_eviction && - // We may not serialize and compress a partition in memory if its - // in memory layout version is different than what's on disk (and - // therefore requires upgrade). + // We may not serialize and compress a partition in + // memory if its in memory layout version is different + // than what's on disk (and therefore requires upgrade). 
// - // Auto-upgrade code assumes that if a node's layout version read - // from disk is not current, it MUST require upgrade. Breaking - // this rule would cause upgrade code to upgrade this partition - // again after we serialize it as the current version, which is bad. - node->layout_version == node->layout_version_read_from_disk) { + // Auto-upgrade code assumes that if a node's layout + // version read from disk is not current, it MUST + // require upgrade. + // Breaking this rule would cause upgrade code to + // upgrade this partition again after we serialize it as + // the current version, which is bad. + node->layout_version == + node->layout_version_read_from_disk) { toku_ft_bnc_move_messages_to_stale(ft, bnc); compress_internal_node_partition( node, i, // Always compress with quicklz - TOKU_QUICKLZ_METHOD - ); + TOKU_QUICKLZ_METHOD); } else { // We're not compressing buffers before eviction. Simply - // detach the buffer and set the child's state to on-disk. + // detach the buffer and set the child's state to + // on-disk. set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; } buffers_to_destroy[num_buffers_to_destroy++] = bnc; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else { + } else { continue; } } - } - // - // partial eviction strategy for basement nodes: - // if the bn is compressed, evict it - // else: check if it requires eviction, if it does, evict it, if not, sweep the clock count - // - else { + } else { + // + // partial eviction strategy for basement nodes: + // if the bn is compressed, evict it + // else: check if it requires eviction, if it does, evict it, if not, + // sweep the clock count + // for (int i = 0; i < node->n_children; i++) { // Get rid of compressed stuff no matter what. 
- if (BP_STATE(node,i) == PT_COMPRESSED) { + if (BP_STATE(node, i) == PT_COMPRESSED) { SUB_BLOCK sb = BSB(node, i); pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr; pointers_to_free[num_pointers_to_free++] = sb; set_BNULL(node, i); - BP_STATE(node,i) = PT_ON_DISK; + BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; - } - else if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + } else if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { BASEMENTNODE bn = BLB(node, i); basements_to_destroy[num_basements_to_destroy++] = bn; - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + toku_ft_adjust_logical_row_count(ft, + -bn->logical_rows_delta); set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else if (BP_STATE(node,i) == PT_ON_DISK) { + } else if (BP_STATE(node, i) == PT_ON_DISK) { continue; - } - else { + } else { abort(); } } @@ -2378,12 +2386,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); } -void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging) { +void toku_ft_maybe_update(FT_HANDLE ft_h, + const DBT *key, + const DBT *update_function_extra, + TOKUTXN txn, + bool oplsn_valid, + LSN oplsn, + bool do_logging) { TXNID_PAIR xid = toku_txn_get_txnid(txn); if (txn) { - BYTESTRING keybs = { key->size, (char *) key->data }; + BYTESTRING keybs = {key->size, (char *)key->data}; toku_logger_save_rollback_cmdupdate( txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); toku_txn_maybe_note_ft(txn, ft_h->ft); @@ -2392,22 +2404,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func TOKULOGGER logger; logger = toku_txn_logger(txn); if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_update(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, keybs, extrabs); + BYTESTRING keybs = {.len = key->size, .data = (char *)key->data}; + BYTESTRING extrabs = {.len = update_function_extra->size, + .data = (char *)update_function_extra->data}; + toku_log_enq_update(logger, + NULL, + 0, + txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, + keybs, + extrabs); } LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + if (oplsn_valid && + oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); - ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + XIDS message_xids = + txn ? 
toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg( + key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); ft_send_update_msg(ft_h, msg, txn); } + // updates get converted to insert messages, which should do a -1 on the + // logical row count when the messages are permanently applied + toku_ft_adjust_logical_row_count(ft_h->ft, 1); } void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc index adac96f4882..e31d80772d5 100644 --- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc +++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc @@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) { return rre->_cancelled = rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra); } -int toku_ft_recount_rows( - FT_HANDLE ft, - int (*progress_callback)( - uint64_t count, - uint64_t deleted, - void* progress_extra), - void* progress_extra) { - +int toku_ft_recount_rows(FT_HANDLE ft, + int (*progress_callback)(uint64_t count, + uint64_t deleted, + void* progress_extra), + void* progress_extra) { int ret = 0; - recount_rows_extra_t rre = { - progress_callback, - progress_extra, - 0, - false - }; + recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false}; ft_cursor c; ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false); - if (ret) return ret; + if (ret) + return ret; - toku_ft_cursor_set_check_interrupt_cb( - &c, - recount_rows_interrupt, - &rre); + toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre); ret = toku_ft_cursor_first(&c, recount_rows_found, &rre); while (FT_LIKELY(ret == 0)) { @@ -108,6 +98,7 @@ int toku_ft_recount_rows( if (rre._cancelled == false) { // update ft count toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys); + ft->ft->h->dirty = 1; ret = 0; } diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 93d21233bf7..699fcc57603 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) { // must be returned in toku_ft_stat64. if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) { toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta); + if (ft->in_memory_logical_rows == (uint64_t)-1) { + toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1); + } } } diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h index dd070373e26..1aa2c203831 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h +++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h @@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error); -// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.) +// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.) 
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void); int toku_ft_loader_finish_extractor(FTLOADER bl); diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc index 20f9363da1e..528c86a8f79 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader.cc +++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc @@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) { uint64_t toku_ft_loader_get_rowset_budget_for_testing (void) -// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613). +// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613). { return 16ULL*size_factor*1024ULL; } diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc index 58ba675eb7c..12e5fda226e 100644 --- a/storage/tokudb/PerconaFT/ft/node.cc +++ b/storage/tokudb/PerconaFT/ft/node.cc @@ -373,52 +373,48 @@ find_bounds_within_message_tree( } } -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). - */ +// For each message in the ancestor's buffer (determined by childnum) that +// is key-wise between lower_bound_exclusive and upper_bound_inclusive, +// apply the message to the basement node. We treat the bounds as minus +// or plus infinity respectively if they are NULL. Do not mark the node +// as dirty (preserve previous state of 'dirty' bit). static void bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - const pivot_bounds &bounds, // contains pivot key bounds of this basement node - txn_gc_info* gc_info, - bool* msgs_applied) { - + int childnum, // which child buffer of ancestor contains messages we want + const pivot_bounds & + bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool *msgs_applied) { int r; NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); // Determine the offsets in the message trees between which we need to // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; + STAT64INFO_S stats_delta = {0, 0}; uint64_t workdone_this_ancestor = 0; int64_t logical_rows_delta = 0; uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree( - t->ft->cmp, - bnc->stale_message_tree, - &bnc->msg_buffer, - bounds, - &stale_lbi, - &stale_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->stale_message_tree, + &bnc->msg_buffer, + bounds, + &stale_lbi, + &stale_ube); } else { stale_lbi = 0; stale_ube = 0; } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree( - t->ft->cmp, - bnc->fresh_message_tree, - &bnc->msg_buffer, - bounds, - &fresh_lbi, - &fresh_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->fresh_message_tree, + &bnc->msg_buffer, + bounds, + &fresh_lbi, + &fresh_ube); // We now know where all the messages we must apply are, so one of 
the // following 4 cases will do the application, depending on which of @@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node( // We have messages in multiple trees, so we grab all // the relevant messages' offsets and sort them by MSN, then apply // them in MSN order. - const int buffer_size = ((stale_ube - stale_lbi) + - (fresh_ube - fresh_lbi) + - bnc->broadcast_list.size()); + const int buffer_size = + ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + + bnc->broadcast_list.size()); toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get()); - struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets, + .i = 0}; // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra); + r = bnc->stale_message_tree + .iterate_on_range<struct store_msg_buffer_offset_extra, + store_msg_buffer_offset>( + stale_lbi, stale_ube, &sfo_extra); assert_zero(r); // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra); + r = bnc->fresh_message_tree + .iterate_and_mark_range<struct store_msg_buffer_offset_extra, + store_msg_buffer_offset>( + fresh_lbi, fresh_ube, &sfo_extra); assert_zero(r); // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra); + r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, + store_msg_buffer_offset>(&sfo_extra); assert_zero(r); invariant(sfo_extra.i == buffer_size); // Sort by MSN. - toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>:: + mergesort_r(offsets, buffer_size, bnc->msg_buffer); // Apply the messages in MSN order. for (int i = 0; i < buffer_size; ++i) { *msgs_applied = true; - do_bn_apply_msg( - t, - bn, - &bnc->msg_buffer, - offsets[i], - gc_info, - &workdone_this_ancestor, - &stats_delta, - &logical_rows_delta); + do_bn_apply_msg(t, + bn, + &bnc->msg_buffer, + offsets[i], + gc_info, + &workdone_this_ancestor, + &stats_delta, + &logical_rows_delta); } } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + // No stale messages to apply, we just apply fresh messages, and mark + // them to be moved to stale later. 
struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra); + .logical_rows_delta = &logical_rows_delta}; + if (fresh_ube - fresh_lbi > 0) + *msgs_applied = true; + r = bnc->fresh_message_tree + .iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, + iterate_do_bn_apply_msg>( + fresh_lbi, fresh_ube, &iter_extra); assert_zero(r); } else { invariant(fresh_lbi == fresh_ube); // No fresh messages to apply, we just apply stale messages. - if (stale_ube - stale_lbi > 0) *msgs_applied = true; + if (stale_ube - stale_lbi > 0) + *msgs_applied = true; struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; + .logical_rows_delta = &logical_rows_delta}; - r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra); + r = bnc->stale_message_tree + .iterate_on_range<struct iterate_do_bn_apply_msg_extra, + iterate_do_bn_apply_msg>( + stale_lbi, stale_ube, &iter_extra); assert_zero(r); } // // update stats // if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), + workdone_this_ancestor); } if (stats_delta.numbytes || stats_delta.numrows) { toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); } toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta); + bn->logical_rows_delta += logical_rows_delta; } static void diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h index ad0298e81c5..52eefec0936 100644 --- a/storage/tokudb/PerconaFT/ft/node.h +++ b/storage/tokudb/PerconaFT/ft/node.h @@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node { MSN max_msn_applied; // max message sequence number applied bool stale_ancestor_messages_applied; STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk + int64_t logical_rows_delta; }; typedef struct ftnode_leaf_basement_node *BASEMENTNODE; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc index 1355f3739ee..19811373d16 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc @@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "portability/toku_stdlib.h" #include "ft/serialize/block_allocator.h" -#include "ft/serialize/block_allocator_strategy.h" +#include "ft/serialize/rbtree_mhs.h" #if TOKU_DEBUG_PARANOID -#define VALIDATE() validate() +#define VALIDATE() Validate() #else #define VALIDATE() #endif -static FILE *ba_trace_file = nullptr; - -void block_allocator::maybe_initialize_trace(void) { - const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); - if (ba_trace_path != nullptr) { - ba_trace_file = toku_os_fopen(ba_trace_path, "w"); - if (ba_trace_file == nullptr) { - fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " - "but it could not be opened for writing (errno %d)\n", - ba_trace_path, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); - } - } -} - -void block_allocator::maybe_close_trace() { - if (ba_trace_file != nullptr) { - int r = toku_os_fclose(ba_trace_file); - if (r != 0) { - fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", - r, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); - } - } -} - -void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { - // the alignment must be at least 512 and aligned with 512 to work with direct I/O - assert(alignment >= 512 && (alignment % 512) == 0); +void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning, + uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with + // direct I/O + invariant(alignment >= 512 && (alignment % 512) == 0); _reserve_at_beginning = reserve_at_beginning; _alignment = alignment; _n_blocks = 0; - _blocks_array_size = 1; - XMALLOC_N(_blocks_array_size, _blocks_array); _n_bytes_in_use = reserve_at_beginning; - _strategy = BA_STRATEGY_FIRST_FIT; - - memset(&_trace_lock, 0, sizeof(toku_mutex_t)); - toku_mutex_init(&_trace_lock, nullptr); + _tree = new MhsRbTree::Tree(alignment); +} +void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) { + CreateInternal(reserve_at_beginning, alignment); + _tree->Insert({reserve_at_beginning, MAX_BYTE}); VALIDATE(); } -void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { - _create_internal(reserve_at_beginning, alignment); - _trace_create(); +void BlockAllocator::Destroy() { + delete _tree; } -void block_allocator::destroy() { - toku_free(_blocks_array); - _trace_destroy(); - toku_mutex_destroy(&_trace_lock); -} +void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *translation_pairs, + uint64_t n_blocks) { + CreateInternal(reserve_at_beginning, alignment); + _n_blocks = n_blocks; -void block_allocator::set_strategy(enum allocation_strategy strategy) { - _strategy = strategy; -} + struct BlockPair *XMALLOC_N(n_blocks, pairs); + memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair)); + std::sort(pairs, pairs + n_blocks); -void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { - if (_n_blocks + n_to_add > _blocks_array_size) { - uint64_t new_size = _n_blocks + n_to_add; - uint64_t at_least = _blocks_array_size * 2; - if (at_least > new_size) { - new_size = at_least; - } - _blocks_array_size = new_size; - XREALLOC_N(_blocks_array_size, _blocks_array); + if (pairs[0]._offset > reserve_at_beginning) { + _tree->Insert( + {reserve_at_beginning, 
pairs[0]._offset - reserve_at_beginning}); } -} - -void block_allocator::grow_blocks_array() { - grow_blocks_array_by(1); -} - -void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks) { - _create_internal(reserve_at_beginning, alignment); - - _n_blocks = n_blocks; - grow_blocks_array_by(_n_blocks); - memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); - std::sort(_blocks_array, _blocks_array + _n_blocks); for (uint64_t i = 0; i < _n_blocks; i++) { - // Allocator does not support size 0 blocks. See block_allocator_free_block. - invariant(_blocks_array[i].size > 0); - invariant(_blocks_array[i].offset >= _reserve_at_beginning); - invariant(_blocks_array[i].offset % _alignment == 0); - - _n_bytes_in_use += _blocks_array[i].size; + // Allocator does not support size 0 blocks. See + // block_allocator_free_block. + invariant(pairs[i]._size > 0); + invariant(pairs[i]._offset >= _reserve_at_beginning); + invariant(pairs[i]._offset % _alignment == 0); + + _n_bytes_in_use += pairs[i]._size; + + MhsRbTree::OUUInt64 free_size(MAX_BYTE); + MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size); + if (i < n_blocks - 1) { + MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset); + invariant(next_offset >= free_offset); + free_size = next_offset - free_offset; + if (free_size == 0) + continue; + } + _tree->Insert({free_offset, free_size}); } - + toku_free(pairs); VALIDATE(); - - _trace_create_from_blockpairs(); } // Effect: align a value by rounding up. -static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { +static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -struct block_allocator::blockpair * -block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { - switch (_strategy) { - case BA_STRATEGY_FIRST_FIT: - return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_BEST_FIT: - return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_HEAT_ZONE: - return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); - case BA_STRATEGY_PADDED_FIT: - return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); - default: - abort(); - } -} - -// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { - struct blockpair *bp; - +// Effect: Allocate a block. The resulting block must be aligned on the +// ba->alignment (which to make direct_io happy must be a positive multiple of +// 512). +void BlockAllocator::AllocBlock(uint64_t size, + uint64_t *offset) { // Allocator does not support size 0 blocks. See block_allocator_free_block. 
invariant(size > 0); - grow_blocks_array(); _n_bytes_in_use += size; + *offset = _tree->Remove(size); - uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); - - if (_n_blocks == 0) { - // First and only block - assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use - _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); - _blocks_array[0].size = size; - *offset = _blocks_array[0].offset; - goto done; - } else if (end_of_reserve + size <= _blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - bp = &_blocks_array[0]; - memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - *offset = end_of_reserve; - goto done; - } - - bp = choose_block_to_alloc_after(size, heat); - if (bp != nullptr) { - // our allocation strategy chose the space after `bp' to fit the new block - uint64_t answer_offset = align(bp->offset + bp->size, _alignment); - uint64_t blocknum = bp - _blocks_array; - invariant(&_blocks_array[blocknum] == bp); - invariant(blocknum < _n_blocks); - memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; - *offset = answer_offset; - } else { - // It didn't fit anywhere, so fit it on the end. - assert(_n_blocks < _blocks_array_size); - bp = &_blocks_array[_n_blocks]; - uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); - bp->offset = answer_offset; - bp->size = size; - *offset = answer_offset; - } - -done: _n_blocks++; VALIDATE(); - - _trace_alloc(size, heat, *offset); -} - -// Find the index in the blocks array that has a particular offset. Requires that the block exist. -// Use binary search so it runs fast. -int64_t block_allocator::find_block(uint64_t offset) { - VALIDATE(); - if (_n_blocks == 1) { - assert(_blocks_array[0].offset == offset); - return 0; - } - - uint64_t lo = 0; - uint64_t hi = _n_blocks; - while (1) { - assert(lo < hi); // otherwise no such block exists. - uint64_t mid = (lo + hi) / 2; - uint64_t thisoff = _blocks_array[mid].offset; - if (thisoff < offset) { - lo = mid + 1; - } else if (thisoff > offset) { - hi = mid; - } else { - return mid; - } - } } -// To support 0-sized blocks, we need to include size as an input to this function. +// To support 0-sized blocks, we need to include size as an input to this +// function. // All 0-sized blocks at the same offset can be considered identical, but // a 0-sized block can share offset with a non-zero sized block. -// The non-zero sized block is not exchangable with a zero sized block (or vice versa), -// so inserting 0-sized blocks can cause corruption here. -void block_allocator::free_block(uint64_t offset) { +// The non-zero sized block is not exchangable with a zero sized block (or vice +// versa), so inserting 0-sized blocks can cause corruption here. +void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) { VALIDATE(); - int64_t bn = find_block(offset); - assert(bn >= 0); // we require that there is a block with that offset. 
- _n_bytes_in_use -= _blocks_array[bn].size; - memmove(&_blocks_array[bn], &_blocks_array[bn + 1], - (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_bytes_in_use -= size; + _tree->Insert({offset, size}); _n_blocks--; VALIDATE(); - - _trace_free(offset); -} - -uint64_t block_allocator::block_size(uint64_t offset) { - int64_t bn = find_block(offset); - assert(bn >=0); // we require that there is a block with that offset. - return _blocks_array[bn].size; } -uint64_t block_allocator::allocated_limit() const { - if (_n_blocks == 0) { - return _reserve_at_beginning; - } else { - struct blockpair *last = &_blocks_array[_n_blocks - 1]; - return last->offset + last->size; - } +uint64_t BlockAllocator::AllocatedLimit() const { + MhsRbTree::Node *max_node = _tree->MaxNode(); + return rbn_offset(max_node).ToInt(); } -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. +// Effect: Consider the blocks in sorted order. The reserved block at the +// beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. -int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { - if (b ==0 ) { +int BlockAllocator::NthBlockInLayoutOrder(uint64_t b, + uint64_t *offset, + uint64_t *size) { + MhsRbTree::Node *x, *y; + if (b == 0) { *offset = 0; *size = _reserve_at_beginning; - return 0; + return 0; } else if (b > _n_blocks) { return -1; } else { - *offset =_blocks_array[b - 1].offset; - *size =_blocks_array[b - 1].size; + x = _tree->MinNode(); + for (uint64_t i = 1; i <= b; i++) { + y = x; + x = _tree->Successor(x); + } + *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt(); + *offset = (rbn_offset(y) + rbn_size(y)).ToInt(); return 0; } } +struct VisUnusedExtra { + TOKU_DB_FRAGMENTATION _report; + uint64_t _align; +}; + +static void VisUnusedCollector(void *extra, + MhsRbTree::Node *node, + uint64_t UU(depth)) { + struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra; + TOKU_DB_FRAGMENTATION report = v_e->_report; + uint64_t alignm = v_e->_align; + + MhsRbTree::OUUInt64 offset = rbn_offset(node); + MhsRbTree::OUUInt64 size = rbn_size(node); + MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm)); + uint64_t free_space = (offset + size - answer_offset).ToInt(); + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } +} // Requires: report->file_size_bytes is filled in // Requires: report->data_bytes is filled in // Requires: report->checkpoint_bytes_additional is filled in -void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { - assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); +void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) { + invariant(_n_bytes_in_use == + report->data_bytes + report->checkpoint_bytes_additional); report->unused_bytes = 0; report->unused_blocks = 0; report->largest_unused_block = 0; - if (_n_blocks > 0) { - //Deal with space before block 0 and after reserve: - { - struct blockpair *bp = &_blocks_array[0]; - assert(bp->offset >= align(_reserve_at_beginning, _alignment)); - uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); - if (free_space > 0) { - report->unused_bytes += 
free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { - // Consider the space after blocknum - struct blockpair *bp = &_blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - uint64_t next_offset = bp[1].offset; - uint64_t free_space = next_offset - end_of_this_block; - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space after last block - { - struct blockpair *bp = &_blocks_array[_n_blocks-1]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - } else { - // No blocks. Just the reserve. - uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } + struct VisUnusedExtra extra = {report, _alignment}; + _tree->InOrderVisitor(VisUnusedCollector, &extra); } -void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { - report->data_bytes = _n_bytes_in_use; - report->data_blocks = _n_blocks; +void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; report->file_size_bytes = 0; report->checkpoint_bytes_additional = 0; - get_unused_statistics(report); + UnusedStatistics(report); } -void block_allocator::validate() const { - uint64_t n_bytes_in_use = _reserve_at_beginning; - for (uint64_t i = 0; i < _n_blocks; i++) { - n_bytes_in_use += _blocks_array[i].size; - if (i > 0) { - assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); - assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); - } - } - assert(n_bytes_in_use == _n_bytes_in_use); -} - -// Tracing - -void block_allocator::_trace_create(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", - this, _reserve_at_beginning, _alignment); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_create_from_blockpairs(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", - this, _reserve_at_beginning, _alignment); - for (uint64_t i = 0; i < _n_blocks; i++) { - fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", - _blocks_array[i].offset, _blocks_array[i].size); - } - fprintf(ba_trace_file, "\n"); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - 
} -} - -void block_allocator::_trace_destroy(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - this, size, heat, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); +struct ValidateExtra { + uint64_t _bytes; + MhsRbTree::Node *_pre_node; +}; +static void VisUsedBlocksInOrder(void *extra, + MhsRbTree::Node *cur_node, + uint64_t UU(depth)) { + struct ValidateExtra *v_e = (struct ValidateExtra *)extra; + MhsRbTree::Node *pre_node = v_e->_pre_node; + // verify no overlaps + if (pre_node) { + invariant(rbn_size(pre_node) > 0); + invariant(rbn_offset(cur_node) > + rbn_offset(pre_node) + rbn_size(pre_node)); + MhsRbTree::OUUInt64 used_space = + rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node)); + v_e->_bytes += used_space.ToInt(); + } else { + v_e->_bytes += rbn_offset(cur_node).ToInt(); } + v_e->_pre_node = cur_node; } -void block_allocator::_trace_free(uint64_t offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } +void BlockAllocator::Validate() const { + _tree->ValidateBalance(); + _tree->ValidateMhs(); + struct ValidateExtra extra = {0, nullptr}; + _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra); + invariant(extra._bytes == _n_bytes_in_use); } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h index 9b2c1553e7f..648ea9a9ef2 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h @@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "portability/toku_pthread.h" #include "portability/toku_stdint.h" #include "portability/toku_stdlib.h" +#include "ft/serialize/rbtree_mhs.h" // Block allocator. // @@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // The allocation of block numbers is handled elsewhere. // // When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated or freed) +// block at the beginning that is preallocated (and cannot be allocated or +// freed) // // We can allocate blocks of a particular size at a particular location. -// We can allocate blocks of a particular size at a location chosen by the allocator. // We can free blocks. // We can determine the size of a block. - -class block_allocator { -public: +#define MAX_BYTE 0xffffffffffffffff +class BlockAllocator { + public: static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; // How much must be reserved at the beginning for the block? - // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. + // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 + // pointer for each root. // So 4096 should be enough. 
static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; - - static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == + 0, "block allocator header must have proper alignment"); - static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = + BLOCK_ALLOCATOR_HEADER_RESERVE * 2; - enum allocation_strategy { - BA_STRATEGY_FIRST_FIT = 1, - BA_STRATEGY_BEST_FIT, - BA_STRATEGY_PADDED_FIT, - BA_STRATEGY_HEAT_ZONE - }; - - struct blockpair { - uint64_t offset; - uint64_t size; - blockpair(uint64_t o, uint64_t s) : - offset(o), size(s) { - } - int operator<(const struct blockpair &rhs) const { - return offset < rhs.offset; - } - int operator<(const uint64_t &o) const { - return offset < o; + struct BlockPair { + uint64_t _offset; + uint64_t _size; + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + int operator<(const struct BlockPair &rhs) const { + return _offset < rhs._offset; } + int operator<(const uint64_t &o) const { return _offset < o; } }; - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The default allocation strategy is first fit + // (BA_STRATEGY_FIRST_FIT) // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. // alignment (IN) Block alignment. - void create(uint64_t reserve_at_beginning, uint64_t alignment); + void Create(uint64_t reserve_at_beginning, uint64_t alignment); - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) - // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The allocator is initialized to contain `n_blocks' of BlockPairs, + // taken from `pairs' // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters // pairs, unowned array of pairs to copy // n_blocks, Size of pairs array - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. // alignment (IN) Block alignment. 
- void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks); + void CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *pairs, + uint64_t n_blocks); // Effect: Destroy this block allocator - void destroy(); - - // Effect: Set the allocation strategy that the allocator should use - // Requires: No other threads are operating on this block allocator - void set_strategy(enum allocation_strategy strategy); + void Destroy(); - // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Effect: Allocate a block of the specified size at an address chosen by + // the allocator. // Aborts if anything goes wrong. // The block address will be a multiple of the alignment. // Parameters: - // size (IN): The size of the block. (The size does not have to be aligned.) + // size (IN): The size of the block. (The size does not have to be + // aligned.) // offset (OUT): The location of the block. - // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) - // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary - void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); + // block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), + // but their specific values are arbitrary + void AllocBlock(uint64_t size, uint64_t *offset); // Effect: Free the block at offset. // Requires: There must be a block currently allocated at that offset. // Parameters: // offset (IN): The offset of the block. - void free_block(uint64_t offset); + void FreeBlock(uint64_t offset, uint64_t size); - // Effect: Return the size of the block that starts at offset. - // Requires: There must be a block currently allocated at that offset. - // Parameters: - // offset (IN): The offset of the block. - uint64_t block_size(uint64_t offset); - - // Effect: Check to see if the block allocator is OK. This may take a long time. + // Effect: Check to see if the block allocator is OK. This may take a long + // time. // Usage Hints: Probably only use this for unit tests. // TODO: Private? - void validate() const; + void Validate() const; // Effect: Return the unallocated block address of "infinite" size. - // That is, return the smallest address that is above all the allocated blocks. - uint64_t allocated_limit() const; + // That is, return the smallest address that is above all the allocated + // blocks. + uint64_t AllocatedLimit() const; - // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. + // Effect: Consider the blocks in sorted order. The reserved block at the + // beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. // Rationale: This is probably useful only for tests. - int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size); // Effect: Fill in report to indicate how the file is used. 
- // Requires: + // Requires: // report->file_size_bytes is filled in // report->data_bytes is filled in // report->checkpoint_bytes_additional is filled in - void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + void UnusedStatistics(TOKU_DB_FRAGMENTATION report); // Effect: Fill in report->data_bytes with the number of bytes in use - // Fill in report->data_blocks with the number of blockpairs in use + // Fill in report->data_blocks with the number of BlockPairs in use // Fill in unused statistics using this->get_unused_statistics() // Requires: // report->file_size is ignored on return // report->checkpoint_bytes_additional is ignored on return - void get_statistics(TOKU_DB_FRAGMENTATION report); - - // Block allocator tracing. - // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file - // should be written to. - // - Trace may be replayed by ba_trace_replay tool in tools/ directory - // eg: "cat mytracefile | ba_trace_replay" - static void maybe_initialize_trace(); - static void maybe_close_trace(); - -private: - void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); - void grow_blocks_array_by(uint64_t n_to_add); - void grow_blocks_array(); - int64_t find_block(uint64_t offset); - struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); - - // Tracing - toku_mutex_t _trace_lock; - void _trace_create(void); - void _trace_create_from_blockpairs(void); - void _trace_destroy(void); - void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); - void _trace_free(uint64_t offset); + void Statistics(TOKU_DB_FRAGMENTATION report); + + virtual ~BlockAllocator(){}; + + private: + void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment); // How much to reserve at the beginning uint64_t _reserve_at_beginning; @@ -203,12 +181,8 @@ private: uint64_t _alignment; // How many blocks uint64_t _n_blocks; - // How big is the blocks_array. Must be >= n_blocks. - uint64_t _blocks_array_size; - // These blocks are sorted by address. - struct blockpair *_blocks_array; - // Including the reserve_at_beginning uint64_t _n_bytes_in_use; - // The allocation strategy are we using - enum allocation_strategy _strategy; + + // These blocks are sorted by address. + MhsRbTree::Tree *_tree; }; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc deleted file mode 100644 index 62bb8fc4a87..00000000000 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. 
- ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -#include <algorithm> - -#include <string.h> - -#include "portability/toku_assert.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static uint64_t _align(uint64_t value, uint64_t ba_alignment) { - return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; -} - -static uint64_t _roundup_to_power_of_two(uint64_t value) { - uint64_t r = 4096; - while (r < value) { - r *= 2; - invariant(r > 0); - } - return r; -} - -// First fit block allocation -static struct block_allocator::blockpair * -_first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[0]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp++) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] - invariant(bp - blocks_array < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -static struct block_allocator::blockpair * -_first_fit_bw(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[-1]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp--) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? 
_align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { - invariant(blocks_array - bp < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -struct block_allocator::blockpair * -block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, 0); -} - -// Best fit block allocation -struct block_allocator::blockpair * -block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - struct block_allocator::blockpair *best_bp = nullptr; - uint64_t best_hole_size = 0; - for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { - // Consider the space after blocknum - struct block_allocator::blockpair *bp = &blocks_array[blocknum]; - uint64_t possible_offset = _align(bp->offset + bp->size, alignment); - uint64_t possible_end_offset = possible_offset + size; - if (possible_end_offset <= bp[1].offset) { - // It fits here. Is it the best fit? - uint64_t hole_size = bp[1].offset - possible_end_offset; - if (best_bp == nullptr || hole_size < best_hole_size) { - best_hole_size = hole_size; - best_bp = bp; - } - } - } - return best_bp; -} - -static uint64_t padded_fit_alignment = 4096; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_padded_fit_alignment_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); - if (s != nullptr && strlen(s) > 0) { - const int64_t alignment = strtoll(s, nullptr, 10); - if (alignment <= 0) { - fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " - "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", - s, padded_fit_alignment); - } else { - padded_fit_alignment = _roundup_to_power_of_two(alignment); - fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", - padded_fit_alignment); - } - } -} - -// First fit into a block that is oversized by up to max_padding. -// The hope is that if we purposefully waste a bit of space at allocation -// time we'll be more likely to reuse this block later. -struct block_allocator::blockpair * -block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); -} - -static double hot_zone_threshold = 0.85; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_hot_zone_threshold_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); - if (s != nullptr && strlen(s) > 0) { - const double hot_zone = strtod(s, nullptr); - if (hot_zone < 1 || hot_zone > 99) { - fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " - "but it's out of range (should be an integer 1 through 99). 
defaulting to 85\n", s); - hot_zone_threshold = 85 / 100; - } else { - fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); - hot_zone_threshold = hot_zone / 100; - } - } -} - -struct block_allocator::blockpair * -block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat) { - if (heat > 0) { - struct block_allocator::blockpair *bp, *boundary_bp; - - // Hot allocation. Find the beginning of the hot zone. - boundary_bp = &blocks_array[n_blocks - 1]; - uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); - uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset); - - boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); - uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; - uint64_t blocks_outside_zone = boundary_bp - blocks_array; - invariant(blocks_in_zone + blocks_outside_zone == n_blocks); - - if (blocks_in_zone > 0) { - // Find the first fit in the hot zone, going forward. - bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); - if (bp != nullptr) { - return bp; - } - } - if (blocks_outside_zone > 0) { - // Find the first fit in the cold zone, going backwards. - bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); - if (bp != nullptr) { - return bp; - } - } - } else { - // Cold allocations are simply first-fit from the beginning. - return _first_fit(blocks_array, n_blocks, size, alignment, 0); - } - return nullptr; -} diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc index 7101ba9f58c..d2532134d96 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc @@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/ft-internal.h" // TODO: reorganize this dependency (FT-303) -#include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/ft-ops.h" // for toku_maybe_truncate_file #include "ft/serialize/block_table.h" #include "ft/serialize/rbuf.h" #include "ft/serialize/wbuf.h" #include "ft/serialize/block_allocator.h" - #include "util/nb_mutex.h" #include "util/scoped_malloc.h" // indicates the end of a freelist -static const BLOCKNUM freelist_null = { -1 }; +static const BLOCKNUM freelist_null = {-1}; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF size_is_free = (DISKOFF) -1; +static const DISKOFF size_is_free = (DISKOFF)-1; -// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock -static const DISKOFF diskoff_unused = (DISKOFF) -2; +// value of block_translation_pair.u.diskoff if blocknum is used but does not +// yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF)-2; -void block_table::_mutex_lock() { - toku_mutex_lock(&_mutex); -} +void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); } -void block_table::_mutex_unlock() { - toku_mutex_unlock(&_mutex); -} +void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); } // TODO: Move lock to FT void toku_ft_lock(FT ft) { @@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) { bt->_mutex_unlock(); } -// There are two headers: the reserve must fit them both and be suitably aligned. 
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, +// There are two headers: the reserve must fit them both and be suitably +// aligned. +static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT == + 0, "Block allocator's header reserve must be suitibly aligned"); -static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == - block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - "Block allocator's total header reserve must exactly fit two headers"); +static_assert( + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); // does NOT initialize the block allocator: the caller is responsible void block_table::_create_internal() { @@ -99,25 +98,30 @@ void block_table::_create_internal() { memset(&_inprogress, 0, sizeof(struct translation)); memset(&_checkpointed, 0, sizeof(struct translation)); memset(&_mutex, 0, sizeof(_mutex)); + _bt_block_allocator = new BlockAllocator(); toku_mutex_init(&_mutex, nullptr); nb_mutex_init(&_safe_file_size_lock); } -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int block_table::create_from_buffer(int fd, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { +// Fill in the checkpointed translation from buffer, and copy checkpointed to +// current. +// The one read from disk is the last known checkpointed one, so we are keeping +// it in +// place and then setting current (which is never stored on disk) for current +// use. +// The translation_buffer has translation only, we create the rest of the +// block_table. 
+int block_table::create_from_buffer( + int fd, + DISKOFF location_on_disk, // Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { // Does not initialize the block allocator _create_internal(); // Deserialize the translation and copy it to current - int r = _translation_deserialize_from_buffer(&_checkpointed, - location_on_disk, size_on_disk, - translation_buffer); + int r = _translation_deserialize_from_buffer( + &_checkpointed, location_on_disk, size_on_disk, translation_buffer); if (r != 0) { return r; } @@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd, invariant(file_size >= 0); _safe_file_size = file_size; - // Gather the non-empty translations and use them to create the block allocator + // Gather the non-empty translations and use them to create the block + // allocator toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * - sizeof(struct block_allocator::blockpair)); - struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + sizeof(struct BlockAllocator::BlockPair)); + struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); uint64_t n_pairs = 0; for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = _checkpointed.block_translation[i]; if (pair.size > 0) { invariant(pair.u.diskoff != diskoff_unused); - pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + pairs[n_pairs++] = + BlockAllocator::BlockPair(pair.u.diskoff, pair.size); } } - _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, - pairs, n_pairs); + _bt_block_allocator->CreateFromBlockPairs( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, + n_pairs); return 0; } @@ -155,8 +163,10 @@ void block_table::create() { _create_internal(); _checkpointed.type = TRANSLATION_CHECKPOINTED; - _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.smallest_never_used_blocknum = + make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = + _checkpointed.smallest_never_used_blocknum.b; _checkpointed.blocknum_freelist_head = freelist_null; XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { @@ -164,12 +174,13 @@ void block_table::create() { _checkpointed.block_translation[i].u.diskoff = diskoff_unused; } - // we just created a default checkpointed, now copy it to current. + // we just created a default checkpointed, now copy it to current. _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); // Create an empty block allocator. - _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); + _bt_block_allocator->Create( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT); } // TODO: Refactor with FT-303 @@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) { void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { toku_mutex_assert_locked(&_mutex); - uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); - //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. 
- if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit(); + // Save a call to toku_os_get_file_size (kernel call) if unlikely to be + // useful. + if (new_size_needed < size_needed_before && + new_size_needed < _safe_file_size) { nb_mutex_lock(&_safe_file_size_lock, &_mutex); // Must hold _safe_file_size_lock to change _safe_file_size. if (new_size_needed < _safe_file_size) { int64_t safe_file_size_before = _safe_file_size; - // Not safe to use the 'to-be-truncated' portion until truncate is done. + // Not safe to use the 'to-be-truncated' portion until truncate is + // done. _safe_file_size = new_size_needed; _mutex_unlock(); uint64_t size_after; - toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); + toku_maybe_truncate_file( + fd, new_size_needed, safe_file_size_before, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) { _mutex_unlock(); } -void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { - // We intend to malloc a fresh block, so the incoming translation should be empty +void block_table::_copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be + // empty invariant_null(dst->block_translation); invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); invariant(newtype == TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); + (src->type == TRANSLATION_CURRENT && + newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && + newtype == TRANSLATION_CURRENT)); dst->type = newtype; dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; + dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + // destination btt is of fixed size. Allocate + memcpy the exact length + // necessary. dst->length_of_array = dst->smallest_never_used_blocknum.b; XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + memcpy(dst->block_translation, + src->block_translation, + dst->length_of_array * sizeof(*dst->block_translation)); // New version of btt is not yet stored on disk. dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = + diskoff_unused; } int64_t block_table::get_blocks_in_use_unlocked() { @@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() { struct translation *t = &_current; int64_t num_blocks = 0; { - //Reserved blocknums do not get upgraded; They are part of the header. - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + // Reserved blocknums do not get upgraded; They are part of the header. 
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size != size_is_free) { num_blocks++; } @@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() { } void block_table::_maybe_optimize_translation(struct translation *t) { - //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just - //on a free list. Doing so requires us to regenerate the free list. - //This is O(n) work, so do it only if you're already doing that. + // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums + // instead of just + // on a free list. Doing so requires us to regenerate the free list. + // This is O(n) work, so do it only if you're already doing that. BLOCKNUM b; paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - //Calculate how large the free suffix is. + // Calculate how large the free suffix is. int64_t freed; { - for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { - if (t->block_translation[b.b-1].size != size_is_free) { + for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; + b.b--) { + if (t->block_translation[b.b - 1].size != size_is_free) { break; } } freed = t->smallest_never_used_blocknum.b - b.b; } - if (freed>0) { + if (freed > 0) { t->smallest_never_used_blocknum.b = b.b; - if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { - //We're using more memory than necessary to represent this now. Reduce. + if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) { + // We're using more memory than necessary to represent this now. + // Reduce. uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); t->length_of_array = new_length; - //No need to zero anything out. + // No need to zero anything out. } - //Regenerate free list. + // Regenerate free list. t->blocknum_freelist_head.b = freelist_null.b; - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size == size_is_free) { - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->block_translation[b.b].u.next_free_blocknum = + t->blocknum_freelist_head; + t->blocknum_freelist_head = b; } } } @@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() { } void block_table::note_skipped_checkpoint() { - //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header + // Purpose, alert block translation that the checkpoint was skipped, e.x. + // for a non-dirty header _mutex_lock(); paranoid_invariant_notnull(_inprogress.block_translation); _checkpoint_skipped = true; _mutex_unlock(); } -// Purpose: free any disk space used by previous checkpoint that isn't in use by either +// Purpose: free any disk space used by previous checkpoint that isn't in use by +// either // - current state // - in-progress checkpoint // capture inprogress as new checkpointed. 
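Between these two hunks, for orientation: a minimal, self-contained C++ sketch of the end-of-checkpoint cleanup pass that the next hunk converts to the new FreeBlock(offset, size) call. All names here (Pair, Translation, ToyAllocator, prevents_freeing) are illustrative stand-ins, not PerconaFT's actual types; the point is only that, with the rbtree-based allocator, the caller must now pass the block size because the allocator no longer keeps a per-offset size table.

    // Illustrative sketch only; simplified model of block_table::note_end_checkpoint's free pass.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <vector>

    struct Pair { int64_t diskoff; int64_t size; };   // one translation entry
    using Translation = std::vector<Pair>;            // index = blocknum

    // Stand-in for BlockAllocator::FreeBlock(offset, size).
    struct ToyAllocator {
        std::map<int64_t, int64_t> free_list;         // offset -> size
        void FreeBlock(int64_t offset, int64_t size) { free_list[offset] = size; }
    };

    // True if translation t still points blocknum b at the same disk offset,
    // i.e. freeing that region would corrupt data t still references.
    static bool prevents_freeing(const Translation &t, size_t b, const Pair &old_pair) {
        return b < t.size() && t[b].diskoff == old_pair.diskoff;
    }

    // Free every region owned only by the old checkpointed translation; regions still
    // referenced by the in-progress translation are left alone, and (as in the real
    // code) the current translation is asserted not to need them either.
    static void end_checkpoint_cleanup(const Translation &checkpointed,
                                       const Translation &inprogress,
                                       const Translation &current,
                                       ToyAllocator &ba) {
        for (size_t i = 0; i < checkpointed.size(); i++) {
            const Pair &pair = checkpointed[i];
            if (pair.size > 0 && !prevents_freeing(inprogress, i, pair)) {
                assert(!prevents_freeing(current, i, pair));
                ba.FreeBlock(pair.diskoff, pair.size);
            }
        }
    }

    int main() {
        Translation checkpointed = {{4096, 512}, {8192, 1024}};
        Translation inprogress   = {{4096, 512}, {16384, 1024}};  // blocknum 1 was rewritten elsewhere
        Translation current      = inprogress;
        ToyAllocator ba;
        end_checkpoint_cleanup(checkpointed, inprogress, current, ba);
        printf("freed %zu region(s)\n", ba.free_list.size());     // expect 1 (the old copy at offset 8192)
        return 0;
    }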
@@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() { void block_table::note_end_checkpoint(int fd) { // Free unused blocks _mutex_lock(); - uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit(); paranoid_invariant_notnull(_inprogress.block_translation); if (_checkpoint_skipped) { toku_free(_inprogress.block_translation); @@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) { goto end; } - //Make certain inprogress was allocated space on disk - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + // Make certain inprogress was allocated space on disk + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > + 0); { struct translation *t = &_checkpointed; for (int64_t i = 0; i < t->length_of_array; i++) { struct block_translation_pair *pair = &t->block_translation[i]; - if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { - assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); - _bt_block_allocator.free_block(pair->u.diskoff); + if (pair->size > 0 && + !_translation_prevents_freeing( + &_inprogress, make_blocknum(i), pair)) { + invariant(!_translation_prevents_freeing( + &_current, make_blocknum(i), pair)); + _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size); } } toku_free(_checkpointed.block_translation); @@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; } -void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_blocknum(t, b)); } -bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { +bool block_table::_is_valid_freeable_blocknum(struct translation *t, + BLOCKNUM b) { invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; } // should be freeable -void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_freeable_blocknum(t, b)); } // Also used only in ft-serialize-test. -void block_table::block_free(uint64_t offset) { +void block_table::block_free(uint64_t offset, uint64_t size) { _mutex_lock(); - _bt_block_allocator.free_block(offset); + _bt_block_allocator->FreeBlock(offset, size); _mutex_unlock(); } int64_t block_table::_calculate_size_on_disk(struct translation *t) { - return 8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4; // 4 for checksum + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum } -// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. 
-bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return t->block_translation && - b.b < t->smallest_never_used_blocknum.b && +// We cannot free the disk space allocated to this blocknum if it is still in +// use by the given translation table. +bool block_table::_translation_prevents_freeing( + struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair) { + return t->block_translation && b.b < t->smallest_never_used_blocknum.b && old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; } -void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { +void block_table::_realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); ft_set_dirty(ft, for_checkpoint); struct translation *t = &_current; struct block_translation_pair old_pair = t->block_translation[b.b]; - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); - if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + (!for_checkpoint && + _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); + if (!cannot_free && old_pair.u.diskoff != diskoff_unused) { + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } uint64_t allocator_offset = diskoff_unused; @@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o if (size > 0) { // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - _bt_block_allocator.alloc_block(size, heat, &allocator_offset); + _bt_block_allocator->AllocBlock(size, &allocator_offset); } t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; - //Update inprogress btt if appropriate (if called because Pending bit is set). + // Update inprogress btt if appropriate (if called because Pending bit is + // set). 
if (for_checkpoint) { paranoid_invariant(b.b < _inprogress.length_of_array); _inprogress.block_translation[b.b] = t->block_translation[b.b]; } } -void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { +void block_table::_ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset) { // Requires: holding _mutex uint64_t size_needed = block_size + block_offset; if (size_needed > _safe_file_size) { @@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF _mutex_unlock(); int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); + toku_maybe_preallocate_in_file( + fd, size_needed, _safe_file_size, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF } } -void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { +void block_table::realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + int fd, + bool for_checkpoint) { _mutex_lock(); struct translation *t = &_current; _verify_valid_freeable_blocknum(t, b); - _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); _ensure_safe_write_unlocked(fd, size, *offset); _mutex_unlock(); @@ -458,70 +517,97 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) { return pair->size == 0 && pair->u.diskoff == diskoff_unused; } -// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. -// The space must be 512-byte aligned (both the starting address and the size). -// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. +// Effect: figure out where to put the inprogress btt on disk, allocate space +// for it there. +// The space must be 512-byte aligned (both the starting address and the +// size). +// As a result, the allcoated space may be a little bit bigger (up to the next +// 512-byte boundary) than the actual btt. void block_table::_alloc_inprogress_translation_on_disk_unlocked() { toku_mutex_assert_locked(&_mutex); struct translation *t = &_inprogress; paranoid_invariant_notnull(t->block_translation); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - //Each inprogress is allocated only once + // Each inprogress is allocated only once paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b])); - //Allocate a new block + // Allocate a new block int64_t size = _calculate_size_on_disk(t); uint64_t offset; - _bt_block_allocator.alloc_block(size, 0, &offset); + _bt_block_allocator->AllocBlock(size, &offset); t->block_translation[b.b].u.diskoff = offset; - t->block_translation[b.b].size = size; + t->block_translation[b.b].size = size; } // Effect: Serializes the blocktable to a wbuf (which starts uninitialized) -// A clean shutdown runs checkpoint start so that current and inprogress are copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) -// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. 
-// It *is* guaranteed that we can read up to the next 512-byte boundary, however -void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w, - int64_t *address, int64_t *size) { +// A clean shutdown runs checkpoint start so that current and inprogress are +// copies. +// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the +// total length is a multiple of 512 (so we pad with zeros at the end if +// needd) +// The address is guaranteed to be 512-byte aligned, but the size is not +// guaranteed. +// It *is* guaranteed that we can read up to the next 512-byte boundary, +// however +void block_table::serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size) { _mutex_lock(); struct translation *t = &_inprogress; BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy. + _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block + // must be 512-byte + // aligned to make + // O_DIRECT happy. uint64_t size_translation = _calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); - assert((int64_t)size_translation==t->block_translation[b.b].size); + uint64_t size_aligned = roundup_to_multiple(512, size_translation); + invariant((int64_t)size_translation == t->block_translation[b.b].size); { - //Init wbuf + // Init wbuf if (0) - printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); + printf( + "%s:%d writing translation table of size_translation %" PRIu64 + " at %" PRId64 "\n", + __FILE__, + __LINE__, + size_translation, + t->block_translation[b.b].u.diskoff); char *XMALLOC_N_ALIGNED(512, size_aligned, buf); - for (uint64_t i=size_translation; i<size_aligned; i++) buf[i]=0; // fill in the end of the buffer with zeros. + for (uint64_t i = size_translation; i < size_aligned; i++) + buf[i] = 0; // fill in the end of the buffer with zeros. wbuf_init(w, buf, size_aligned); } - wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum); - wbuf_BLOCKNUM(w, t->blocknum_freelist_head); + wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum); + wbuf_BLOCKNUM(w, t->blocknum_freelist_head); int64_t i; - for (i=0; i<t->smallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { if (0) - printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size); + printf("%s:%d %" PRId64 ",%" PRId64 "\n", + __FILE__, + __LINE__, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); wbuf_DISKOFF(w, t->block_translation[i].u.diskoff); wbuf_DISKOFF(w, t->block_translation[i].size); } uint32_t checksum = toku_x1764_finish(&w->checksum); wbuf_int(w, checksum); *address = t->block_translation[b.b].u.diskoff; - *size = size_translation; - assert((*address)%512 == 0); + *size = size_translation; + invariant((*address) % 512 == 0); _ensure_safe_write_unlocked(fd, size_aligned, *address); _mutex_unlock(); } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) 
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { struct translation *t = &_current; _verify_valid_blocknum(t, b); if (offset) { @@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF } } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) +void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { _mutex_lock(); _translate_blocknum_to_offset_size_unlocked(b, offset, size); _mutex_unlock(); @@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, // given that one more never-used blocknum will soon be used. void block_table::_maybe_expand_translation(struct translation *t) { if (t->length_of_array <= t->smallest_never_used_blocknum.b) { - //expansion is necessary + // expansion is necessary uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); uint64_t i; for (i = t->length_of_array; i < new_length; i++) { t->block_translation[i].u.next_free_blocknum = freelist_null; - t->block_translation[i].size = size_is_free; + t->block_translation[i].size = size_is_free; } t->length_of_array = new_length; } @@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { if (t->blocknum_freelist_head.b == freelist_null.b) { // no previously used blocknums are available // use a never used blocknum - _maybe_expand_translation(t); //Ensure a never used blocknums is available + _maybe_expand_translation( + t); // Ensure a never used blocknums is available result = t->smallest_never_used_blocknum; t->smallest_never_used_blocknum.b++; } else { // reuse a previously used blocknum @@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; t->blocknum_freelist_head = next; } - //Verify the blocknum is free + // Verify the blocknum is free paranoid_invariant(t->block_translation[result.b].size == size_is_free); - //blocknum is not free anymore + // blocknum is not free anymore t->block_translation[result.b].u.diskoff = diskoff_unused; - t->block_translation[result.b].size = 0; + t->block_translation[result.b].size = 0; _verify_valid_freeable_blocknum(t, result); *res = result; ft_set_dirty(ft, false); @@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { _mutex_unlock(); } -void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { +void block_table::_free_blocknum_in_translation(struct translation *t, + BLOCKNUM b) { _verify_valid_freeable_blocknum(t, b); paranoid_invariant(t->block_translation[b.b].size != size_is_free); - t->block_translation[b.b].size = size_is_free; + t->block_translation[b.b].size = size_is_free; t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->blocknum_freelist_head = b; } // Effect: Free a blocknum. 
// If the blocknum holds the only reference to a block on disk, free that block -void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = *bp; - bp->b = 0; //Remove caller's reference. + bp->b = 0; // Remove caller's reference. struct block_translation_pair old_pair = _current.block_translation[b.b]; _free_blocknum_in_translation(&_current, b); if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + paranoid_invariant(ft->checkpoint_header->type == + FT_CHECKPOINT_INPROGRESS); _free_blocknum_in_translation(&_inprogress, b); } - //If the size is 0, no disk block has ever been assigned to this blocknum. + // If the size is 0, no disk block has ever been assigned to this blocknum. if (old_pair.size > 0) { - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - (_translation_prevents_freeing(&_inprogress, b, &old_pair) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + _translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); if (!cannot_free) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } - } - else { - paranoid_invariant(old_pair.size==0); + } else { + paranoid_invariant(old_pair.size == 0); paranoid_invariant(old_pair.u.diskoff == diskoff_unused); } ft_set_dirty(ft, for_checkpoint); @@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() { void block_table::free_unused_blocknums(BLOCKNUM root) { _mutex_lock(); int64_t smallest = _current.smallest_never_used_blocknum.b; - for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { if (i == root.b) { continue; } BLOCKNUM b = make_blocknum(i); if (_current.block_translation[b.b].size == 0) { - invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + invariant(_current.block_translation[b.b].u.diskoff == + diskoff_unused); _free_blocknum_in_translation(&_current, b); } } @@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { goto cleanup; } } - cleanup: +cleanup: _mutex_unlock(); return ok; } // Verify there are no data blocks except root. -// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. +// TODO(leif): This actually takes a lock, but I don't want to fix all the +// callers right now. 
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) { paranoid_invariant(_no_data_blocks_except_root(root)); } @@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { if (t->block_translation) { BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); - fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b); - fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b); - fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); - fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff); + fprintf(f, + " smallest_never_used_blocknum[%" PRId64 "]", + t->smallest_never_used_blocknum.b); + fprintf(f, + " blocknum_free_list_head[%" PRId64 "]", + t->blocknum_freelist_head.b); + fprintf( + f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); + fprintf(f, + " location_on_disk[%" PRId64 "]\n", + t->block_translation[b.b].u.diskoff); int64_t i; - for (i=0; i<t->length_of_array; i++) { - fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + for (i = 0; i < t->length_of_array; i++) { + fprintf(f, + " %" PRId64 ": %" PRId64 " %" PRId64 "\n", + i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } fprintf(f, "\n"); } else { @@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { void block_table::dump_translation_table_pretty(FILE *f) { _mutex_lock(); struct translation *t = &_checkpointed; - assert(t->block_translation != nullptr); + invariant(t->block_translation != nullptr); for (int64_t i = 0; i < t->length_of_array; ++i) { - fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + fprintf(f, + "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", + i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } _mutex_unlock(); } @@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) { struct translation *t = &_current; if (b.b < t->length_of_array) { struct block_translation_pair *bx = &t->block_translation[b.b]; - printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); + printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", + b.b, + bx->u.diskoff, + bx->size); } _mutex_unlock(); } @@ -763,26 +877,31 @@ void block_table::destroy(void) { toku_free(_inprogress.block_translation); toku_free(_checkpointed.block_translation); - _bt_block_allocator.destroy(); + _bt_block_allocator->Destroy(); + delete _bt_block_allocator; toku_mutex_destroy(&_mutex); nb_mutex_destroy(&_safe_file_size_lock); } -int block_table::_translation_deserialize_from_buffer(struct translation *t, - DISKOFF location_on_disk, - uint64_t size_on_disk, - // out: buffer with serialized translation - unsigned char *translation_buffer) { +int block_table::_translation_deserialize_from_buffer( + struct translation *t, + DISKOFF location_on_disk, + uint64_t size_on_disk, + // out: buffer with serialized translation + unsigned char *translation_buffer) { int r = 0; - assert(location_on_disk != 0); + invariant(location_on_disk != 0); t->type = TRANSLATION_CHECKPOINTED; // check the checksum uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); uint64_t offset = size_on_disk - 4; - uint32_t stored_x1764 = 
toku_dtoh32(*(int*)(translation_buffer + offset)); + uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset)); if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + fprintf(stderr, + "Translation table checksum failure: calc=0x%08x read=0x%08x\n", + x1764, + stored_x1764); r = TOKUDB_BAD_CHECKSUM; goto exit; } @@ -790,42 +909,47 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t, struct rbuf rb; rb.buf = translation_buffer; rb.ndone = 0; - rb.size = size_on_disk-4;//4==checksum + rb.size = size_on_disk - 4; // 4==checksum - t->smallest_never_used_blocknum = rbuf_blocknum(&rb); + t->smallest_never_used_blocknum = rbuf_blocknum(&rb); t->length_of_array = t->smallest_never_used_blocknum.b; invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rb); + t->blocknum_freelist_head = rbuf_blocknum(&rb); XMALLOC_N(t->length_of_array, t->block_translation); for (int64_t i = 0; i < t->length_of_array; i++) { t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb); t->block_translation[i].size = rbuf_DISKOFF(&rb); } - invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); + invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == + (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == + location_on_disk); exit: return r; } int block_table::iterate(enum translation_type type, - BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only) { struct translation *src; - + int r = 0; switch (type) { - case TRANSLATION_CURRENT: - src = &_current; - break; - case TRANSLATION_INPROGRESS: - src = &_inprogress; - break; - case TRANSLATION_CHECKPOINTED: - src = &_checkpointed; - break; - default: - r = EINVAL; + case TRANSLATION_CURRENT: + src = &_current; + break; + case TRANSLATION_INPROGRESS: + src = &_inprogress; + break; + case TRANSLATION_CHECKPOINTED: + src = &_checkpointed; + break; + default: + r = EINVAL; } struct translation fakecurrent; @@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type, src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; _mutex_unlock(); int64_t i; - for (i=0; i<t->smallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = t->block_translation[i]; - if (data_only && i< RESERVED_BLOCKNUMS) continue; - if (used_only && pair.size <= 0) continue; + if (data_only && i < RESERVED_BLOCKNUMS) + continue; + if (used_only && pair.size <= 0) + continue; r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra); - if (r!=0) break; + if (r != 0) + break; } toku_free(t->block_translation); } @@ -856,8 +983,11 @@ typedef struct { int64_t total_space; } frag_extra; -static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { - frag_extra *info = (frag_extra *) extra; +static int frag_helper(BLOCKNUM UU(b), + int64_t size, + int64_t address, + void *extra) { + frag_extra *info = (frag_extra *)extra; if (size + address > info->total_space) info->total_space = size + address; @@ -865,22 
+995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr return 0; } -void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = { 0, 0 }; +void block_table::internal_fragmentation(int64_t *total_sizep, + int64_t *used_sizep) { + frag_extra info = {0, 0}; int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); - assert_zero(r); + invariant_zero(r); - if (total_sizep) *total_sizep = info.total_space; - if (used_sizep) *used_sizep = info.used_space; + if (total_sizep) + *total_sizep = info.total_space; + if (used_sizep) + *used_sizep = info.used_space; } -void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { +void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + FT ft) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - _realloc_on_disk_internal(b, size, offset, ft, false, 0); + _realloc_on_disk_internal(b, size, offset, ft, false); } -void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { +void block_table::realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + FT ft, + int fd) { _mutex_lock(); _realloc_descriptor_on_disk_unlocked(size, offset, ft); _ensure_safe_write_unlocked(fd, size, *offset); @@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) { void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { // Requires: blocktable lock is held. // Requires: report->file_size_bytes is already filled in. - + // Count the headers. - report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->data_blocks = 1; - report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->checkpoint_bytes_additional = + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->checkpoint_blocks_additional = 1; struct translation *current = &_current; @@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { struct translation *checkpointed = &_checkpointed; for (int64_t i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff)) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; + struct block_translation_pair *pair = + &checkpointed->block_translation[i]; + if (pair->size > 0 && + !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff)) { + report->checkpoint_bytes_additional += pair->size; + report->checkpoint_blocks_additional++; } } struct translation *inprogress = &_inprogress; for (int64_t i = 0; i < inprogress->length_of_array; i++) { struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) { + if (pair->size > 0 && + !(i < 
current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff) && + !(i < checkpointed->length_of_array && + checkpointed->block_translation[i].size > 0 && + checkpointed->block_translation[i].u.diskoff == + pair->u.diskoff)) { report->checkpoint_bytes_additional += pair->size; report->checkpoint_blocks_additional++; } } - _bt_block_allocator.get_unused_statistics(report); + _bt_block_allocator->UnusedStatistics(report); } void block_table::get_info64(struct ftinfo64 *s) { @@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) { _mutex_unlock(); } -int block_table::iterate_translation_tables(uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void *iter_extra) { +int block_table::iterate_translation_tables( + uint64_t checkpoint_count, + int (*iter)(uint64_t checkpoint_count, + int64_t total_num_rows, + int64_t blocknum, + int64_t diskoff, + int64_t size, + void *extra), + void *iter_extra) { int error = 0; _mutex_lock(); - int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array; + int64_t total_num_rows = + _current.length_of_array + _checkpointed.length_of_array; for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) { struct block_translation_pair *block = &_current.block_translation[i]; - error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + error = iter(checkpoint_count, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &_checkpointed.block_translation[i]; - error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + struct block_translation_pair *block = + &_checkpointed.block_translation[i]; + error = iter(checkpoint_count - 1, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } _mutex_unlock(); diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h index 8d391674540..dd732d4f372 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h @@ -62,13 +62,16 @@ enum { RESERVED_BLOCKNUMS }; -typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, + int64_t size, + int64_t address, + void *extra); static inline BLOCKNUM make_blocknum(int64_t b) { - BLOCKNUM result = { .b = b }; + BLOCKNUM result = {.b = b}; return result; } -static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; +static const BLOCKNUM ROLLBACK_NONE = {.b = 0}; /** * There are three copies of the translation table (btt) in the block table: @@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; * * inprogress Is only filled by copying from current, * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) + * (It is serialized to disk on checkpoint and clean + *shutdown.) * At end of checkpoint it replaces 'checkpointed'. * During a checkpoint, any 'pending' dirty writes will update * inprogress. 
* * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, + * is the only version ever modified while the database is in + *use, * and is the only version ever copied to inprogress. * It is never stored on disk. */ class block_table { -public: + public: enum translation_type { TRANSLATION_NONE = 0, TRANSLATION_CURRENT, @@ -102,7 +107,10 @@ public: void create(); - int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + int create_from_buffer(int fd, + DISKOFF location_on_disk, + DISKOFF size_on_disk, + unsigned char *translation_buffer); void destroy(); @@ -114,11 +122,21 @@ public: // Blocknums void allocate_blocknum(BLOCKNUM *res, struct ft *ft); - void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); + void realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd, + bool for_checkpoint); void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); - void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); void free_unused_blocknums(BLOCKNUM root); - void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd); void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); // External verfication @@ -127,15 +145,22 @@ public: void verify_no_free_blocknums(); // Serialization - void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + void serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size); // DEBUG ONLY (ftdump included), tests included void blocknum_dump_translation(BLOCKNUM b); void dump_translation_table_pretty(FILE *f); void dump_translation_table(FILE *f); - void block_free(uint64_t offset); + void block_free(uint64_t offset, uint64_t size); - int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + int iterate(enum translation_type type, + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only); void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); // Requires: blocktable lock is held. @@ -146,13 +171,16 @@ public: void get_info64(struct ftinfo64 *); - int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + int iterate_translation_tables( + uint64_t, + int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), + void *); -private: + private: struct block_translation_pair { // If in the freelist, use next_free_blocknum, otherwise diskoff. union { - DISKOFF diskoff; + DISKOFF diskoff; BLOCKNUM next_free_blocknum; } u; @@ -173,7 +201,8 @@ private: struct translation { enum translation_type type; - // Number of elements in array (block_translation). always >= smallest_never_used_blocknum + // Number of elements in array (block_translation). 
always >= + // smallest_never_used_blocknum int64_t length_of_array; BLOCKNUM smallest_never_used_blocknum; @@ -181,20 +210,28 @@ private: BLOCKNUM blocknum_freelist_head; struct block_translation_pair *block_translation; - // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + // size_on_disk is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff }; void _create_internal(); - int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize - DISKOFF location_on_disk, // location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer); // buffer with serialized translation - - void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + int _translation_deserialize_from_buffer( + struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * + translation_buffer); // buffer with serialized translation + + void _copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype); void _maybe_optimize_translation(struct translation *t); void _maybe_expand_translation(struct translation *t); - bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + bool _translation_prevents_freeing(struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair); void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b); int64_t _calculate_size_on_disk(struct translation *t); bool _pair_is_unallocated(struct block_translation_pair *pair); @@ -203,14 +240,26 @@ private: // Blocknum management void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); - void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); - void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); - void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); - void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void _free_blocknum_unlocked(BLOCKNUM *bp, + struct ft *ft, + bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + bool for_checkpoint); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); // File management void _maybe_truncate_file(int fd, uint64_t size_needed_before); - void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + void _ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset); // Verification bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); @@ -220,29 +269,33 @@ private: bool _no_data_blocks_except_root(BLOCKNUM root); bool _blocknum_allocated(BLOCKNUM b); - // Locking + // Locking // // TODO: Move the lock to the FT void _mutex_lock(); void _mutex_unlock(); - // The current translation is the one used by client threads. + // The current translation is the one used by client threads. // It is not represented on disk. 
struct translation _current; - // The translation used by the checkpoint currently in progress. - // If the checkpoint thread allocates a block, it must also update the current translation. + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the + // current translation. struct translation _inprogress; - // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // The translation for the data that shall remain inviolate on disk until + // the next checkpoint finishes, // after which any blocks used only in this translation can be freed. struct translation _checkpointed; - // The in-memory data structure for block allocation. + // The in-memory data structure for block allocation. // There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation* - the block allocator is unaware of which - // blocks are used for which translation, but simply allocates and deallocates blocks. - block_allocator _bt_block_allocator; + // Note: This is *allocation* not *translation* - the block allocator is + // unaware of which + // blocks are used for which translation, but simply allocates and + // deallocates blocks. + BlockAllocator *_bt_block_allocator; toku_mutex_t _mutex; struct nb_mutex _safe_file_size_lock; bool _checkpoint_skipped; @@ -257,16 +310,16 @@ private: #include "ft/serialize/wbuf.h" -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_ulonglong(w, b.b); } -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_nocrc_ulonglong(w, b.b); } static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { - wbuf_ulonglong(wb, (uint64_t) off); + wbuf_ulonglong(wb, (uint64_t)off); } #include "ft/serialize/rbuf.h" @@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { return result; } -static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, + memarena *UU(ma), + BLOCKNUM *blocknum) { *blocknum = rbuf_blocknum(rb); } diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc index 1719b6b7cb5..c2f815c6cf2 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc @@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen, strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; - char windowBits = source[1]; + int8_t windowBits = source[1]; int r = inflateInit2(&strm, windowBits); lazy_assert(r == Z_OK); strm.next_out = dest; diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index 49d4368a3ab..8fcb5293412 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) // translation table itself won't fit in main memory. 
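  // size_to_read can exceed translation_size_on_disk, so the checks below only
  // require that the whole serialized translation arrived
  // (readsz >= translation_size_on_disk) and that the read stayed inside the
  // buffer (readsz <= size_to_read).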
ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read, translation_address_on_disk); - assert(readsz >= translation_size_on_disk); - assert(readsz <= (ssize_t)size_to_read); + invariant(readsz >= translation_size_on_disk); + invariant(readsz <= (ssize_t)size_to_read); } // Create table and read in data. r = ft->blocktable.create_from_buffer(fd, @@ -411,73 +411,90 @@ exit: return r; } -static size_t -serialize_ft_min_size (uint32_t version) { +static size_t serialize_ft_min_size(uint32_t version) { size_t size = 0; - switch(version) { - case FT_LAYOUT_VERSION_29: - size += sizeof(uint64_t); // logrows in ft - case FT_LAYOUT_VERSION_28: - size += sizeof(uint32_t); // fanout in ft - case FT_LAYOUT_VERSION_27: - case FT_LAYOUT_VERSION_26: - case FT_LAYOUT_VERSION_25: - case FT_LAYOUT_VERSION_24: - case FT_LAYOUT_VERSION_23: - case FT_LAYOUT_VERSION_22: - case FT_LAYOUT_VERSION_21: - size += sizeof(MSN); // max_msn_in_ft - case FT_LAYOUT_VERSION_20: - case FT_LAYOUT_VERSION_19: - size += 1; // compression method - size += sizeof(MSN); // highest_unused_msn_for_upgrade - case FT_LAYOUT_VERSION_18: - size += sizeof(uint64_t); // time_of_last_optimize_begin - size += sizeof(uint64_t); // time_of_last_optimize_end - size += sizeof(uint32_t); // count_of_optimize_in_progress - size += sizeof(MSN); // msn_at_start_of_last_completed_optimize - size -= 8; // removed num_blocks_to_upgrade_14 - size -= 8; // removed num_blocks_to_upgrade_13 - case FT_LAYOUT_VERSION_17: - size += 16; - invariant(sizeof(STAT64INFO_S) == 16); - case FT_LAYOUT_VERSION_16: - case FT_LAYOUT_VERSION_15: - size += 4; // basement node size - size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14 - size += 8; // time of last verification - case FT_LAYOUT_VERSION_14: - size += 8; //TXNID that created - case FT_LAYOUT_VERSION_13: - size += ( 4 // build_id - +4 // build_id_original - +8 // time_of_creation - +8 // time_of_last_modification - ); + switch (version) { + case FT_LAYOUT_VERSION_29: + size += sizeof(uint64_t); // logrows in ft + case FT_LAYOUT_VERSION_28: + size += sizeof(uint32_t); // fanout in ft + case FT_LAYOUT_VERSION_27: + case FT_LAYOUT_VERSION_26: + case FT_LAYOUT_VERSION_25: + case FT_LAYOUT_VERSION_24: + case FT_LAYOUT_VERSION_23: + case FT_LAYOUT_VERSION_22: + case FT_LAYOUT_VERSION_21: + size += sizeof(MSN); // max_msn_in_ft + case FT_LAYOUT_VERSION_20: + case FT_LAYOUT_VERSION_19: + size += 1; // compression method + size += sizeof(MSN); // highest_unused_msn_for_upgrade + case FT_LAYOUT_VERSION_18: + size += sizeof(uint64_t); // time_of_last_optimize_begin + size += sizeof(uint64_t); // time_of_last_optimize_end + size += sizeof(uint32_t); // count_of_optimize_in_progress + size += sizeof(MSN); // msn_at_start_of_last_completed_optimize + size -= 8; // removed num_blocks_to_upgrade_14 + size -= 8; // removed num_blocks_to_upgrade_13 + case FT_LAYOUT_VERSION_17: + size += 16; + invariant(sizeof(STAT64INFO_S) == 16); + case FT_LAYOUT_VERSION_16: + case FT_LAYOUT_VERSION_15: + size += 4; // basement node size + size += 8; // num_blocks_to_upgrade_14 (previously + // num_blocks_to_upgrade, now one int each for upgrade + // from 13, 14 + size += 8; // time of last verification + case FT_LAYOUT_VERSION_14: + size += 8; // TXNID that created + case FT_LAYOUT_VERSION_13: + size += (4 // build_id + + + 4 // build_id_original + + + 8 // time_of_creation + + + 8 // time_of_last_modification + ); // fall through - case FT_LAYOUT_VERSION_12: - size += (+8 // 
"tokudata" - +4 // version - +4 // original_version - +4 // size - +8 // byte order verification - +8 // checkpoint_count - +8 // checkpoint_lsn - +4 // tree's nodesize - +8 // translation_size_on_disk - +8 // translation_address_on_disk - +4 // checksum - +8 // Number of blocks in old version. - +8 // diskoff - +4 // flags - ); - break; - default: - abort(); - } - - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + case FT_LAYOUT_VERSION_12: + size += (+8 // "tokudata" + + + 4 // version + + + 4 // original_version + + + 4 // size + + + 8 // byte order verification + + + 8 // checkpoint_count + + + 8 // checkpoint_lsn + + + 4 // tree's nodesize + + + 8 // translation_size_on_disk + + + 8 // translation_address_on_disk + + + 4 // checksum + + + 8 // Number of blocks in old version. + + + 8 // diskoff + + + 4 // flags + ); + break; + default: + abort(); + } + + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, struct rbuf *rb, uint64_t *checkpoint_count, LSN *checkpoint_lsn, - uint32_t * version_p) + uint32_t *version_p) // Effect: Read and parse the header of a fractalal tree // // Simply reading the raw bytes of the header into an rbuf is insensitive @@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd, // file AND the header is useless { int r = 0; - const int64_t prefix_size = 8 + // magic ("tokudata") - 4 + // version - 4 + // build_id - 4; // size + const int64_t prefix_size = 8 + // magic ("tokudata") + 4 + // version + 4 + // build_id + 4; // size const int64_t read_size = roundup_to_multiple(512, prefix_size); unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); rb->buf = NULL; int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); if (n != read_size) { - if (n==0) { + if (n == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; - } else if (n<0) { + } else if (n < 0) { r = get_error_errno(); } else { r = EINVAL; @@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rbuf_init(rb, prefix, prefix_size); - //Check magic number + // Check magic number const void *magic; rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic,"tokudata",8)!=0) { - if ((*(uint64_t*)magic) == 0) { + if (memcmp(magic, "tokudata", 8) != 0) { + if ((*(uint64_t *)magic) == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = EINVAL; //Not a tokudb file! Do not use. + r = EINVAL; // Not a tokudb file! Do not use. } goto exit; } - //Version MUST be in network order regardless of disk order. + // Version MUST be in network order regardless of disk order. uint32_t version; version = rbuf_network_int(rb); *version_p = version; if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { - r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use + r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use goto exit; } else if (version > FT_LAYOUT_VERSION) { - r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use + r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use goto exit; } - //build_id MUST be in network order regardless of disk order. + // build_id MUST be in network order regardless of disk order. uint32_t build_id __attribute__((__unused__)); build_id = rbuf_network_int(rb); int64_t min_header_size; min_header_size = serialize_ft_min_size(version); - //Size MUST be in network order regardless of disk order. + // Size MUST be in network order regardless of disk order. uint32_t size; size = rbuf_network_int(rb); - //If too big, it is corrupt. 
We would probably notice during checksum - //but may have to do a multi-gigabyte malloc+read to find out. - //If its too small reading rbuf would crash, so verify. - if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + // If too big, it is corrupt. We would probably notice during checksum + // but may have to do a multi-gigabyte malloc+read to find out. + // If its too small reading rbuf would crash, so verify. + if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE || + size < min_header_size) { r = TOKUDB_DICTIONARY_NO_HEADER; goto exit; } - lazy_assert(rb->ndone==prefix_size); + lazy_assert(rb->ndone == prefix_size); rb->size = size; { toku_free(rb->buf); uint32_t size_to_read = roundup_to_multiple(512, size); XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); - assert(offset_of_header%512==0); + invariant(offset_of_header % 512 == 0); n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); if (n != size_to_read) { if (n < 0) { r = get_error_errno(); } else { - r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. + r = EINVAL; // Header might be useless (wrong size) or could be + // a disk read error. } goto exit; } } - //It's version 14 or later. Magic looks OK. - //We have an rbuf that represents the header. - //Size is within acceptable bounds. + // It's version 14 or later. Magic looks OK. + // We have an rbuf that represents the header. + // Size is within acceptable bounds. - //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) + // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function + // changed) uint32_t calculated_x1764; - calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); + calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4); uint32_t stored_x1764; - stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); + stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4)); if (calculated_x1764 != stored_x1764) { - r = TOKUDB_BAD_CHECKSUM; //Header useless - fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; // Header useless + fprintf(stderr, + "Header checksum failure: calc=0x%08x read=0x%08x\n", + calculated_x1764, + stored_x1764); goto exit; } - //Verify byte order + // Verify byte order const void *tmp_byte_order_check; lazy_assert((sizeof toku_byte_order_host) == 8); - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + rbuf_literal_bytes( + rb, &tmp_byte_order_check, 8); // Must not translate byte order int64_t byte_order_stored; - byte_order_stored = *(int64_t*)tmp_byte_order_check; + byte_order_stored = *(int64_t *)tmp_byte_order_check; if (byte_order_stored != toku_byte_order_host) { - r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary + r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary goto exit; } - //Load checkpoint count + // Load checkpoint count *checkpoint_count = rbuf_ulonglong(rb); *checkpoint_lsn = rbuf_LSN(rb); - //Restart at beginning during regular deserialization + // Restart at beginning during regular deserialization rb->ndone = 0; exit: @@ -620,11 +644,7 @@ exit: // Read ft from file into struct. Read both headers and use one. // We want the latest acceptable header whose checkpoint_lsn is no later // than max_acceptable_lsn. 
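// Concretely: header 0 is read from offset 0 and header 1 from
// BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE. A header is acceptable only
// if it deserializes cleanly and its checkpoint_lsn is <= max_acceptable_lsn;
// when both are acceptable, the one with the larger checkpoint_count wins
// (the two counts always differ by exactly one, as the invariants below check).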
-int -toku_deserialize_ft_from(int fd, - LSN max_acceptable_lsn, - FT *ft) -{ +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) { struct rbuf rb_0; struct rbuf rb_1; uint64_t checkpoint_count_0 = 0; @@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd, int r0, r1, r; toku_off_t header_0_off = 0; - r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0); + r0 = deserialize_ft_from_fd_into_rbuf(fd, + header_0_off, + &rb_0, + &checkpoint_count_0, + &checkpoint_lsn_0, + &version_0); if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) { h0_acceptable = true; } - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); + toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + r1 = deserialize_ft_from_fd_into_rbuf(fd, + header_1_off, + &rb_1, + &checkpoint_count_1, + &checkpoint_lsn_1, + &version_1); if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { h1_acceptable = true; } @@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd, // We were unable to read either header or at least one is too // new. Certain errors are higher priority than others. Order of // these if/else if is important. - if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || + r1 == TOKUDB_DICTIONARY_TOO_NEW) { r = TOKUDB_DICTIONARY_TOO_NEW; - } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) { + } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || + r1 == TOKUDB_DICTIONARY_TOO_OLD) { r = TOKUDB_DICTIONARY_TOO_OLD; } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Both header checksums failed.\n"); r = TOKUDB_BAD_CHECKSUM; - } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) { + } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || + r1 == TOKUDB_DICTIONARY_NO_HEADER) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = r0 ? r0 : r1; //Arbitrarily report the error from the - //first header, unless it's readable + r = r0 ? r0 : r1; // Arbitrarily report the error from the + // first header, unless it's readable } - // it should not be possible for both headers to be later than the max_acceptable_lsn - invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && - (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); - invariant(r!=0); + // it should not be possible for both headers to be later than the + // max_acceptable_lsn + invariant( + !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && + (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); + invariant(r != 0); goto exit; } @@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd, invariant(version_0 >= version_1); rb = &rb_0; version = version_0; - } - else { + } else { invariant(checkpoint_count_1 == checkpoint_count_0 + 1); invariant(version_1 >= version_0); rb = &rb_1; @@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd, } else if (h0_acceptable) { if (r1 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); + fprintf( + stderr, + "Header 2 checksum failed, but header 1 ok. 
Proceeding.\n"); } rb = &rb_0; version = version_0; } else if (h1_acceptable) { if (r0 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); + fprintf( + stderr, + "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); } rb = &rb_1; version = version_1; @@ -718,15 +756,13 @@ exit: return r; } - -size_t toku_serialize_ft_size (FT_HEADER h) { +size_t toku_serialize_ft_size(FT_HEADER h) { size_t size = serialize_ft_min_size(h->layout_version); - //There is no dynamic data. - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + // There is no dynamic data. + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } - void toku_serialize_ft_to_wbuf ( struct wbuf *wbuf, FT_HEADER h, @@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf ( } void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { - lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); + lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS); struct wbuf w_translation; int64_t size_translation; int64_t address_translation; // Must serialize translation first, to get address,size for header. - bt->serialize_translation_to_wbuf(fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); + bt->serialize_translation_to_wbuf( + fd, &w_translation, &address_translation, &size_translation); + invariant(size_translation == w_translation.ndone); - // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. - assert(w_translation.size % 512 == 0); + // the number of bytes available in the buffer is 0 mod 512, and those last + // bytes are all initialized. + invariant(w_translation.size % 512 == 0); struct wbuf w_main; - size_t size_main = toku_serialize_ft_size(h); + size_t size_main = toku_serialize_ft_size(h); size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_aligned<block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + invariant(size_main_aligned < + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf); - for (size_t i=size_main; i<size_main_aligned; i++) mainbuf[i]=0; // initialize the end of the buffer with zeros + for (size_t i = size_main; i < size_main_aligned; i++) + mainbuf[i] = 0; // initialize the end of the buffer with zeros wbuf_init(&w_main, mainbuf, size_main); - toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation); + toku_serialize_ft_to_wbuf( + &w_main, h, address_translation, size_translation); lazy_assert(w_main.ndone == size_main); // Actually write translation table - // This write is guaranteed to read good data at the end of the buffer, since the + // This write is guaranteed to read good data at the end of the buffer, + // since the // w_translation.buf is padded with zeros to a 512-byte boundary. - toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), address_translation); - - //Everything but the header MUST be on disk before header starts. - //Otherwise we will think the header is good and some blocks might not - //yet be on disk. - //If the header has a cachefile we need to do cachefile fsync (to - //prevent crash if we redirected to dev null) - //If there is no cachefile we still need to do an fsync. 
+ toku_os_full_pwrite(fd, + w_translation.buf, + roundup_to_multiple(512, size_translation), + address_translation); + + // Everything but the header MUST be on disk before header starts. + // Otherwise we will think the header is good and some blocks might not + // yet be on disk. + // If the header has a cachefile we need to do cachefile fsync (to + // prevent crash if we redirected to dev null) + // If there is no cachefile we still need to do an fsync. if (cf) { toku_cachefile_fsync(cf); - } - else { + } else { toku_file_fsync(fd); } - //Alternate writing header to two locations: + // Alternate writing header to two locations: // Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE toku_off_t main_offset; - main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + main_offset = (h->checkpoint_count & 0x1) + ? 0 + : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); toku_free(w_main.buf); toku_free(w_translation.buf); diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index c4f4886b6a0..5914f8a1050 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); - block_allocator::maybe_initialize_trace(); toku_serialize_in_parallel = false; } void toku_ft_serialize_layer_destroy(void) { toku_thread_pool_destroy(&ft_pool); - block_allocator::maybe_close_trace(); } enum { FILE_CHANGE_INCREMENT = (16 << 20) }; @@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node, return 0; } -int -toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { - +int toku_serialize_ftnode_to(int fd, + BLOCKNUM blocknum, + FTNODE node, + FTNODE_DISK_DATA *ndd, + bool do_rebalancing, + FT ft, + bool for_checkpoint) { size_t n_to_write; size_t n_uncompressed_bytes; char *compressed_buf = nullptr; - // because toku_serialize_ftnode_to is only called for + // because toku_serialize_ftnode_to is only called for // in toku_ftnode_flush_callback, we pass false // for in_parallel. 
The reasoning is that when we write - // nodes to disk via toku_ftnode_flush_callback, we + // nodes to disk via toku_ftnode_flush_callback, we // assume that it is being done on a non-critical - // background thread (probably for checkpointing), and therefore + // background thread (probably for checkpointing), and therefore // should not hog CPU, // // Should the above facts change, we may want to revisit @@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA toku_unsafe_fetch(&toku_serialize_in_parallel), &n_to_write, &n_uncompressed_bytes, - &compressed_buf - ); + &compressed_buf); if (r != 0) { return r; } - // If the node has never been written, then write the whole buffer, including the zeros - invariant(blocknum.b>=0); + // If the node has never been written, then write the whole buffer, + // including the zeros + invariant(blocknum.b >= 0); DISKOFF offset; // Dirties the ft - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // Allocations for nodes high in the tree are considered 'hot', - // as they are likely to move again in the next checkpoint. - node->height); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); tokutime_t t1 = toku_time_now(); tokutime_t io_time = t1 - t0; - toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); + toku_ft_status_update_flush_reason( + node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); toku_free(compressed_buf); - node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + node->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again on + // the next checkpoint or eviction. return 0; } @@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { bn->seqinsert = orig_bn->seqinsert; bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; bn->stat64_delta = orig_bn->stat64_delta; + bn->logical_rows_delta = orig_bn->logical_rows_delta; bn->data_buffer.clone(&orig_bn->data_buffer); return bn; } @@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) { bn->seqinsert = 0; bn->stale_ancestor_messages_applied = false; bn->stat64_delta = ZEROSTATS; + bn->logical_rows_delta = 0; bn->data_buffer.init_zero(); return bn; } @@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, /* out */ int *layout_version_p); // This function upgrades a version 14 or 13 ftnode to the current -// verison. NOTE: This code assumes the first field of the rbuf has +// version. NOTE: This code assumes the first field of the rbuf has // already been read from the buffer (namely the layout_version of the // ftnode.) 
static int @@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL serialized->blocknum = log->blocknum; } -int -toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT ft, bool for_checkpoint) { +int toku_serialize_rollback_log_to(int fd, + ROLLBACK_LOG_NODE log, + SERIALIZED_ROLLBACK_LOG_NODE serialized_log, + bool is_serialized, + FT ft, + bool for_checkpoint) { size_t n_to_write; char *compressed_buf; struct serialized_rollback_log_node serialized_local; @@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA serialized_log->n_sub_blocks, serialized_log->sub_block, ft->h->compression_method, - &n_to_write, &compressed_buf); + &n_to_write, + &compressed_buf); // Dirties the ft DISKOFF offset; - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // We consider rollback log flushing the hottest possible allocation, - // since rollback logs are short-lived compared to FT nodes. - INT_MAX); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); - log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + log->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again + // on the next checkpoint or eviction. } return 0; } @@ -2704,7 +2711,7 @@ exit: } static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { - // This function exists solely to accomodate future changes in compression. + // This function exists solely to accommodate future changes in compression. int r = 0; if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc new file mode 100644 index 00000000000..922850fb3e0 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc @@ -0,0 +1,833 @@ +/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILIT or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. 
+ + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "ft/serialize/rbtree_mhs.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include <algorithm> + +namespace MhsRbTree { + + Tree::Tree() : _root(NULL), _align(1) {} + + Tree::Tree(uint64_t align) : _root(NULL), _align(align) {} + + Tree::~Tree() { Destroy(); } + + void Tree::PreOrder(Node *tree) const { + if (tree != NULL) { + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + PreOrder(tree->_left); + PreOrder(tree->_right); + } + } + + void Tree::PreOrder() { PreOrder(_root); } + + void Tree::InOrder(Node *tree) const { + if (tree != NULL) { + InOrder(tree->_left); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + InOrder(tree->_right); + } + } + + // yeah, i only care about in order visitor. -Jun + void Tree::InOrderVisitor(Node *tree, + void (*f)(void *, Node *, uint64_t), + void *extra, + uint64_t depth) { + if (tree != NULL) { + InOrderVisitor(tree->_left, f, extra, depth + 1); + f(extra, tree, depth); + InOrderVisitor(tree->_right, f, extra, depth + 1); + } + } + + void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t), + void *extra) { + InOrderVisitor(_root, f, extra, 0); + } + + void Tree::InOrder() { InOrder(_root); } + + void Tree::PostOrder(Node *tree) const { + if (tree != NULL) { + PostOrder(tree->_left); + PostOrder(tree->_right); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + } + } + + void Tree::PostOrder() { PostOrder(_root); } + + Node *Tree::SearchByOffset(uint64_t offset) { + Node *x = _root; + while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) { + if (offset < rbn_offset(x).ToInt()) + x = x->_left; + else + x = x->_right; + } + + return x; + } + + // mostly for testing + Node *Tree::SearchFirstFitBySize(uint64_t size) { + if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size && + rbn_right_mhs(_root) < size) { + return nullptr; + } else { + return SearchFirstFitBySizeHelper(_root, size); + } + } + + Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) { + if (EffectiveSize(x) >= size) { + // only possible to go left + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + else + return x; + } + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + + if (rbn_right_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_right, size); + + // this is an invalid state + Dump(); + ValidateBalance(); + ValidateMhs(); + invariant(0); + return NULL; + } + + Node *Tree::MinNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_left != NULL) + tree = tree->_left; + return tree; + } + + Node *Tree::MinNode() { return MinNode(_root); } + + Node *Tree::MaxNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_right != NULL) + tree = tree->_right; + return tree; + } + + Node *Tree::MaxNode() { return MaxNode(_root); } + + Node *Tree::SuccessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_right)) { + x = y; + y = y->_parent; + } + return y; + } + Node *Tree::Successor(Node *x) 
{ + if (x->_right != NULL) + return MinNode(x->_right); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + Node *Tree::PredecessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_left)) { + x = y; + y = y->_parent; + } + + return y; + } + Node *Tree::Predecessor(Node *x) { + if (x->_left != NULL) + return MaxNode(x->_left); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + /* + * px px + * / / + * x y + * / \ --(left rotation)--> / \ # + * lx y x ry + * / \ / \ + * ly ry lx ly + * max_hole_size updates are pretty local + */ + + void Tree::LeftRotate(Node *&root, Node *x) { + Node *y = x->_right; + + x->_right = y->_left; + rbn_right_mhs(x) = rbn_left_mhs(y); + + if (y->_left != NULL) + y->_left->_parent = x; + + y->_parent = x->_parent; + + if (x->_parent == NULL) { + root = y; + } else { + if (x->_parent->_left == x) { + x->_parent->_left = y; + } else { + x->_parent->_right = y; + } + } + y->_left = x; + rbn_left_mhs(y) = mhs_of_subtree(x); + + x->_parent = y; + } + + /* py py + * / / + * y x + * / \ --(right rotate)--> / \ # + * x ry lx y + * / \ / \ # + * lx rx rx ry + * + */ + + void Tree::RightRotate(Node *&root, Node *y) { + Node *x = y->_left; + + y->_left = x->_right; + rbn_left_mhs(y) = rbn_right_mhs(x); + + if (x->_right != NULL) + x->_right->_parent = y; + + x->_parent = y->_parent; + + if (y->_parent == NULL) { + root = x; + } else { + if (y == y->_parent->_right) + y->_parent->_right = x; + else + y->_parent->_left = x; + } + + x->_right = y; + rbn_right_mhs(x) = mhs_of_subtree(y); + y->_parent = x; + } + + // walking from this node up to update the mhs info + // whenver there is change on left/right mhs or size we should recalculate. + // prerequisit: the children of the node are mhs up-to-date. + void Tree::RecalculateMhs(Node *node) { + uint64_t *p_node_mhs = 0; + Node *parent = node->_parent; + + if (!parent) + return; + + uint64_t max_mhs = mhs_of_subtree(node); + if (node == parent->_left) { + p_node_mhs = &rbn_left_mhs(parent); + } else if (node == parent->_right) { + p_node_mhs = &rbn_right_mhs(parent); + } else { + return; + } + if (*p_node_mhs != max_mhs) { + *p_node_mhs = max_mhs; + RecalculateMhs(parent); + } + } + + void Tree::IsNewNodeMergable(Node *pred, + Node *succ, + Node::BlockPair pair, + bool *left_merge, + bool *right_merge) { + if (pred) { + OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred); + if (end_of_pred < pair._offset) + *left_merge = false; + else { + invariant(end_of_pred == pair._offset); + *left_merge = true; + } + } + if (succ) { + OUUInt64 begin_of_succ = rbn_offset(succ); + OUUInt64 end_of_node = pair._offset + pair._size; + if (end_of_node < begin_of_succ) { + *right_merge = false; + } else { + invariant(end_of_node == begin_of_succ); + *right_merge = true; + } + } + } + + void Tree::AbsorbNewNode(Node *pred, + Node *succ, + Node::BlockPair pair, + bool left_merge, + bool right_merge, + bool is_right_child) { + invariant(left_merge || right_merge); + if (left_merge && right_merge) { + // merge to the succ + if (!is_right_child) { + rbn_size(succ) += pair._size; + rbn_offset(succ) = pair._offset; + // merge to the pred + rbn_size(pred) += rbn_size(succ); + // to keep the invariant of the tree -no overlapping holes + rbn_offset(succ) += rbn_size(succ); + rbn_size(succ) = 0; + RecalculateMhs(succ); + RecalculateMhs(pred); + // pred dominates succ. this is going to + // update the pred labels separately. 
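                // Example: pred=(0,4), new pair=(4,2), succ=(6,3): succ first
                // grows to (4,5), pred absorbs it to become (0,9), and succ is
                // left as the empty hole (9,0) so it can be removed below
                // without ever creating an overlap.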
+ // remove succ + RawRemove(_root, succ); + } else { + rbn_size(pred) += pair._size; + rbn_offset(succ) = rbn_offset(pred); + rbn_size(succ) += rbn_size(pred); + rbn_offset(pred) += rbn_size(pred); + rbn_size(pred) = 0; + RecalculateMhs(pred); + RecalculateMhs(succ); + // now remove pred + RawRemove(_root, pred); + } + } else if (left_merge) { + rbn_size(pred) += pair._size; + RecalculateMhs(pred); + } else if (right_merge) { + rbn_offset(succ) -= pair._size; + rbn_size(succ) += pair._size; + RecalculateMhs(succ); + } + } + // this is the most tedious part, but not complicated: + // 1.find where to insert the pair + // 2.if the pred and succ can merge with the pair. merge with them. either + // pred + // or succ can be removed. + // 3. if only left-mergable or right-mergeable, just merge + // 4. non-mergable case. insert the node and run the fixup. + + int Tree::Insert(Node *&root, Node::BlockPair pair) { + Node *x = _root; + Node *y = NULL; + bool left_merge = false; + bool right_merge = false; + Node *node = NULL; + + while (x != NULL) { + y = x; + if (pair._offset < rbn_key(x)) + x = x->_left; + else + x = x->_right; + } + + // we found where to insert, lets find out the pred and succ for + // possible + // merges. + // node->parent = y; + Node *pred, *succ; + if (y != NULL) { + if (pair._offset < rbn_key(y)) { + // as the left child + pred = PredecessorHelper(y->_parent, y); + succ = y; + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, false); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_left = node; + node->_parent = y; + RecalculateMhs(node); + } + + } else { + // as the right child + pred = y; + succ = SuccessorHelper(y->_parent, y); + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, true); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_right = node; + node->_parent = y; + RecalculateMhs(node); + } + } + } else { + Node::Pair mhsp {0, 0}; + node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + root = node; + } + if (!left_merge && !right_merge) { + invariant_notnull(node); + node->_color = EColor::RED; + return InsertFixup(root, node); + } + return 0; + } + + int Tree::InsertFixup(Node *&root, Node *node) { + Node *parent, *gparent; + while ((parent = rbn_parent(node)) && rbn_is_red(parent)) { + gparent = rbn_parent(parent); + if (parent == gparent->_left) { + { + Node *uncle = gparent->_right; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_right == node) { + Node *tmp; + LeftRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + + rbn_set_black(parent); + rbn_set_red(gparent); + RightRotate(root, gparent); + } else { + { + Node *uncle = gparent->_left; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_left == node) { + Node *tmp; + RightRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + 
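                // Mirror case: parent is a right child with a black uncle; any
                // inner-child zig was rotated away above, so recoloring plus a
                // left rotation around the grandparent restores the red-black
                // invariants (LeftRotate also refreshes the mhs labels).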
rbn_set_black(parent); + rbn_set_red(gparent); + LeftRotate(root, gparent); + } + } + rbn_set_black(root); + return 0; + } + + int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); } + + uint64_t Tree::Remove(size_t size) { + Node *node = SearchFirstFitBySize(size); + return Remove(_root, node, size); + } + + void Tree::RawRemove(Node *&root, Node *node) { + Node *child, *parent; + EColor color; + + if ((node->_left != NULL) && (node->_right != NULL)) { + Node *replace = node; + replace = replace->_right; + while (replace->_left != NULL) + replace = replace->_left; + + if (rbn_parent(node)) { + if (rbn_parent(node)->_left == node) + rbn_parent(node)->_left = replace; + else + rbn_parent(node)->_right = replace; + } else { + root = replace; + } + child = replace->_right; + parent = rbn_parent(replace); + color = rbn_color(replace); + + if (parent == node) { + parent = replace; + } else { + if (child) + rbn_parent(child) = parent; + + parent->_left = child; + rbn_left_mhs(parent) = rbn_right_mhs(replace); + RecalculateMhs(parent); + replace->_right = node->_right; + rbn_set_parent(node->_right, replace); + rbn_right_mhs(replace) = rbn_right_mhs(node); + } + + replace->_parent = node->_parent; + replace->_color = node->_color; + replace->_left = node->_left; + rbn_left_mhs(replace) = rbn_left_mhs(node); + node->_left->_parent = replace; + RecalculateMhs(replace); + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + return; + } + + if (node->_left != NULL) + child = node->_left; + else + child = node->_right; + + parent = node->_parent; + color = node->_color; + + if (child) + child->_parent = parent; + + if (parent) { + if (parent->_left == node) { + parent->_left = child; + rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } else { + parent->_right = child; + rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } + RecalculateMhs(parent); + } else + root = child; + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + } + + void Tree::RawRemove(uint64_t offset) { + Node *node = SearchByOffset(offset); + RawRemove(_root, node); + } + static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; + } + uint64_t Tree::Remove(Node *&root, Node *node, size_t size) { + OUUInt64 n_offset = rbn_offset(node); + OUUInt64 n_size = rbn_size(node); + OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align)); + + invariant((answer_offset + size) <= (n_offset + n_size)); + if (answer_offset == n_offset) { + rbn_offset(node) += size; + rbn_size(node) -= size; + RecalculateMhs(node); + if (rbn_size(node) == 0) { + RawRemove(root, node); + } + + } else { + if (answer_offset + size == n_offset + n_size) { + rbn_size(node) -= size; + RecalculateMhs(node); + } else { + // well, cut in the middle... 
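                // Example with _align=4096: hole=(4000,10000) and size=4096
                // give answer_offset=4096; the node keeps the head (4000,96)
                // and the tail (8192,5808) is re-inserted as its own hole.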
+ rbn_size(node) = answer_offset - n_offset; + RecalculateMhs(node); + Insert(_root, + {(answer_offset + size), + (n_offset + n_size) - (answer_offset + size)}); + } + } + return answer_offset.ToInt(); + } + + void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) { + Node *other; + while ((!node || rbn_is_black(node)) && node != root) { + if (parent->_left == node) { + other = parent->_right; + if (rbn_is_red(other)) { + // Case 1: the brother of X, w, is read + rbn_set_black(other); + rbn_set_red(parent); + LeftRotate(root, parent); + other = parent->_right; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both of w's children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_right || rbn_is_black(other->_right)) { + // Case 3: w is black and left child of w is red but + // right + // child is black + rbn_set_black(other->_left); + rbn_set_red(other); + RightRotate(root, other); + other = parent->_right; + } + // Case 4: w is black and right child of w is red, + // regardless of + // left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_right); + LeftRotate(root, parent); + node = root; + break; + } + } else { + other = parent->_left; + if (rbn_is_red(other)) { + // Case 1: w is red + rbn_set_black(other); + rbn_set_red(parent); + RightRotate(root, parent); + other = parent->_left; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_left || rbn_is_black(other->_left)) { + // Case 3: w is black and left child of w is red whereas + // right child is black + rbn_set_black(other->_right); + rbn_set_red(other); + LeftRotate(root, other); + other = parent->_left; + } + // Case 4:w is black and right child of w is red, regardless + // of + // the left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_left); + RightRotate(root, parent); + node = root; + break; + } + } + } + if (node) + rbn_set_black(node); + } + + void Tree::Destroy(Node *&tree) { + if (tree == NULL) + return; + + if (tree->_left != NULL) + Destroy(tree->_left); + if (tree->_right != NULL) + Destroy(tree->_right); + + delete tree; + tree = NULL; + } + + void Tree::Destroy() { Destroy(_root); } + + void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) { + if (tree != NULL) { + if (dir == EDirection::NONE) + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64 + "))(B) is root\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree)); + else + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64 + "))(%c) is %" PRIu64 "'s %s\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree), + rbn_is_red(tree) ? 'R' : 'B', + pair._offset.ToInt(), + dir == EDirection::RIGHT ? 
"right child" : "left child"); + + Dump(tree->_left, tree->_hole, EDirection::LEFT); + Dump(tree->_right, tree->_hole, EDirection::RIGHT); + } + } + + uint64_t Tree::EffectiveSize(Node *node) { + OUUInt64 offset = rbn_offset(node); + OUUInt64 size = rbn_size(node); + OUUInt64 end = offset + size; + OUUInt64 aligned_offset(align(offset.ToInt(), _align)); + if (aligned_offset > end) { + return 0; + } + return (end - aligned_offset).ToInt(); + } + + void Tree::Dump() { + if (_root != NULL) + Dump(_root, _root->_hole, (EDirection)0); + } + + static void vis_bal_f(void *extra, Node *node, uint64_t depth) { + uint64_t **p = (uint64_t **)extra; + uint64_t min = *p[0]; + uint64_t max = *p[1]; + if (node->_left) { + Node *left = node->_left; + invariant(node == left->_parent); + } + + if (node->_right) { + Node *right = node->_right; + invariant(node == right->_parent); + } + + if (!node->_left || !node->_right) { + if (min > depth) { + *p[0] = depth; + } else if (max < depth) { + *p[1] = depth; + } + } + } + + void Tree::ValidateBalance() { + uint64_t min_depth = 0xffffffffffffffff; + uint64_t max_depth = 0; + if (!_root) { + return; + } + uint64_t *p[2] = {&min_depth, &max_depth}; + InOrderVisitor(vis_bal_f, (void *)p); + invariant((min_depth + 1) * 2 >= max_depth + 1); + } + + static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) { + Node::BlockPair **p = (Node::BlockPair **)extra; + + invariant_notnull(*p); + invariant((*p)->_offset == node->_hole._offset); + + *p = *p + 1; + } + + // validate the input pairs matches with sorted pairs + void Tree::ValidateInOrder(Node::BlockPair *pairs) { + InOrderVisitor(vis_cmp_f, &pairs); + } + + uint64_t Tree::ValidateMhs(Node *node) { + if (!node) + return 0; + else { + uint64_t mhs_left = ValidateMhs(node->_left); + uint64_t mhs_right = ValidateMhs(node->_right); + if (mhs_left != rbn_left_mhs(node)) { + printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left); + Dump(node, node->_hole, (EDirection)0); + } + invariant(mhs_left == rbn_left_mhs(node)); + + if (mhs_right != rbn_right_mhs(node)) { + printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right); + Dump(node, node->_hole, (EDirection)0); + } + invariant(mhs_right == rbn_right_mhs(node)); + return std::max(EffectiveSize(node), std::max(mhs_left, mhs_right)); + } + } + + void Tree::ValidateMhs() { + if (!_root) + return; + uint64_t mhs_left = ValidateMhs(_root->_left); + uint64_t mhs_right = ValidateMhs(_root->_right); + invariant(mhs_left == rbn_left_mhs(_root)); + invariant(mhs_right == rbn_right_mhs(_root)); + } + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h new file mode 100644 index 00000000000..92f1e278e1a --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h @@ -0,0 +1,351 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include <db.h> + +#include "portability/toku_pthread.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +// RBTree(Red-black tree) with max hole sizes for subtrees. + +// This is a tentative data struct to improve the block allocation time +// complexity from the linear time to the log time. Please be noted this DS only +// supports first-fit for now. It is actually easier to do it with +// best-fit.(just +// sort by size). + +// RBTree is a classic data struct with O(log(n)) for insertion, deletion and +// search. Many years have seen its efficiency. + +// a *hole* is the representation of an available BlockPair for allocation. +// defined as (start_address,size) or (offset, size) interchangably. + +// each node has a *label* to indicate a pair of the max hole sizes for its +// subtree. + +// We are implementing a RBTree with max hole sizes for subtree. It is a red +// black tree that is sorted by the start_address but also labeld with the max +// hole sizes of the subtrees. + +// [(6,3)] -> [(offset, size)], the hole +// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label +/* / \ */ +// [(0, 1)] [(10, 5)] +// [{0, 2}] [{0, 0}] +/* \ */ +// [(3, 2)] +// [{0, 0}] +// request of allocation size=2 goes from root to [(3,2)]. + +// above example shows a simplified RBTree_max_holes. +// it is easier to tell the search time is O(log(n)) as we can make a decision +// on each descent until we get to the target. + +// the only question is if we can keep the maintenance cost low -- and i think +// it is not a problem becoz an insertion/deletion is only going to update the +// max_hole_sizes of the nodes along the path from the root to the node to be +// deleted/inserted. The path can be cached and search is anyway O(log(n)). + +// unlike the typical rbtree, Tree has to handle the inserts and deletes +// with more care: an allocation that triggers the delete might leave some +// unused space which we can simply update the start_addr and size without +// worrying overlapping. An free might not only mean the insertion but also +// *merging* with the adjacent holes. + +namespace MhsRbTree { + +#define offset_t uint64_t + enum class EColor { RED, BLACK }; + enum class EDirection { NONE = 0, LEFT, RIGHT }; + + // I am a bit tired of fixing overflow/underflow, just quickly craft some + // int + // class that has an infinity-like max value and prevents overflow and + // underflow. If you got a file offset larger than MHS_MAX_VAL, it is not + // a problem here. 
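To make the design comment above concrete, here is a minimal, hypothetical sketch of how the per-node max-hole-size labels drive a first-fit descent; it is not code from the patch, and the MiniNode and first_fit names are illustrative. The idea: prefer the left subtree whenever its label promises a big-enough hole (the tree is ordered by offset, so left means lower offsets), otherwise take the current hole, otherwise descend right. The patch's OUUInt64 helper class follows just below.

    #include <cstdint>

    // Illustrative node: a free hole plus the max hole size in each subtree.
    struct MiniNode {
        uint64_t offset, size;        // the hole this node represents
        uint64_t left_mhs, right_mhs; // max hole size in left/right subtree
        MiniNode *left, *right;
    };

    // First-fit search in O(height): the labels tell us, without visiting a
    // subtree, whether it can possibly satisfy the request.
    static MiniNode *first_fit(MiniNode *n, uint64_t want) {
        if (n == nullptr)
            return nullptr;
        if (n->left_mhs >= want)             // a big-enough hole exists to the left,
            return first_fit(n->left, want); // at a lower offset: prefer it
        if (n->size >= want)                 // otherwise this hole itself may do
            return n;
        if (n->right_mhs >= want)            // otherwise the only hope is to the right
            return first_fit(n->right, want);
        return nullptr;                      // no hole of that size in this subtree
    }

On the example tree in the comment above, a size=2 request descends root (6,3), then (0,1), then (3,2), exactly as described there. The patch's own SearchFirstFitBySize presumably follows this shape, comparing against EffectiveSize, the hole size that remains after alignment, rather than the raw size.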
:-/ - JYM + class OUUInt64 { + public: + static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff; + OUUInt64() : _value(0) {} + OUUInt64(uint64_t s) : _value(s) {} + bool operator<(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value < r.ToInt(); + } + bool operator>(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value > r.ToInt(); + } + bool operator<=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value <= r.ToInt(); + } + bool operator>=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value >= r.ToInt(); + } + OUUInt64 operator+(const OUUInt64 &r) const { + if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) { + OUUInt64 tmp(MHS_MAX_VAL); + return tmp; + } else { + // detecting overflow + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + uint64_t plus = _value + r.ToInt(); + OUUInt64 tmp(plus); + return tmp; + } + } + OUUInt64 operator-(const OUUInt64 &r) const { + invariant(r.ToInt() != MHS_MAX_VAL); + if (_value == MHS_MAX_VAL) { + return *this; + } else { + invariant(_value >= r.ToInt()); + uint64_t minus = _value - r.ToInt(); + OUUInt64 tmp(minus); + return tmp; + } + } + OUUInt64 operator-=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + invariant(r.ToInt() != MHS_MAX_VAL); + invariant(_value >= r.ToInt()); + _value -= r.ToInt(); + } + return *this; + } + OUUInt64 operator+=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + if (r.ToInt() == MHS_MAX_VAL) { + _value = MHS_MAX_VAL; + } else { + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + this->_value += r.ToInt(); + } + } + return *this; + } + bool operator==(const OUUInt64 &r) const { + return _value == r.ToInt(); + } + bool operator!=(const OUUInt64 &r) const { + return _value != r.ToInt(); + } + OUUInt64 operator=(const OUUInt64 &r) { + _value = r.ToInt(); + return *this; + } + uint64_t ToInt() const { return _value; } + + private: + uint64_t _value; + }; + + class Node { + public: + struct BlockPair { + OUUInt64 _offset; + OUUInt64 _size; + + BlockPair() : _offset(0), _size(0) {} + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + + BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} + int operator<(const struct BlockPair &rhs) const { + return _offset < rhs._offset; + } + int operator<(const uint64_t &o) const { return _offset < o; } + }; + + struct Pair { + uint64_t _left; + uint64_t _right; + Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {} + }; + + EColor _color; + struct BlockPair _hole; + struct Pair _label; + Node *_left; + Node *_right; + Node *_parent; + + Node(EColor c, + Node::BlockPair h, + struct Pair lb, + Node *l, + Node *r, + Node *p) + : _color(c), + _hole(h), + _label(lb), + _left(l), + _right(r), + _parent(p) {} + }; + + class Tree { + private: + Node *_root; + uint64_t _align; + + public: + Tree(); + Tree(uint64_t); + ~Tree(); + + void PreOrder(); + void InOrder(); + void PostOrder(); + // immutable operations + Node *SearchByOffset(uint64_t addr); + Node *SearchFirstFitBySize(uint64_t size); + + Node *MinNode(); + Node *MaxNode(); + + Node *Successor(Node *); + Node *Predecessor(Node *); + + // mapped from tree_allocator::free_block + int Insert(Node::BlockPair pair); + // mapped from tree_allocator::alloc_block + uint64_t Remove(size_t size); + // mapped from tree_allocator::alloc_block_after + + void RawRemove(uint64_t offset); + void 
Destroy(); + // print the tree + void Dump(); + // validation + // balance + void ValidateBalance(); + void ValidateInOrder(Node::BlockPair *); + void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *); + void ValidateMhs(); + + private: + void PreOrder(Node *node) const; + void InOrder(Node *node) const; + void PostOrder(Node *node) const; + Node *SearchByOffset(Node *node, offset_t addr) const; + Node *SearchFirstFitBySize(Node *node, size_t size) const; + + Node *MinNode(Node *node); + Node *MaxNode(Node *node); + + // rotations to fix up. we will have to update the labels too. + void LeftRotate(Node *&root, Node *x); + void RightRotate(Node *&root, Node *y); + + int Insert(Node *&root, Node::BlockPair pair); + int InsertFixup(Node *&root, Node *node); + + void RawRemove(Node *&root, Node *node); + uint64_t Remove(Node *&root, Node *node, size_t size); + void RawRemoveFixup(Node *&root, Node *node, Node *parent); + + void Destroy(Node *&tree); + void Dump(Node *tree, Node::BlockPair pair, EDirection dir); + void RecalculateMhs(Node *node); + void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *); + void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool); + Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size); + + Node *SuccessorHelper(Node *y, Node *x); + + Node *PredecessorHelper(Node *y, Node *x); + + void InOrderVisitor(Node *, + void (*f)(void *, Node *, uint64_t), + void *, + uint64_t); + uint64_t ValidateMhs(Node *); + + uint64_t EffectiveSize(Node *); +// mixed with some macros..... +#define rbn_parent(r) ((r)->_parent) +#define rbn_color(r) ((r)->_color) +#define rbn_is_red(r) ((r)->_color == EColor::RED) +#define rbn_is_black(r) ((r)->_color == EColor::BLACK) +#define rbn_set_black(r) \ + do { \ + (r)->_color = EColor::BLACK; \ + } while (0) +#define rbn_set_red(r) \ + do { \ + (r)->_color = EColor::RED; \ + } while (0) +#define rbn_set_parent(r, p) \ + do { \ + (r)->_parent = (p); \ + } while (0) +#define rbn_set_color(r, c) \ + do { \ + (r)->_color = (c); \ + } while (0) +#define rbn_set_offset(r) \ + do { \ + (r)->_hole._offset = (c); \ + } while (0) +#define rbn_set_size(r, c) \ + do { \ + (r)->_hole._size = (c); \ + } while (0) +#define rbn_set_left_mhs(r, c) \ + do { \ + (r)->_label._left = (c); \ + } while (0) +#define rbn_set_right_mhs(r, c) \ + do { \ + (r)->_label._right = (c); \ + } while (0) +#define rbn_size(r) ((r)->_hole._size) +#define rbn_offset(r) ((r)->_hole._offset) +#define rbn_key(r) ((r)->_hole._offset) +#define rbn_left_mhs(r) ((r)->_label._left) +#define rbn_right_mhs(r) ((r)->_label._right) +#define mhs_of_subtree(y) \ + (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y))) + }; + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc deleted file mode 100644 index 3670ef81cc2..00000000000 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. 
- - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -#include "ft/tests/test.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static const uint64_t alignment = 4096; - -static void test_first_vs_best_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 6 * alignment), - // hole between 7x align -> 8x align - block_allocator::blockpair(8 * alignment, 4 * alignment), - // hole between 12x align -> 16x align - block_allocator::blockpair(16 * alignment, 1 * alignment), - block_allocator::blockpair(17 * alignment, 2 * alignment), - // hole between 19 align -> 21x align - block_allocator::blockpair(21 * alignment, 2 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // first fit - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); - - // best fit - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment); - assert(bp == &pairs[3]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); -} - -static void test_padded_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 1 * alignment), - // 4096 byte hole after bp[0] - block_allocator::blockpair(3 * alignment, 1 * alignment), - // 8192 byte hole after bp[1] - block_allocator::blockpair(6 * alignment, 1 * alignment), - // 16384 byte hole after bp[2] - block_allocator::blockpair(11 * alignment, 1 * alignment), - // 32768 byte hole after bp[3] - block_allocator::blockpair(17 * alignment, 1 * alignment), - // 116kb hole after bp[4] - block_allocator::blockpair(113 * alignment, 1 * alignment), - // 256kb hole after bp[5] - block_allocator::blockpair(371 * alignment, 1 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // padding 
for a 100 byte allocation will be < than standard alignment, - // so it should fit in the first 4096 byte hole. - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); - assert(bp == &pairs[0]); - - // Even padded, a 12kb alloc will fit in a 16kb hole - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); - assert(bp == &pairs[2]); - - // would normally fit in the 116kb hole but the padding will bring it over - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); - assert(bp == &pairs[5]); - - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); - assert(bp == &pairs[5]); -} - -int test_main(int argc, const char *argv[]) { - (void) argc; - (void) argv; - - test_first_vs_best_fit(); - test_padded_fit(); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc index d80ee83cbc9..3eff52b915d 100644 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc @@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" -static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { - ba->validate(); +static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) { + ba->Validate(); uint64_t actual_answer; - const uint64_t heat = random() % 2; - ba->alloc_block(512 * size, heat, &actual_answer); - ba->validate(); + ba->AllocBlock(512 * size, &actual_answer); + ba->Validate(); - assert(actual_answer%512==0); - *answer = actual_answer/512; + invariant(actual_answer % 512 == 0); + *answer = actual_answer / 512; } -static void ba_free(block_allocator *ba, uint64_t offset) { - ba->validate(); - ba->free_block(offset * 512); - ba->validate(); +static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) { + ba->Validate(); + ba->FreeBlock(offset * 512, 512 * size); + ba->Validate(); } -static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, - uint64_t expected_offset, uint64_t expected_size) { +static void ba_check_l(BlockAllocator *ba, + uint64_t blocknum_in_layout_order, + uint64_t expected_offset, + uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==0); - assert(expected_offset*512 == actual_offset); - assert(expected_size *512 == actual_size); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + invariant(r == 0); + invariant(expected_offset * 512 == actual_offset); + invariant(expected_size * 512 == actual_size); } -static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { +static void ba_check_none(BlockAllocator *ba, + uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==-1); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + invariant(r == -1); } - // Simple block allocator test -static void test_ba0(block_allocator::allocation_strategy strategy) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(100*512, 1*512); - ba->set_strategy(strategy); - assert(ba->allocated_limit()==100*512); +static void 
test_ba0() { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(100 * 512, 1 * 512); + invariant(ba->AllocatedLimit() == 100 * 512); uint64_t b2, b3, b4, b5, b6, b7; - ba_alloc(ba, 100, &b2); - ba_alloc(ba, 100, &b3); - ba_alloc(ba, 100, &b4); - ba_alloc(ba, 100, &b5); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b2); - ba_alloc(ba, 100, &b2); - ba_free(ba, b4); - ba_free(ba, b6); + ba_alloc(ba, 100, &b2); + ba_alloc(ba, 100, &b3); + ba_alloc(ba, 100, &b4); + ba_alloc(ba, 100, &b5); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b2); + ba_free(ba, b4, 100); + ba_free(ba, b6, 100); uint64_t b8, b9; - ba_alloc(ba, 100, &b4); - ba_free(ba, b2); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b8); - ba_alloc(ba, 100, &b9); - ba_free(ba, b6); - ba_free(ba, b7); - ba_free(ba, b8); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b4); - ba_alloc(ba, 100, &b4); - - ba->destroy(); + ba_alloc(ba, 100, &b4); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b8); + ba_alloc(ba, 100, &b9); + ba_free(ba, b6, 100); + ba_free(ba, b7, 100); + ba_free(ba, b8, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b4, 100); + ba_alloc(ba, 100, &b4); + + ba->Destroy(); } // Manually to get coverage of all the code in the block allocator. -static void -test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(0*512, 1*512); - ba->set_strategy(strategy); - - int n_blocks=0; +static void test_ba1(int n_initial) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(0 * 512, 1 * 512); + + int n_blocks = 0; uint64_t blocks[1000]; for (int i = 0; i < 1000; i++) { - if (i < n_initial || random() % 2 == 0) { - if (n_blocks < 1000) { - ba_alloc(ba, 1, &blocks[n_blocks]); - //printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); - n_blocks++; - } - } else { - if (n_blocks > 0) { - int blocknum = random()%n_blocks; - //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); - ba_free(ba, blocks[blocknum]); - blocks[blocknum]=blocks[n_blocks-1]; - n_blocks--; - } - } + if (i < n_initial || random() % 2 == 0) { + if (n_blocks < 1000) { + ba_alloc(ba, 1, &blocks[n_blocks]); + // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); + n_blocks++; + } + } else { + if (n_blocks > 0) { + int blocknum = random() % n_blocks; + // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]); + ba_free(ba, blocks[blocknum], 1); + blocks[blocknum] = blocks[n_blocks - 1]; + n_blocks--; + } + } } - - ba->destroy(); + + ba->Destroy(); } - + // Check to see if it is first fit or best fit. 
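Before the rewritten test_ba2 below, a quick reminder of the two policies it distinguishes, since the deleted block_allocator_strategy_test above exercised both while the new MhsRbTree allocator keeps only first fit. This is an illustrative, linear-scan sketch of the selection rules only, not the patch's implementation; Hole, first_fit_scan and best_fit_scan are hypothetical names.

    #include <cstddef>
    #include <cstdint>

    struct Hole { uint64_t offset, size; };

    // First fit: the lowest-offset hole that is big enough.
    static const Hole *first_fit_scan(const Hole *holes, size_t n, uint64_t want) {
        for (size_t i = 0; i < n; i++)
            if (holes[i].size >= want)
                return &holes[i];
        return nullptr;
    }

    // Best fit: the smallest hole that is still big enough (ties go to the earlier hole).
    static const Hole *best_fit_scan(const Hole *holes, size_t n, uint64_t want) {
        const Hole *best = nullptr;
        for (size_t i = 0; i < n; i++)
            if (holes[i].size >= want && (!best || holes[i].size < best->size))
                best = &holes[i];
        return best;
    }

First fit returns the lowest-offset hole that is large enough; best fit returns the smallest such hole. The assertions in test_ba2, for example b3 == BSIZE after freeing the hole at BSIZE, are what pin the allocator to first-fit behaviour.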
-static void -test_ba2 (void) -{ - block_allocator allocator; - block_allocator *ba = &allocator; +static void test_ba2(void) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - ba->create(100*512, BSIZE*512); - ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); - assert(ba->allocated_limit()==100*512); - - ba_check_l (ba, 0, 0, 100); - ba_check_none (ba, 1); - - ba_alloc (ba, 100, &b[0]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_none (ba, 2); - - ba_alloc (ba, BSIZE + 100, &b[1]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_none (ba, 3); - - ba_alloc (ba, 100, &b[2]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_none (ba, 4); - - ba_alloc (ba, 100, &b[3]); - ba_alloc (ba, 100, &b[4]); - ba_alloc (ba, 100, &b[5]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); - - ba_free (ba, 4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 5*BSIZE, 100); - ba_check_l (ba, 4, 6*BSIZE, 100); - ba_check_l (ba, 5, 7*BSIZE, 100); - ba_check_none (ba, 6); + ba->Create(100 * 512, BSIZE * 512); + invariant(ba->AllocatedLimit() == 100 * 512); + + ba_check_l(ba, 0, 0, 100); + ba_check_none(ba, 1); + + ba_alloc(ba, 100, &b[0]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_none(ba, 2); + + ba_alloc(ba, BSIZE + 100, &b[1]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_none(ba, 3); + + ba_alloc(ba, 100, &b[2]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_none(ba, 4); + + ba_alloc(ba, 100, &b[3]); + ba_alloc(ba, 100, &b[4]); + ba_alloc(ba, 100, &b[5]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); + + ba_free(ba, 4 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 5 * BSIZE, 100); + ba_check_l(ba, 4, 6 * BSIZE, 100); + ba_check_l(ba, 5, 7 * BSIZE, 100); + ba_check_none(ba, 6); uint64_t b2; ba_alloc(ba, 100, &b2); - assert(b2==4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); - - ba_free (ba, BSIZE); - ba_free (ba, 5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 2, 4*BSIZE, 100); - ba_check_l (ba, 3, 6*BSIZE, 100); - ba_check_l (ba, 4, 7*BSIZE, 100); - ba_check_none (ba, 5); - - // This alloc will allocate the first block after the reserve space in the case of first fit. 
+ invariant(b2 == 4 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); + + ba_free(ba, BSIZE, 100); + ba_free(ba, 5 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 2, 4 * BSIZE, 100); + ba_check_l(ba, 3, 6 * BSIZE, 100); + ba_check_l(ba, 4, 7 * BSIZE, 100); + ba_check_none(ba, 5); + + // This alloc will allocate the first block after the reserve space in the + // case of first fit. uint64_t b3; ba_alloc(ba, 100, &b3); - assert(b3== BSIZE); // First fit. + invariant(b3 == BSIZE); // First fit. // if (b3==5*BSIZE) then it is next fit. // Now 5*BSIZE is free uint64_t b5; ba_alloc(ba, 100, &b5); - assert(b5==5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); + invariant(b5 == 5 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); // Now all blocks are busy uint64_t b6, b7, b8; ba_alloc(ba, 100, &b6); ba_alloc(ba, 100, &b7); ba_alloc(ba, 100, &b8); - assert(b6==8*BSIZE); - assert(b7==9*BSIZE); - assert(b8==10*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_l (ba, 7, 8*BSIZE, 100); - ba_check_l (ba, 8, 9*BSIZE, 100); - ba_check_l (ba, 9, 10*BSIZE, 100); - ba_check_none (ba, 10); - - ba_free(ba, 9*BSIZE); - ba_free(ba, 7*BSIZE); + invariant(b6 == 8 * BSIZE); + invariant(b7 == 9 * BSIZE); + invariant(b8 == 10 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_l(ba, 7, 8 * BSIZE, 100); + ba_check_l(ba, 8, 9 * BSIZE, 100); + ba_check_l(ba, 9, 10 * BSIZE, 100); + ba_check_none(ba, 10); + + ba_free(ba, 9 * BSIZE, 100); + ba_free(ba, 7 * BSIZE, 100); uint64_t b9; ba_alloc(ba, 100, &b9); - assert(b9==7*BSIZE); + invariant(b9 == 7 * BSIZE); - ba_free(ba, 5*BSIZE); - ba_free(ba, 2*BSIZE); + ba_free(ba, 5 * BSIZE, 100); + ba_free(ba, 2 * BSIZE, BSIZE + 100); uint64_t b10, b11; ba_alloc(ba, 100, &b10); - assert(b10==2*BSIZE); + invariant(b10 == 2 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==3*BSIZE); + invariant(b11 == 3 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==5*BSIZE); + invariant(b11 == 5 * BSIZE); - ba->destroy(); + ba->Destroy(); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - enum block_allocator::allocation_strategy strategies[] = { - block_allocator::BA_STRATEGY_FIRST_FIT, - block_allocator::BA_STRATEGY_BEST_FIT, - block_allocator::BA_STRATEGY_PADDED_FIT, - block_allocator::BA_STRATEGY_HEAT_ZONE, - }; - for (size_t i = 0; i < 
sizeof(strategies) / sizeof(strategies[0]); i++) { - test_ba0(strategies[i]); - test_ba1(strategies[i], 0); - test_ba1(strategies[i], 10); - test_ba1(strategies[i], 20); - } +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { + test_ba0(); + test_ba1(0); + test_ba1(10); + test_ba1(20); test_ba2(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc index a7c48ef709a..ee68ab3ef0b 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc @@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // #5978 is fixed. Here is what we do. We have four pairs with // blocknums and fullhashes of 1,2,3,4. The cachetable has only // two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4. -// We pin all four with expensive write locks. Then, on backgroud threads, +// We pin all four with expensive write locks. Then, on background threads, // we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and // we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this // enough times, and we should see a deadlock before the fix, and no deadlock diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc index be4bae898be..51cf70c3e76 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc @@ -77,7 +77,7 @@ flush ( // // test the following things for simple cloning: -// - verifies that after teh checkpoint ends, the PAIR is properly +// - verifies that after the checkpoint ends, the PAIR is properly // dirty or clean based on the second unpin // static void diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc index cb03a23e0fc..7abd2267a7e 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc @@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "test.h" -static int -int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { - int64_t x = *(int64_t *) a->data; - int64_t y = *(int64_t *) b->data; - - if (x<y) return -1; - if (x>y) return 1; +static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) { + int64_t x = *(int64_t *)a->data; + int64_t y = *(int64_t *)b->data; + + if (x < y) + return -1; + if (x > y) + return 1; return 0; } -static void -test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - bfe.create_for_prefetch( ft_h, cursor); + bfe.create_for_prefetch(ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + 
invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t)); cursor->left_is_neg_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - 
bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { toku_free(cursor); } -static void -test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); 
cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - + uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; @@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { ftnode_fetch_extra bfe; bfe.create_for_subset_read( - ft_h, - NULL, - &left, - &right, - false, - false, - false, - false - ); - + ft_h, NULL, &left, &right, false, false, false, false); + // fake the childnum to read // set disable_prefetching ON bfe.child_to_read = 2; bfe.disable_prefetching = true; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 2; bfe.disable_prefetching = false; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + 
invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 0; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); toku_ftnode_free(&dn); toku_free(ndd); toku_free(cursor); } - -static void -test_prefetching(void) { +static void test_prefetching(void) { // struct ft_handle source_ft; struct ftnode sn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -327,7 +342,7 @@ test_prefetching(void) { uint64_t key1 = 100; uint64_t key2 = 200; - + 
MALLOC_N(sn.n_children, sn.bp); DBT pivotkeys[2]; toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); @@ -336,13 +351,13 @@ test_prefetching(void) { BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; - BP_STATE(&sn,2) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; + BP_STATE(&sn, 2) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -352,7 +367,7 @@ test_prefetching(void) { CKERR(r); // data in the buffers does not matter in this test - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -363,41 +378,48 @@ test_prefetching(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); - test_prefetch_read(fd, ft, ft_h); + test_prefetch_read(fd, ft, ft_h); test_subset_read(fd, ft, ft_h); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -405,11 +427,12 @@ test_prefetching(void) { toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { test_prefetching(); return 0; diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc index ceef3772e2a..26a3dae673c 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc @@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
#include "ft/cursor.h" -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keylen, + const char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char memcpy(r->u.clean.val, val, vallen); } - -static void -le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val) -{ +static void le_malloc(bn_data *bn, + uint32_t idx, + const char *key, + const char *val) { int keylen = strlen(key) + 1; int vallen = strlen(val) + 1; le_add_to_bn(bn, idx, key, keylen, val, vallen); } - -static void -test1(int fd, FT ft_h, FTNODE *dn) { +static void test1(int fd, FT ft_h, FTNODE *dn) { int r; ftnode_fetch_extra bfe_all; bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); - assert(r==0); + invariant(r == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and NOT get rid of anything PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); - } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } } PAIR_ATTR size; bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); - } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } - } + } req = toku_ftnode_pf_req_callback(*dn, 
&bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } (*dn)->dirty = 1; toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); @@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) { toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } toku_free(ndd); toku_ftnode_free(dn); } - -static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) { +static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) { return 0; } -static void -test2(int fd, FT ft_h, FTNODE *dn) { +static void test2(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); ft_search search; - + ftnode_fetch_extra bfe_subset; bfe_subset.create_for_subset_read( ft_h, - ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ft_search_init( + &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, true, false, - false - ); + false); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset); + invariant(r == 0); bool is_leaf = ((*dn)->height == 0); - // at this point, although both partitions are available, only the + // at this point, although both partitions are available, only the // second basement node should have had its clock // touched - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 1)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == (is_leaf) ? 
PT_ON_DISK : PT_COMPRESSED); bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr); - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); toku_free(ndd); toku_ftnode_free(dn); } -static void -test3_leaf(int fd, FT ft_h, FTNODE *dn) { +static void test3_leaf(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - + ftnode_fetch_extra bfe_min; bfe_min.create_for_min_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min); + invariant(r == 0); // // make sure we have a leaf // - assert((*dn)->height == 0); + invariant((*dn)->height == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } toku_ftnode_free(dn); toku_free(ndd); } -static void -test_serialize_nonleaf(void) { +static void test_serialize_nonleaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -265,11 +253,11 @@ test_serialize_nonleaf(void) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -281,11 +269,38 @@ test_serialize_nonleaf(void) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); - - //Cleanup: + toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); + + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -297,35 +312,41 @@ test_serialize_nonleaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = 
ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); test2(fd, ft_h, &dn); @@ -333,22 +354,26 @@ test_serialize_nonleaf(void) { toku_destroy_ftnode_internals(&sn); toku_free(ndd); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf(void) { +static void test_serialize_leaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -364,8 +389,8 @@ test_serialize_leaf(void) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval"); @@ -378,51 +403,59 @@ test_serialize_leaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); - test3_leaf(fd, ft_h,&dn); + test3_leaf(fd, ft_h, &dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(); test_serialize_leaf(); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc index 9828f49513c..d50488ae197 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc @@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <sys/time.h> #include "test.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif const double USECS_PER_SEC = 1000000.0; -static void -le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + char *key, + int keylen, + char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va memcpy(r->u.clean.val, val, vallen); } -static int -long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { const long *CAST_FROM_VOIDP(x, a->data); const long *CAST_FROM_VOIDP(y, b->data); return (*x > *y) - (*x < *y); } -static void -test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_leaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode *sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de MALLOC_N(sn->n_children, sn->bp); sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { - BP_STATE(sn,i) = PT_AVAIL; + BP_STATE(sn, i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); } int nperbn = nelts / sn->n_children; @@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de k = ck * nperbn + i; char buf[valsize]; int c; - for (c = 0; c < valsize * entropy; ) { - int *p = (int *) &buf[c]; + for 
(c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); le_add_to_bn( - BLB_DATA(sn,ck), - i, - (char *)&k, - sizeof k, - buf, - sizeof buf - ); + BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf); } if (ck < 7) { DBT pivotkey; - sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), + ck); } } @@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval total_start; @@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de gettimeofday(&t[0], NULL); ndd = NULL; sn->dirty = 1; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; total_start.tv_usec += t[0].tv_usec; @@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_free(ndd); } double dt; - dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= ser_runs; - printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); + printf( + "serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); - //reset + // reset total_start.tv_sec = total_start.tv_usec = 0; total_end.tv_sec = total_end.tv_usec = 0; @@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; @@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_ftnode_free(&dn); toku_free(ndd2); } - dt = 
(total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= deser_runs; - printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(average of %d runs)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_nonleaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_BLOCKNUM(&sn, i).b = 30 + (i*5); - BP_STATE(&sn,i) = PT_AVAIL; + BP_BLOCKNUM(&sn, i).b = 30 + (i * 5); + BP_STATE(&sn, i) = PT_AVAIL; set_BNC(&sn, i, toku_create_empty_nl()); } - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); @@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int k = ck * nperchild + i; char buf[valsize]; int c; - for (c = 0; c < valsize * entropy; ) { - int *p = (int *) &buf[c]; + for (c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp); + toku_bnc_insert_msg(bnc, + &k, + sizeof k, + buf, + valsize, + FT_NONE, + next_dummymsn(), + xids_123, + true, + cmp); } if (ck < 7) { DBT pivotkey; @@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int } } - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); cmp.destroy(); @@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); 
CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval t[2]; gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); double dt; - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); + printf( + "serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(IGNORED RUNS=%d)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); @@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_free(ndd); toku_free(ndd2); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] 
__attribute__((__unused__))) { const int DEFAULT_RUNS = 5; long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS; double entropy = 0.3; if (argc != 3 && argc != 5) { - fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]); + fprintf(stderr, + "Usage: %s <valsize> <nelts> [<serialize_runs> " + "<deserialize_runs>]\n", + argv[0]); fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS); return 2; } diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc index 332aaa0c170..0cddaf19651 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc @@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" #include "bndata.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif -static size_t -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) -{ +static size_t le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keysize, + const char *val, + int valsize) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keysize, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha } class test_key_le_pair { - public: + public: uint32_t keylen; - char* keyp; + char *keyp; LEAFENTRY le; test_key_le_pair() : keylen(), keyp(), le() {} void init(const char *_keyp, const char *_val) { init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1); } - void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) { + void init(const char *_keyp, + uint32_t _keylen, + const char *_val, + uint32_t _vallen) { keylen = _keylen; CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen))); @@ -95,126 +92,144 @@ class test_key_le_pair { } }; -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { +static void setup_dn(enum ftnode_verify_type bft, + int fd, + FT ft_h, + FTNODE *dn, + FTNODE_DISK_DATA *ndd) { int r; if (bft == read_all) { ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); - assert(r==0); - } - else if (bft == read_compressed || bft == read_none) { + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + } else if (bft == read_compressed || bft == read_none) { ftnode_fetch_extra bfe; bfe.create_for_min_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); - assert(r==0); - // assert all bp's are compressed or on disk. 
+ r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + // invariant all bp's are compressed or on disk. for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED || + BP_STATE(*dn, i) == PT_ON_DISK); } // if read_none, get rid of the compressed bp's if (bft == read_none) { if ((*dn)->height == 0) { - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - // assert all bp's are on disk + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); + // invariant all bp's are on disk for (int i = 0; i < (*dn)->n_children; i++) { if ((*dn)->height == 0) { - assert(BP_STATE(*dn,i) == PT_ON_DISK); - assert(is_BNULL(*dn, i)); - } - else { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(is_BNULL(*dn, i)); + } else { + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } - } - else { + } else { // first decompress everything, and make sure // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available + invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - // assert all bp's are still available, because we touched the clock - assert(BP_STATE(*dn,i) == PT_AVAIL); - // now assert all should be evicted - assert(BP_SHOULD_EVICT(*dn, i)); + // invariant all bp's are still available, because we touched + // the clock + invariant(BP_STATE(*dn, i) == PT_AVAIL); + // now invariant all should be evicted + invariant(BP_SHOULD_EVICT(*dn, i)); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } } // now decompress them bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available + invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // continue on with test - } - else { + } else { // if we get here, this is a test bug, NOT a bug in development code - assert(false); + invariant(false); } } -static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) { +static void write_sn_to_disk(int fd, + FT_HANDLE ft, + FTNODE sn, + FTNODE_DISK_DATA *src_ndd, + bool do_clone) { int r; if (do_clone) { - void* 
cloned_node_v = NULL; + void *cloned_node_v = NULL; PAIR_ATTR attr; long clone_size; - toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); + toku_ftnode_clone_callback( + sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v); - r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); + invariant(r == 0); toku_ftnode_free(&cloned_node); - } - else { - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); - assert(r==0); + } else { + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); + invariant(r == 0); } } -static void -test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft, + bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; -#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 }) -#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 }) +#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42}) +#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84}) sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; @@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5); le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 }); + BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73}); BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK; FT_HANDLE XMALLOC(ft); @@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } - //Want to use block #20 + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + 
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>=1); - assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children >= 1); + invariant(dn->max_msn_applied_to_node_on_disk.msn == + POSTSERIALIZE_MSN_ON_DISK.msn); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair elts[3]; elts[0].init("a", "aval"); @@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn == + POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) { 
+static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const int keylens = 256*1024, vallens = 0; + const int keylens = 256 * 1024, vallens = 0; const uint32_t nrows = 8; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { // one basement per row char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; char c = 'a' + i; - memset(key, c, keylens-1); - le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); - if (i < nrows-1) { + memset(key, c, keylens - 1); + le_add_to_bn(BLB_DATA(&sn, i), + 0, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); + if (i < nrows - 1) { uint32_t keylen; - void* curr_key; + void *curr_key; BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); DBT pivotkey; - sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), + i); } } @@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - - assert(dn->blocknum.b==20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->blocknum.b == 20); + + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way 
too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, keylens-1); - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + memset(key, c, keylens - 1); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const uint32_t nrows = 196*1024; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const uint32_t nrows = 196 * 1024; + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { XMALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; - set_BLB(&sn, i, toku_create_empty_bn()); + BP_STATE(&sn, i) = PT_AVAIL; + set_BLB(&sn, i, 
toku_create_empty_bn()); } size_t total_size = 0; for (uint32_t i = 0; i < nrows; ++i) { uint32_t key = i; uint32_t val = i; - total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + total_size += le_add_to_bn(BLB_DATA(&sn, 0), + i, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); } FT_HANDLE XMALLOC(ft); @@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -535,56 +602,66 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. 
test_key_le_pair *les = new test_key_le_pair[nrows]; { int key = 0, val = 0; for (uint32_t i = 0; i < nrows; ++i, key++, val++) { - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); - void* tmp = les[last_i].keyp; + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + uint32_t *CAST_FROM_VOIDP(pivot, + dn->pivotkeys.get_pivot(bn).data); + void *tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); - assert(*pivot >= *item); + invariant(*pivot >= *item); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert - assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change + invariant(BLB_DATA(dn, bn)->get_disk_size() < + 128 * 1024); // BN_MAX_SIZE, apt to change } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; const uint32_t nrows = 7; const size_t key_size = 8; - const size_t val_size = 512*1024; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const size_t val_size = 512 * 1024; + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) 
sn.n_children = 1; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - + MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); - le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); + le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size); } FT_HANDLE XMALLOC(ft); @@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. 
test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); les[i].init(key, key_size, val, val_size); } } const uint32_t npartitions = dn->n_children; - assert(npartitions == nrows); + invariant(npartitions == nrows); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + (char *)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert } - assert(last_i == 7); + invariant(last_i == 7); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -760,7 +857,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_fill_dbt(&pivotkeys[5], "x", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); BLB_SEQINSERT(&sn, i) = 0; } @@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool make_blocknum(0), ZERO_LSN, TXNID_NONE, - 
4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>0); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children > 0); { test_key_le_pair elts[3]; - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. 
elts[0].init("a", "aval"); elts[1].init("b", "bval"); @@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + (char *)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_multiple_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_fill_dbt(&pivotkeys[2], "A", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } @@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - 
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children == 1); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; for (uint32_t i = 0; i < npartitions; ++i) { - assert(dest_ndd[i].start > 0); - assert(dest_ndd[i].size > 0); + invariant(dest_ndd[i].start > 0); + invariant(dest_ndd[i].size > 0); if (i > 0) { - assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); + invariant(dest_ndd[i].start >= + dest_ndd[i - 1].start + dest_ndd[i - 1].size); } - assert(BLB_DATA(dn, i)->num_klpairs() == 0); + invariant(BLB_DATA(dn, i)->num_klpairs() == 0); } } - + toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, 
next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); - - //Cleanup: + toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); + + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 1); - assert(dn->n_children==2); - assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); - assert(dn->pivotkeys.get_pivot(0).size==6); - assert(BP_BLOCKNUM(dn,0).b==30); - assert(BP_BLOCKNUM(dn,1).b==35); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 1); + invariant(dn->n_children == 2); + invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0); + invariant(dn->pivotkeys.get_pivot(0).size == 6); + invariant(BP_BLOCKNUM(dn, 0).b == 30); + invariant(BP_BLOCKNUM(dn, 1).b == 35); message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(src_msg_buffer1->equals(dest_msg_buffer1)); - assert(src_msg_buffer2->equals(dest_msg_buffer2)); + invariant(src_msg_buffer1->equals(dest_msg_buffer1)); + 
invariant(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(read_none, false); @@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + true); test_serialize_leaf_with_empty_basement_nodes(read_none, false); test_serialize_leaf_with_empty_basement_nodes(read_all, false); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc index 598a1cc7085..706bd94fbc3 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc @@ -164,17 +164,16 @@ static void test_read_what_was_written (void) { int r; const int NVALS=10000; - if (verbose) printf("test_read_what_was_written(): "); fflush(stdout); + if (verbose) { + printf("test_read_what_was_written(): "); fflush(stdout); + } unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); - toku_cachetable_close(&ct); - - + toku_cachetable_close(&ct); /* Now see if we can read an empty tree in. */ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); @@ -189,8 +188,6 @@ static void test_read_what_was_written (void) { r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); - - /* Now see if we can read it in and get the value. 
*/ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc index 53973794eae..aeb5a897c48 100644 --- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc @@ -109,7 +109,9 @@ static int run_test(void) r = pqueue_pop(pq, &node); assert(r==0); if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data)); if ( *(int*)(node->key->data) != i ) { - if (verbose) printf("FAIL\n"); return -1; + if (verbose) + printf("FAIL\n"); + return -1; } } pqueue_free(pq); diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc index a78f787cdf2..f2004964862 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc @@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) { do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200); invariant(do_garbage_collect); - // It is definately worth doing when the above case is true + // It is definitely worth doing when the above case is true // and there is more than one provisional entry. ule.num_cuxrs = 1; ule.num_puxrs = 2; diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc index 419af550545..71357a1e16a 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc @@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; -static void test_oldest_referenced_xid_gets_propogated(void) { +static void test_oldest_referenced_xid_gets_propagated(void) { int r; CACHETABLE ct; FT_HANDLE t; @@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) { toku_ft_flush_some_child(t->ft, node, &fa); // pin the child, verify that oldest referenced xid was - // propogated from parent to child during the flush + // propagated from parent to child during the flush toku_pin_ftnode( t->ft, child_nonleaf_blocknum, @@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) { int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { default_parse_args(argc, argv); - test_oldest_referenced_xid_gets_propogated(); + test_oldest_referenced_xid_gets_propagated(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc index 8aded3898c1..ea4f9374dc3 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc @@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
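
Aside on the pqueue-test.cc hunk above: it is a pure readability fix. In the original one-liner, `return -1` was never governed by `if (verbose)`; a braceless `if` controls only the single statement that follows it, and the layout made that easy to misread. A minimal standalone sketch of the pitfall and the clearer layout (hypothetical names, not PerconaFT code):

    #include <cstdio>

    // Looks like both statements depend on the flag, but only the first does:
    // the braceless 'if' governs a single statement, so 'return -1' always runs.
    static int check_misleading(bool verbose, int got, int want) {
        if (got != want) {
            if (verbose) printf("FAIL\n"); return -1;   // misleading one-liner
        }
        return 0;
    }

    // Same behavior, laid out so the control flow is obvious.
    static int check_clear(bool verbose, int got, int want) {
        if (got != want) {
            if (verbose)
                printf("FAIL\n");
            return -1;
        }
        return 0;
    }

    int main() {
        // Both calls return -1 here, whether or not verbose is set.
        printf("%d %d\n", check_misleading(false, 1, 2), check_clear(false, 1, 2));
        return 0;
    }
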
-#pragma once - -#include <db.h> - -#include "ft/serialize/block_allocator.h" - -// Block allocation strategy implementations - -class block_allocator_strategy { -public: - static struct block_allocator::blockpair * - first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat); -}; +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include <algorithm> +#include <vector> +#include <ctime> +#include <cstdlib> + +static void test_insert_remove(void) { + uint64_t i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; + + tree->Insert({0, 100}); + + for (i = 0; i < 10; i++) { + tree->Remove(3); + tree->Remove(2); + } + tree->ValidateBalance(); + tree->ValidateMhs(); + + for (i = 0; i < 10; i++) { + tree->Insert({5 * i, 3}); + } + tree->ValidateBalance(); + tree->ValidateMhs(); + + uint64_t offset = tree->Remove(2); + invariant(offset == 0); + offset = tree->Remove(10); + invariant(offset == 50); + offset = tree->Remove(3); + invariant(offset == 5); + tree->ValidateBalance(); + tree->ValidateMhs(); + + tree->Insert({48, 2}); + tree->Insert({50, 10}); + + tree->ValidateBalance(); + tree->ValidateMhs(); + + tree->Insert({3, 7}); + offset = tree->Remove(10); + invariant(offset == 2); + tree->ValidateBalance(); + tree->ValidateMhs(); + tree->Dump(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if (verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc new file mode 100644 index 00000000000..85f29ce9813 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc @@ -0,0 +1,102 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. 
+ + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include <algorithm> +#include <vector> +#include <ctime> +#include <cstdlib> + +#define N 1000000 +std::vector<MhsRbTree::Node::BlockPair> input_vector; +MhsRbTree::Node::BlockPair old_vector[N]; + +static int myrandom(int i) { return std::rand() % i; } + +static void generate_random_input() { + std::srand(unsigned(std::time(0))); + + // set some values: + for (uint64_t i = 1; i < N; ++i) { + input_vector.push_back({i, 0}); + old_vector[i] = {i, 0}; + } + // using built-in random generator: + std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom); +} + +static void test_insert_remove(void) { + int i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; + generate_random_input(); + if (verbose) { + printf("\n we are going to insert the following block offsets\n"); + for (i = 0; i < N; i++) + printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt()); + } + for (i = 0; i < N; i++) { + tree->Insert(input_vector[i]); + // tree->ValidateBalance(); + } + tree->ValidateBalance(); + MhsRbTree::Node::BlockPair *p_bps = &old_vector[0]; + tree->ValidateInOrder(p_bps); + printf("min node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MinNode()).ToInt()); + printf("max node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MaxNode()).ToInt()); + + for (i = 0; i < N; i++) { + // tree->ValidateBalance(); + tree->RawRemove(input_vector[i]._offset.ToInt()); + } + + tree->Destroy(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if (verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 407116b983c..90eee1e580a 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h // -// this (poorly) explains the absense of "roll.h" +// this (poorly) explains the absence of "roll.h" // these flags control whether or not we send commit messages for // various operations diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc index df830afd0df..c9464c3ed60 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc @@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { txn->roll_info.spilled_rollback_head = ROLLBACK_NONE; txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE; } - // if we're commiting a child rollback, put its entries into the parent + // if we're committing a child rollback, put its entries into the parent // by pinning both child and parent and then linking the child log entry // list to the end of the parent log entry list. 
if (txn_has_current_rollback_log(txn)) { diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc index 68c94c2ad11..08d7c8874e5 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc @@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) { // flush an ununused log to disk, by allocating a size 0 blocknum in // the blocktable -static void -toku_rollback_flush_unused_log( - ROLLBACK_LOG_NODE log, - BLOCKNUM logname, - int fd, - FT ft, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ +static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log, + BLOCKNUM logname, + int fd, + FT ft, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { if (write_me) { DISKOFF offset; - ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); + ft->blocktable.realloc_on_disk( + logname, 0, &offset, ft, fd, for_checkpoint); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc index ac393fbf179..e3dce6d27dd 100644 --- a/storage/tokudb/PerconaFT/ft/ule.cc +++ b/storage/tokudb/PerconaFT/ft/ule.cc @@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection( // by new txns. // 2.) There is only one committed entry, but the outermost // provisional entry is older than the oldest known referenced -// xid, so it must have commited. Therefor we can promote it to -// committed and get rid of the old commited entry. +// xid, so it must have committed. Therefor we can promote it to +// committed and get rid of the old committed entry. if (le->type != LE_MVCC) { return false; } diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt index 9f84d9b03df..4793db63cc1 100644 --- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt @@ -14,12 +14,11 @@ set(tokuportability_srcs ) add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs}) -target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC}) target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) add_library(tokuportability_static_conv STATIC ${tokuportability_srcs}) set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON) -set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) +set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}") maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv) diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc index bc48e93937d..8e73c56a6c5 100644 --- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc +++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc @@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void) const long pagesize = 4096; const long n_pages = TWO_MB/pagesize; +#ifdef __linux__ + // On linux mincore is defined as mincore(void *, size_t, unsigned char *) unsigned char vec[n_pages]; +#else + // On BSD (OS X included) it is 
defined as mincore(void *, size_t, char *) + char vec[n_pages]; +#endif { int r = mincore(second, TWO_MB, vec); if (r!=0 && errno==ENOMEM) { diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc index 880f9a3a9bb..dbbea974a49 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc @@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) { if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata); // check the data size -#if __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) assert(maxdata > (1ULL << 32)); #elif __i386__ assert(maxdata < (1ULL << 32)); diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in index e1412cc9e14..1a34bf1ef45 100644 --- a/storage/tokudb/PerconaFT/portability/toku_config.h.in +++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in @@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #cmakedefine TOKU_DEBUG_PARANOID 1 #cmakedefine USE_VALGRIND 1 - #cmakedefine HAVE_ALLOCA_H 1 #cmakedefine HAVE_ARPA_INET_H 1 #cmakedefine HAVE_BYTESWAP_H 1 diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h index 11a3f3aa2b9..a1278ef0337 100644 --- a/storage/tokudb/PerconaFT/portability/toku_time.h +++ b/storage/tokudb/PerconaFT/portability/toku_time.h @@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default") // Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC. static inline tokutime_t toku_time_now(void) { +#if defined(__x86_64__) || defined(__i386__) uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; +#elif defined (__aarch64__) + uint64_t result; + __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result)); + return result; +#else +#error No timer implementation for this platform +#endif } static inline uint64_t toku_current_time_microsec(void) { diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h index 48e62ee49b2..fdaa561e3d0 100644 --- a/storage/tokudb/PerconaFT/src/indexer-internal.h +++ b/storage/tokudb/PerconaFT/src/indexer-internal.h @@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include <toku_pthread.h> // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. -// the array is a resizeable array with max size "max_keys" and current size "current_keys". +// the array is a resizable array with max size "max_keys" and current size "current_keys". // the ordered set is used by the hotindex undo function to collect the commit keys. 
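
Aside on the huge_page_detection.cc hunk above: it works around a real signature difference, since Linux declares the residency vector of mincore() as unsigned char * while the BSDs and macOS declare it as char *. A minimal, Linux-flavored sketch of querying page residency for an anonymous mapping (illustrative only; the 16-page mapping and memset are assumptions, not the PerconaFT code):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void) {
        const long pagesize = sysconf(_SC_PAGESIZE);
        const size_t len = 16 * (size_t)pagesize;

        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        memset(p, 0, len);  // touch every page so it becomes resident

    #ifdef __linux__
        unsigned char vec[16];   // Linux: mincore(void *, size_t, unsigned char *)
    #else
        char vec[16];            // BSD/macOS: mincore(void *, size_t, char *)
    #endif
        if (mincore(p, len, vec) != 0) { perror("mincore"); return 1; }

        // Bit 0 of each byte reports whether that page is resident in memory.
        for (int i = 0; i < 16; i++)
            printf("page %2d: %s\n", i, (vec[i] & 1) ? "resident" : "not resident");

        munmap(p, len);
        return 0;
    }
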
struct indexer_commit_keys { int max_keys; // max number of keys diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc index 8d0b080b9fe..4c7f5336161 100644 --- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc +++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc @@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u } // inject "delete" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) { int result = 0; @@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi } // inject "insert" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) { int result = 0; diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test index 20df13923e6..7cce68e6ff8 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test @@ -1,3 +1,3 @@ -# commited insert +# committed insert key k1 insert committed 0 v100 diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc index 3f2f8d7455a..aaf77c503cc 100644 --- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc +++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc @@ -51,7 +51,7 @@ int DISALLOW_PUTS=0; int COMPRESS=0; enum {MAGIC=311}; -bool dup_row_at_end = false; // false: duplicate at the begining. true: duplicate at the end. The duplicated row is row 0. +bool dup_row_at_end = false; // false: duplicate at the beginning. true: duplicate at the end. The duplicated row is row 0. int dup_row_id = 0; // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning. Otherwise insert the row specified here. 
// diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc index a4dc0ea9236..2c905c5ff12 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc @@ -156,7 +156,7 @@ do_args(int argc, char * const argv[]) { choices[i] = -1; } - char c; + int c; while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) { switch(c) { case 'v': diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc index a2b48e443cd..48843a0bd32 100644 --- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc +++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc @@ -166,7 +166,7 @@ run_test (void) { DB_BTREE_STAT64 s; r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); r = db->close(db, 0); CKERR(r); @@ -176,7 +176,7 @@ run_test (void) { r = txn->commit(txn, 0); CKERR(r); r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); } // verify update callback overwrites the row diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc index 8e5109cd2a9..f6111d4b67c 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc @@ -78,7 +78,7 @@ static void test_insert_many_gc(void) { // from having an MVCC stack of size 'N'. At the time of this // writing, we run full GC on leaf-inject when the leaf is // 32mb or larger. A good invariant is that the max LE size - // never grew larger than 35mb and that the max commited xr stack + // never grew larger than 35mb and that the max committed xr stack // length never exceeded 35 const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE"); const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR"); diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc index aaafe284906..88140dd1731 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc @@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // This test is a micro stress test that does multithreaded updates on a fixed size table. // There is also a thread that scans the table with bulk fetch, ensuring the sum is zero. // -// This test is targetted at stressing the locktree, hence the small table and many update threads. +// This test is targeted at stressing the locktree, hence the small table and many update threads. 
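
Aside on the recovery_fileops_unit.cc hunk above: it changes the getopt() loop variable from char to int. getopt() returns an int and signals end-of-arguments with -1; if the result is stored in a plain char on a platform where char is unsigned (common on ARM and PowerPC Linux), the comparison against -1 can never succeed and the loop never terminates. A small illustration of the correct pattern (the option letters here are made up, not the test's real ones):

    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char *argv[]) {
        int c;  // must be int: getopt() returns int, and -1 marks the end
        while ((c = getopt(argc, argv, "vqo:")) != -1) {
            switch (c) {
            case 'v': printf("verbose\n");           break;
            case 'q': printf("quiet\n");             break;
            case 'o': printf("output=%s\n", optarg); break;
            default:
                fprintf(stderr, "usage: %s [-v] [-q] [-o file]\n", argv[0]);
                return 2;
            }
        }
        return 0;
    }
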
// static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) { diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc index fec454b8009..301eed1560e 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc @@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) { continue; } } - if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n"); + if (verbose>0) printf("%s", __FILE__); + if (verbose>1) printf("\n"); for (i=1; i<100; i++) test_txn_abort(i); if (verbose>1) printf("%s OK\n", __FILE__); diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h index 462a2a3d861..2d6c84126e1 100644 --- a/storage/tokudb/PerconaFT/src/ydb-internal.h +++ b/storage/tokudb/PerconaFT/src/ydb-internal.h @@ -114,7 +114,7 @@ struct __toku_db_env_internal { char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /) - char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /) + char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /) fs_redzone_state fs_state; uint64_t fs_seq; // how many times has fs_poller run? diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess index da833146088..7501b1bee01 100644 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess @@ -1,10 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 -# Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2009-04-27' +timestamp='2016-06-22' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -17,9 +17,7 @@ timestamp='2009-04-27' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. +# along with this program; if not, see <http://www.gnu.org/licenses/>. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,16 +25,16 @@ timestamp='2009-04-27' # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner <per@bothner.com>. -# Please send patches to <config-patches@gnu.org>. Submit a context -# diff and a properly formatted ChangeLog entry. +# Originally written by Per Bothner. Please send patches (context +# diff format) to <config-patches@gnu.org> and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. 
# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -56,8 +54,9 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward @@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? @@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in fi ;; *) - os=netbsd + os=netbsd ;; esac # The OS release @@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on @@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit ;; + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? 
# Should we change UNAME_MACHINE based on the output of uname instead @@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo s390-ibm-zvmoe exit ;; *:OS400:*:*) - echo powerpc-ibm-os400 + echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} @@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build SUN_ARCH="i386" @@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; @@ -477,8 +482,8 @@ EOF echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ @@ -491,7 +496,7 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; @@ -548,7 +553,7 @@ EOF echo rs6000-ibm-aix3.2 fi exit ;; - *:AIX:*:[456]) + *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -591,52 +596,52 @@ EOF 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac + esac ;; + esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + sed 's/^ 
//' << EOF >$dummy.c - #define _HPUX_SOURCE - #include <stdlib.h> - #include <unistd.h> + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa @@ -656,7 +661,7 @@ EOF # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep __LP64__ >/dev/null + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -727,22 +732,22 @@ EOF exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; @@ -766,14 +771,14 @@ EOF exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} @@ 
-785,13 +790,12 @@ EOF echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) - case ${UNAME_MACHINE} in - pc98) - echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; i*:CYGWIN*:*) @@ -800,19 +804,22 @@ EOF *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; - *:Interix*:[3456]*) - case ${UNAME_MACHINE} in + *:Interix*:*) + case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; - EM64T | authenticamd | genuineintel) + authenticamd | genuineintel | EM64T) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; IA64) @@ -822,6 +829,9 @@ EOF [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we @@ -851,6 +861,27 @@ EOF i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; + aarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -858,20 +889,40 @@ EOF then echo ${UNAME_MACHINE}-unknown-linux-gnu else - echo ${UNAME_MACHINE}-unknown-linux-gnueabi + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + fi fi exit ;; avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) - echo cris-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; crisv32:Linux:*:*) - echo crisv32-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; frv:Linux:*:*) - echo frv-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu @@ -882,78 +933,34 @@ EOF m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - mips:Linux:*:*) + mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU - #undef mips - #undef mipsel + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel + CPU=${UNAME_MACHINE}el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips + CPU=${UNAME_MACHINE} #else CPU= #endif #endif EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; or32:Linux:*:*) - echo or32-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; padre:Linux:*:*) echo sparc-unknown-linux-gnu exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -962,14 +969,17 @@ EOF *) echo hppa-unknown-linux-gnu ;; esac exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu @@ -977,75 +987,18 @@ EOF sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <features.h> - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^LIBC/{ - s: ::g - p - }'`" - test x"${LIBC}" != x && { - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit - } - test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } - ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both @@ -1053,11 +1006,11 @@ EOF echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. + # Use sysv4.2uw... so that sysv4* matches it. 
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) @@ -1074,7 +1027,7 @@ EOF i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) @@ -1089,7 +1042,7 @@ EOF fi exit ;; i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. + # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; @@ -1117,13 +1070,13 @@ EOF exit ;; pc:*:*:*) # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. echo i586-pc-msdosdjgpp - exit ;; + exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; @@ -1158,8 +1111,8 @@ EOF /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ @@ -1182,7 +1135,7 @@ EOF rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) @@ -1202,10 +1155,10 @@ EOF echo ns32k-sni-sysv fi exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says <Richard.M.Bartel@ccMail.Census.GOV> - echo i586-unisys-sysv4 - exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes <hewes@openmarket.com>. # How about differentiating between stratus architectures? -djm @@ -1231,11 +1184,11 @@ EOF exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv${UNAME_RELEASE} else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv${UNAME_RELEASE} fi - exit ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
echo powerpc-be-beos exit ;; @@ -1275,6 +1228,16 @@ EOF *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} @@ -1290,6 +1253,9 @@ EOF *:QNX:*:4*) echo i386-pc-qnx exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; NSE-?:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; @@ -1335,13 +1301,13 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` + UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; @@ -1359,6 +1325,9 @@ EOF i*86:AROS:*:*) echo ${UNAME_MACHINE}-pc-aros exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1381,11 +1350,11 @@ main () #include <sys/param.h> printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 - "4" + "4" #else - "" + "" #endif - ); exit (0); + ); exit (0); #endif #endif diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt index af82b4357d2..f11b9f350d7 100644 --- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt @@ -1,6 +1,6 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify ba_replay) +set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify) foreach(tool ${tools}) add_executable(${tool} ${tool}.cc) add_dependencies(${tool} install_tdb_h) @@ -14,4 +14,3 @@ target_link_libraries(ftverify m) install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT Server) install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server) - diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc deleted file mode 100644 index cade7e5dfaf..00000000000 --- a/storage/tokudb/PerconaFT/tools/ba_replay.cc +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. 
- ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -// Replay a block allocator trace against different strategies and compare -// the results - -#include <db.h> - -#include <getopt.h> -#include <math.h> -#include <stdio.h> -#include <string.h> - -#include <map> -#include <set> -#include <string> -#include <sstream> -#include <vector> - -#include <portability/memory.h> -#include <portability/toku_assert.h> -#include <portability/toku_stdlib.h> - -#include "ft/serialize/block_allocator.h" - -using std::map; -using std::set; -using std::string; -using std::vector; - -static int verbose = false; - -static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { - if (!pred) { - fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); - abort(); - } -} - -static char *trim_whitespace(char *line) { - // skip leading whitespace - while (isspace(*line)) { - line++; - } - return line; -} - -static int64_t parse_number(char **ptr, int line_num, int base) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - char *new_ptr; - int64_t n = strtoll(line, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); - ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); - *ptr = new_ptr; - return n; -} - -static uint64_t parse_uint64(char **ptr, int line_num) { - int64_t n = parse_number(ptr, line_num, 10); - // we happen to know that the uint64's we deal with will - // take less than 63 bits (they come from pointers) - return static_cast<uint64_t>(n); -} - -static string parse_token(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - // parse the first token, which represents the traced function - char token[64]; - int r = sscanf(*ptr, "%64s", token); - ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); - *ptr += strlen(token); - return string(token); -} - -static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - uint64_t offset, size; - int bytes_read; - int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); - ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); - *ptr += bytes_read; - return block_allocator::blockpair(offset, size); -} - -static char *strip_newline(char *line, bool *found) { - char *ptr = strchr(line, '\n'); - if (ptr != nullptr) { - if (found != nullptr) { - *found = true; - } - *ptr = '\0'; - } - return line; -} - -static char *read_trace_line(FILE *file) { - const int buf_size = 4096; - char buf[buf_size]; - std::stringstream ss; - while (true) { - if (fgets(buf, buf_size, file) == nullptr) { - break; - } - bool has_newline = false; - ss << strip_newline(buf, &has_newline); - if (has_newline) { - // end 
of the line, we're done out - break; - } - } - std::string s = ss.str(); - return s.size() ? toku_strdup(s.c_str()) : nullptr; -} - -static vector<string> canonicalize_trace_from(FILE *file) { - // new trace, canonicalized from a raw trace - vector<string> canonicalized_trace; - - // raw allocator id -> canonical allocator id - // - // keeps track of allocators that were created as part of the trace, - // and therefore will be part of the canonicalized trace. - uint64_t allocator_id_seq_num = 0; - map<uint64_t, uint64_t> allocator_ids; - - // allocated offset -> allocation seq num - // - uint64_t allocation_seq_num = 0; - static const uint64_t ASN_NONE = (uint64_t) -1; - typedef map<uint64_t, uint64_t> offset_seq_map; - - // raw allocator id -> offset_seq_map that tracks its allocations - map<uint64_t, offset_seq_map> offset_to_seq_num_maps; - - int line_num = 0; - char *line; - while ((line = read_trace_line(file)) != nullptr) { - line_num++; - char *ptr = line; - - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 16); - - std::stringstream ss; - if (fn.find("ba_trace_create") != string::npos) { - ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); - ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", - "corrupted trace: bad fn", line, line_num); - - // we only convert the allocator_id to an allocator_id_seq_num - // in the canonical trace and leave the rest of the line as-is. - allocator_ids[allocator_id] = allocator_id_seq_num; - ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; - allocator_id_seq_num++; - - // First, read passed the reserve / alignment values. - (void) parse_uint64(&ptr, line_num); - (void) parse_uint64(&ptr, line_num); - if (fn == "ba_trace_create_from_blockpairs") { - // For each blockpair created by this traceline, add its offset to the offset seq map - // with asn ASN_NONE so that later canonicalizations of `free' know whether to write - // down the asn or the raw offset. 
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - (*map)[bp.offset] = ASN_NONE; - } - } - } else { - ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); - uint64_t canonical_allocator_id = allocator_ids[allocator_id]; - - // this is the map that tracks allocations for this allocator - offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - - if (fn == "ba_trace_alloc") { - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); - - // remember that an allocation at `offset' has the current alloc seq num - (*map)[offset] = allocation_seq_num; - - // translate `offset = alloc(size)' to `asn = alloc(size)' - ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; - allocation_seq_num++; - } else if (fn == "ba_trace_free") { - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); - - // get the alloc seq num for an allcation that occurred at `offset' - const uint64_t asn = (*map)[offset]; - map->erase(offset); - - // if there's an asn, then a corresponding ba_trace_alloc occurred and we should - // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs - // and we write the original offset. - if (asn != ASN_NONE) { - ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; - } else { - ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; - } - } else if (fn == "ba_trace_destroy") { - // Remove this allocator from both maps - allocator_ids.erase(allocator_id); - offset_to_seq_num_maps.erase(allocator_id); - - // translate `destroy(ptr_id) to destroy(canonical_id)' - ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; - } else { - ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); - } - } - canonicalized_trace.push_back(ss.str()); - - toku_free(line); - } - - if (allocator_ids.size() != 0) { - fprintf(stderr, "warning: leaked allocators. 
this might be ok if the tracing process is still running"); - } - - return canonicalized_trace; -} - -struct streaming_variance_calculator { - int64_t n_samples; - int64_t mean; - int64_t variance; - - // math credit: AoCP, Donald Knuth, '62 - void add_sample(int64_t x) { - n_samples++; - if (n_samples == 1) { - mean = x; - variance = 0; - } else { - int64_t old_mean = mean; - mean = old_mean + ((x - old_mean) / n_samples); - variance = (((n_samples - 1) * variance) + - ((x - old_mean) * (x - mean))) / n_samples; - } - } -}; - -struct canonical_trace_stats { - uint64_t n_lines_replayed; - - uint64_t n_create; - uint64_t n_create_from_blockpairs; - uint64_t n_alloc_hot; - uint64_t n_alloc_cold; - uint64_t n_free; - uint64_t n_destroy; - - struct streaming_variance_calculator alloc_hot_bytes; - struct streaming_variance_calculator alloc_cold_bytes; - - canonical_trace_stats() { - memset(this, 0, sizeof(*this)); - } -}; - -struct fragmentation_report { - TOKU_DB_FRAGMENTATION_S beginning; - TOKU_DB_FRAGMENTATION_S end; - fragmentation_report() { - memset(this, 0, sizeof(*this)); - } - void merge(const struct fragmentation_report &src_report) { - for (int i = 0; i < 2; i++) { - TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end; - const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end; - dst->file_size_bytes += src->file_size_bytes; - dst->data_bytes += src->data_bytes; - dst->data_blocks += src->data_blocks; - dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; - dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; - dst->unused_bytes += src->unused_bytes; - dst->unused_blocks += src->unused_blocks; - dst->largest_unused_block += src->largest_unused_block; - } - } -}; - -static void replay_canonicalized_trace(const vector<string> &canonicalized_trace, - block_allocator::allocation_strategy strategy, - map<uint64_t, struct fragmentation_report> *reports, - struct canonical_trace_stats *stats) { - // maps an allocator id to its block allocator - map<uint64_t, block_allocator *> allocator_map; - - // maps allocation seq num to allocated offset - map<uint64_t, uint64_t> seq_num_to_offset; - - for (vector<string>::const_iterator it = canonicalized_trace.begin(); - it != canonicalized_trace.end(); it++) { - const int line_num = stats->n_lines_replayed++; - - char *line = toku_strdup(it->c_str()); - line = strip_newline(line, nullptr); - - char *ptr = trim_whitespace(line); - - // canonical allocator id is in base 10, not 16 - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 10); - - if (fn.find("ba_trace_create") != string::npos) { - const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); - const uint64_t alignment = parse_uint64(&ptr, line_num); - ba_replay_assert(allocator_map.count(allocator_id) == 0, - "corrupted canonical trace: double create", line, line_num); - - block_allocator *ba = new block_allocator(); - if (fn == "ba_trace_create") { - ba->create(reserve_at_beginning, alignment); - stats->n_create++; - } else { - ba_replay_assert(fn == "ba_trace_create_from_blockpairs", - "corrupted canonical trace: bad create fn", line, line_num); - vector<block_allocator::blockpair> pairs; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - pairs.push_back(bp); - } - ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); - stats->n_create_from_blockpairs++; - } - 
ba->set_strategy(strategy); - - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - (*reports)[allocator_id].beginning = report; - allocator_map[allocator_id] = ba; - } else { - ba_replay_assert(allocator_map.count(allocator_id) > 0, - "corrupted canonical trace: no such allocator", line, line_num); - - block_allocator *ba = allocator_map[allocator_id]; - if (fn == "ba_trace_alloc") { - // replay an `alloc' whose result will be associated with a certain asn - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 0, - "corrupted canonical trace: double alloc (asn in use)", line, line_num); - - uint64_t offset; - ba->alloc_block(size, heat, &offset); - seq_num_to_offset[asn] = offset; - heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; - heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); - } else if (fn == "ba_trace_free_asn") { - // replay a `free' on a block whose offset is the result of an alloc with an asn - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 1, - "corrupted canonical trace: double free (asn unused)", line, line_num); - - const uint64_t offset = seq_num_to_offset[asn]; - ba->free_block(offset); - seq_num_to_offset.erase(asn); - stats->n_free++; - } else if (fn == "ba_trace_free_offset") { - // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs - const uint64_t offset = parse_uint64(&ptr, line_num); - ba->free_block(offset); - stats->n_free++; - } else if (fn == "ba_trace_destroy") { - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - ba->destroy(); - (*reports)[allocator_id].end = report; - allocator_map.erase(allocator_id); - stats->n_destroy++; - } else { - ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); - } - } - - toku_free(line); - } -} - -static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { - switch (strategy) { - case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: - return "first-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: - return "best-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: - return "heat-zone"; - case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: - return "padded-fit"; - default: - abort(); - } -} - -static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { - if (strcmp(str, "first-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; - } - if (strcmp(str, "best-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; - } - if (strcmp(str, "heat-zone") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; - } - if (strcmp(str, "padded-fit") != 0) { - fprintf(stderr, "bad strategy string: %s\n", str); - abort(); - } - return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; -} - -static void print_result_verbose(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - if (report.end.data_bytes + report.end.unused_bytes + - report.beginning.data_bytes + report.beginning.unused_bytes - < 32UL * 1024 * 1024) { - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - return; - } - - 
printf(" allocator_id: %20" PRId64 "\n", allocator_id); - printf(" strategy: %20s\n", strategy_to_cstring(strategy)); - - for (int i = 0; i < 2; i++) { - const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; - printf("%s\n", i == 0 ? "BEFORE" : "AFTER"); - - uint64_t total_bytes = r->data_bytes + r->unused_bytes; - uint64_t total_blocks = r->data_blocks + r->unused_blocks; - - // byte statistics - printf(" total bytes: %20" PRId64 "\n", total_bytes); - printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, - static_cast<double>(r->data_bytes) / total_bytes); - printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, - static_cast<double>(r->unused_bytes) / total_bytes); - - // block statistics - printf(" total blocks: %20" PRId64 "\n", total_blocks); - printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, - static_cast<double>(r->data_blocks) / total_blocks); - printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, - static_cast<double>(r->unused_blocks) / total_blocks); - - // misc - printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); - } -} - -static void print_result(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; - const TOKU_DB_FRAGMENTATION_S *end = &report.end; - - uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; - uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; - if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { - if (verbose) { - printf("\n"); - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - } - return; - } - printf("\n"); - if (verbose) { - print_result_verbose(allocator_id, strategy, report); - } else { - printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", - strategy_to_cstring(strategy), allocator_id, - static_cast<double>(report.end.data_bytes) / total_end_bytes, - static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes); - } -} - -static int only_aggregate_reports; - -static struct option getopt_options[] = { - { "verbose", no_argument, &verbose, 1 }, - { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, - { "include-strategy", required_argument, nullptr, 'i' }, - { "exclude-strategy", required_argument, nullptr, 'x' }, - { nullptr, 0, nullptr, 0 }, -}; - -int main(int argc, char *argv[]) { - int opt; - set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies; - while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { - switch (opt) { - case 0: - break; - case 'i': - candidate_strategies.insert(cstring_to_strategy(optarg)); - break; - case 'x': - excluded_strategies.insert(cstring_to_strategy(optarg)); - break; - case '?': - default: - abort(); - }; - } - // Default to everything if nothing was explicitly included. 
- if (candidate_strategies.empty()) { - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); - } - // ..but remove anything that was explicitly excluded - for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin(); - it != excluded_strategies.end(); it++) { - candidate_strategies.erase(*it); - } - - // Run the real trace - // - // First, read the raw trace from stdin - vector<string> canonicalized_trace = canonicalize_trace_from(stdin); - - if (!only_aggregate_reports) { - printf("\n"); - printf("Individual reports, by allocator:\n"); - } - - struct canonical_trace_stats stats; - map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy; - for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin(); - it != candidate_strategies.end(); it++) { - const block_allocator::allocation_strategy strategy(*it); - - // replay the canonicalized trace against the current strategy. - // - // we provided the allocator map so we can gather statistics later - struct canonical_trace_stats dummy_stats; - map<uint64_t, struct fragmentation_report> reports; - replay_canonicalized_trace(canonicalized_trace, strategy, &reports, - // Only need to gather canonical trace stats once - it == candidate_strategies.begin() ? &stats : &dummy_stats); - - struct fragmentation_report aggregate_report; - memset(&aggregate_report, 0, sizeof(aggregate_report)); - for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin(); - rp != reports.end(); rp++) { - const struct fragmentation_report &report = rp->second; - aggregate_report.merge(report); - if (!only_aggregate_reports) { - print_result(rp->first, strategy, report); - } - } - reports_by_strategy[strategy] = aggregate_report; - } - - printf("\n"); - printf("Aggregate reports, by strategy:\n"); - - for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin(); - it != reports_by_strategy.end(); it++) { - print_result(0, it->first, it->second); - } - - printf("\n"); - printf("Overall trace stats:\n"); - printf("\n"); - printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); - printf(" n_create: %15" PRIu64 "\n", stats.n_create); - printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); - printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); - printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); - printf(" n_free: %15" PRIu64 "\n", stats.n_free); - printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); - printf("\n"); - printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); - printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); - printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); - printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); - printf("\n"); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc index 5920be8deda..2324249ba00 100644 --- a/storage/tokudb/PerconaFT/tools/ftverify.cc +++ b/storage/tokudb/PerconaFT/tools/ftverify.cc @@ -148,7 
+148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } } { - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, header_1_off, diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc index 23ef72218ac..f6d777b4161 100644 --- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc +++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc @@ -192,6 +192,7 @@ static void dump_header(FT ft) { dump_descriptor(&ft->descriptor); printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows); printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); + printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows); } static int64_t getRootNode(FT ft) { diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc index 48ff28e89af..76b1d9c713e 100644 --- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc @@ -110,7 +110,7 @@ test2 (void) { static void test3 (void) -// Compare the simple version to the highly optimized verison. +// Compare the simple version to the highly optimized version. { const int datalen = 1000; char data[datalen]; diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 672ae32f80a..7e9e6100c6e 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -382,17 +382,17 @@ void TOKUDB_SHARE::update_row_count( pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100); if (_row_delta_activity >= pct_of_rows_changed_to_trigger) { char msg[200]; - snprintf( - msg, - sizeof(msg), - "TokuDB: Auto %s background analysis for %s, delta_activity " - "%llu is greater than %llu percent of %llu rows.", - tokudb::sysvars::analyze_in_background(thd) > 0 ? - "scheduling" : "running", - full_table_name(), - _row_delta_activity, - auto_threshold, - (ulonglong)(_rows)); + snprintf(msg, + sizeof(msg), + "TokuDB: Auto %s analysis for %s, delta_activity %llu is " + "greater than %llu percent of %llu rows.", + tokudb::sysvars::analyze_in_background(thd) > 0 + ? "scheduling background" + : "running foreground", + full_table_name(), + _row_delta_activity, + auto_threshold, + (ulonglong)(_rows)); // analyze_standard will unlock _mutex regardless of success/failure int ret = analyze_standard(thd, NULL); @@ -4097,7 +4097,7 @@ int ha_tokudb::write_row(uchar * record) { goto cleanup; } if (curr_num_DBs == 1) { - error = insert_row_to_main_dictionary(record,&prim_key, &row, txn); + error = insert_row_to_main_dictionary(record, &prim_key, &row, txn); if (error) { goto cleanup; } } else { error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd); @@ -6130,7 +6130,7 @@ int ha_tokudb::info(uint flag) { // we should always have a primary key assert_always(share->file != NULL); - error = estimate_num_rows(share->file,&num_rows, txn); + error = estimate_num_rows(share->file, &num_rows, txn); if (error == 0) { share->set_row_count(num_rows, false); stats.records = num_rows; diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc index db3d6c112d4..6d8e7173c8d 100644 --- a/storage/tokudb/ha_tokudb_admin.cc +++ b/storage/tokudb/ha_tokudb_admin.cc @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
- TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -43,13 +43,11 @@ public: virtual ~recount_rows_t(); virtual const char* key(); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -64,6 +62,8 @@ private: ulonglong _throttle; // for recount rows status reporting + char _parameters[256]; + char _status[1024]; int _result; ulonglong _recount_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -78,7 +78,6 @@ private: uint64_t deleted, void* extra); int analyze_recount_rows_progress(uint64_t count, uint64_t deleted); - void get_analyze_status(char*); }; void* recount_rows_t::operator new(size_t sz) { @@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t( } _throttle = tokudb::sysvars::analyze_throttle(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_THROTTLE=%llu;", + _throttle); + _status[0] = '\0'; } recount_rows_t::~recount_rows_t() { } void recount_rows_t::on_run() { + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); _recount_start = tokudb::time::microsec(); _total_elapsed_time = 0; @@ -171,6 +179,8 @@ void recount_rows_t::on_run() { _result, _share->row_count()); error: + if(_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; } void recount_rows_t::on_destroy() { @@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() { const char* recount_rows_t::key() { return _share->full_table_name(); } -void recount_rows_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"); - sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle); - get_analyze_status(status); +const char* recount_rows_t::database() { + return _share->database_name(); +} +const char* recount_rows_t::table() { + return _share->table_name(); +} +const char* recount_rows_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"; + return type; +} +const char* recount_rows_t::parameters() { + return _parameters; +} +const char* recount_rows_t::status() { + return _status; } int recount_rows_t::analyze_recount_rows_progress( uint64_t count, @@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress( return ER_ABORTING_CONNECTION; } + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. + // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. 
+ // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status + tokudb::background::_job_manager->lock(); + snprintf(_status, + sizeof(_status), + "recount_rows %s.%s counted %llu rows and %llu deleted " + "in %llu seconds.", + _share->database_name(), + _share->table_name(), + _rows, + _deleted_rows, + _total_elapsed_time / tokudb::time::MICROSECONDS); + tokudb::background::_job_manager->unlock(); + // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); - } + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); // throttle // given the throttle value, lets calculate the maximum number of rows @@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress( } return 0; } -void recount_rows_t::get_analyze_status(char* msg) { - sprintf( - msg, - "recount_rows %s.%s counted %llu rows and %llu deleted in %llu " - "seconds.", - _share->database_name(), - _share->table_name(), - _rows, - _deleted_rows, - _total_elapsed_time / tokudb::time::MICROSECONDS); -} - class standard_t : public tokudb::background::job_manager_t::job_t { public: @@ -261,13 +282,11 @@ public: virtual ~standard_t(); virtual const char* key(void); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -284,6 +303,8 @@ private: double _delete_fraction; // for analyze status reporting, may also use other state + char _parameters[256]; + char _status[1024]; int _result; ulonglong _analyze_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -305,7 +326,6 @@ private: uint64_t deleted_rows); bool analyze_standard_cursor_callback(uint64_t deleted_rows); - void get_analyze_status(char*); int analyze_key_progress(); int analyze_key(uint64_t* rec_per_key_part); }; @@ -351,6 +371,16 @@ standard_t::standard_t( _time_limit = tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS; _delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " + "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", + _delete_fraction, + _time_limit / tokudb::time::MICROSECONDS, + _throttle); + + _status[0] = '\0'; } standard_t::~standard_t() { } @@ -358,6 +388,10 @@ void standard_t::on_run() { DB_BTREE_STAT64 stat64; uint64_t rec_per_key_part[_share->_max_key_parts]; uint64_t total_key_parts = 0; + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); + _analyze_start = tokudb::time::microsec(); _half_time = _time_limit > 0 ? 
_time_limit/2 : 0; @@ -395,7 +429,7 @@ void standard_t::on_run() { _result = HA_ADMIN_FAILED; } if (_thd && (_result == HA_ADMIN_FAILED || - (double)_deleted_rows > + static_cast<double>(_deleted_rows) > _delete_fraction * (_rows + _deleted_rows))) { char name[256]; int namelen; @@ -460,8 +494,9 @@ cleanup: } error: + if (_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; - } void standard_t::on_destroy() { _share->lock(); @@ -472,24 +507,21 @@ void standard_t::on_destroy() { const char* standard_t::key() { return _share->full_table_name(); } -void standard_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD"); - sprintf( - params, - "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " - "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", - _delete_fraction, - _time_limit / tokudb::time::MICROSECONDS, - _throttle); - get_analyze_status(status); +const char* standard_t::database() { + return _share->database_name(); +} +const char* standard_t::table() { + return _share->table_name(); +} +const char* standard_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD"; + return type; +} +const char* standard_t::parameters() { + return _parameters; +} +const char* standard_t::status() { + return _status; } bool standard_t::analyze_standard_cursor_callback( void* extra, @@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) { _ticks += deleted_rows; return analyze_key_progress() != 0; } -void standard_t::get_analyze_status(char* msg) { - static const char* scan_direction_str[] = { - "not scanning", - "scanning forward", - "scanning backward", - "scan unknown" - }; - - const char* scan_direction = NULL; - switch (_scan_direction) { - case 0: scan_direction = scan_direction_str[0]; break; - case DB_NEXT: scan_direction = scan_direction_str[1]; break; - case DB_PREV: scan_direction = scan_direction_str[2]; break; - default: scan_direction = scan_direction_str[3]; break; - } - - float progress_rows = 0.0; - if (_share->row_count() > 0) - progress_rows = (float) _rows / (float) _share->row_count(); - float progress_time = 0.0; - if (_time_limit > 0) - progress_time = (float) _key_elapsed_time / (float) _time_limit; - sprintf( - msg, - "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, " - "%s", - _share->database_name(), - _share->table_name(), - _share->_key_descriptors[_current_key]._name, - _current_key, - _share->_keys, - progress_rows * 100.0, - progress_time * 100.0, - scan_direction); -} int standard_t::analyze_key_progress(void) { if (_ticks > 1000) { _ticks = 0; @@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) { if ((_thd && thd_killed(_thd)) || cancelled()) { // client killed return ER_ABORTING_CONNECTION; - } else if(_time_limit > 0 && - (uint64_t)_key_elapsed_time > _time_limit) { + } else if (_time_limit > 0 && + static_cast<uint64_t>(_key_elapsed_time) > _time_limit) { // time limit reached return ETIME; } - // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. 
+ // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. + // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status. + static const char* scan_direction_str[] = {"not scanning", + "scanning forward", + "scanning backward", + "scan unknown"}; + + const char* scan_direction = NULL; + switch (_scan_direction) { + case 0: + scan_direction = scan_direction_str[0]; + break; + case DB_NEXT: + scan_direction = scan_direction_str[1]; + break; + case DB_PREV: + scan_direction = scan_direction_str[2]; + break; + default: + scan_direction = scan_direction_str[3]; + break; } + float progress_rows = 0.0; + if (_share->row_count() > 0) + progress_rows = static_cast<float>(_rows) / + static_cast<float>(_share->row_count()); + float progress_time = 0.0; + if (_time_limit > 0) + progress_time = static_cast<float>(_key_elapsed_time) / + static_cast<float>(_time_limit); + tokudb::background::_job_manager->lock(); + snprintf( + _status, + sizeof(_status), + "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% " + "time, %s", + _share->database_name(), + _share->table_name(), + _share->_key_descriptors[_current_key]._name, + _current_key, + _share->_keys, + progress_rows * 100.0, + progress_time * 100.0, + scan_direction); + tokudb::background::_job_manager->unlock(); + + // report + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); + // throttle // given the throttle value, lets calculate the maximum number of rows // we should have seen so far in a .1 sec resolution @@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) { assert_always(close_error == 0); done: + // in case we timed out (bunch of deleted records) without hitting a + // single row + if (_rows == 0) + _rows = 1; + // return cardinality for (uint64_t i = 0; i < num_key_parts; i++) { rec_per_key_part[i] = _rows / unique_rows[i]; @@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) { assert_always(thd != NULL); - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); int result = HA_ADMIN_OK; tokudb::analyze::recount_rows_t* job @@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) { result = HA_ADMIN_FAILED; } - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } @@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { TOKUDB_HANDLER_DBUG_RETURN(result); } - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); - tokudb::analyze::standard_t* job = new tokudb::analyze::standard_t(txn == NULL ? false : true, thd, this, txn); @@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { lock(); - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h index b7726a746ad..1b33e0a53e4 100644 --- a/storage/tokudb/hatoku_defines.h +++ b/storage/tokudb/hatoku_defines.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. 
- TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -234,9 +234,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // mysql 5.6.15 removed the test macro, so we define our own #define tokudb_test(e) ((e) ? 1 : 0) -inline const char* tokudb_thd_get_proc_info(const THD *thd) { +inline const char* tokudb_thd_get_proc_info(const THD* thd) { return thd->proc_info; } +inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) { + thd_proc_info(thd, proc_info); +} // uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead inline uint tokudb_uint3korr(const uchar *a) { diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result deleted file mode 100644 index ccfffb53976..00000000000 --- a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result +++ /dev/null @@ -1,51 +0,0 @@ -include/master-slave.inc -[connection master] -CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB; -CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB; -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t1 VALUES (10); -INSERT INTO t1 VALUES (NULL),(NULL),(NULL); -INSERT INTO t2 VALUES (5,0); -INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID()); -SET FOREIGN_KEY_CHECKS=1; -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -SET TIMESTAMP=1000000000; -CREATE TABLE t3 ( a INT UNIQUE ); -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t3 VALUES (1),(1); -ERROR 23000: Duplicate entry '1' for key 'a' -SET FOREIGN_KEY_CHECKS=0; -DROP TABLE IF EXISTS t1,t2,t3; -SET FOREIGN_KEY_CHECKS=1; -create table t1 (b int primary key) engine = TokuDB; -create table t2 (a int primary key, b int, foreign key (b) references t1(b)) -engine = TokuDB; -insert into t1 set b=1; -insert into t2 set a=1, b=1; -set foreign_key_checks=0; -delete from t1; -must sync w/o a problem (could not with the buggy code) -select count(*) from t1 /* must be zero */; -count(*) -0 -drop table t2,t1; -include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test deleted file mode 100644 index 120ad0d5c1e..00000000000 --- a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test +++ /dev/null @@ -1,3 +0,0 @@ --- source include/have_tokudb.inc -let $engine_type=TokuDB; --- source extra/rpl_tests/rpl_foreign_key.test diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result index 5769ee74071..8b53f89efa3 100644 --- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result +++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result @@ -25,7 +25,7 @@ TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_statu `scheduler` varchar(32) NOT NULL DEFAULT '', `scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', `started_time` datetime DEFAULT NULL, - `status` varchar(256) DEFAULT NULL + `status` varchar(1024) DEFAULT NULL ) ENGINE=MEMORY DEFAULT CHARSET=utf8 create table t1 (a int not null auto_increment, b int, c int, primary key(a), key 
kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c)); insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test index 6100d9aeec2..8b6df4966f4 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test @@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`; create table foo (a int, b int); create table bar (a int, key(a)); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test index e1acea13ed7..53c1037b051 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test @@ -15,33 +15,11 @@ create table bar (a int); alter table foo drop column a; alter table bar add column b int, add column c int; -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test index 17a124249da..0421b8e9d26 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test @@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB; alter table foo drop index b; alter table bar add index (a); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; 
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test index 42dbb30058a..4c40339be5a 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test @@ -7,17 +7,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2; @@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test index 3903c2cef9f..0340b960fa5 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test @@ -6,17 +6,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. 
---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (id int, x int, primary key(id), key(x)); @@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc index d8ef54a5972..e019e41c788 100644 --- a/storage/tokudb/tokudb_background.cc +++ b/storage/tokudb/tokudb_background.cc @@ -8,7 +8,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. 
@@ -68,7 +68,8 @@ void job_manager_t::destroy() { while (_background_jobs.size()) { _mutex.lock(); job_t* job = _background_jobs.front(); - cancel(job); + if (!job->cancelled()) + cancel(job); _background_jobs.pop_front(); delete job; _mutex.unlock(); @@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) { it != _background_jobs.end(); it++) { job_t* job = *it; - if (!job->cancelled() && - strcmp(job->key(), key) == 0) { - + if (!job->cancelled() && strcmp(job->key(), key) == 0) { cancel(job); - ret = true; } } @@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) { } void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { - char database[256], table[256], type[256], params[256], status[256]; - _mutex.lock(); for (jobs_t::const_iterator it = _background_jobs.begin(); @@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { it++) { job_t* job = *it; if (!job->cancelled()) { - database[0] = table[0] = type[0] = params[0] = status[0] = '\0'; - job->status(database, table, type, params, status); - callback( - job->id(), - database, - table, - type, - params, - status, - job->user_scheduled(), - job->scheduled_time(), - job->started_time(), - extra); + callback(job, extra); } } @@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) { } void job_manager_t::cancel(job_t* job) { assert_debug(_mutex.is_owned_by_me()); + assert_always(!job->cancelled()); job->cancel(); } job_manager_t* _job_manager = NULL; diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h index 3786701fd0f..29991ab325d 100644 --- a/storage/tokudb/tokudb_background.h +++ b/storage/tokudb/tokudb_background.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -58,13 +58,20 @@ public: // (or jobs) usually used to find jobs to cancel virtual const char* key() = 0; - // method to get info for information schema, 255 chars per buffer - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status) = 0; + // method to obtain the database name the job is scheduled on + virtual const char* database() = 0; + + // method to obtain the table name the job is scheduled on + virtual const char* table() = 0; + + // method to obtain the type of job + virtual const char* type() = 0; + + // method to obtain a stringized list of job parameters + virtual const char* parameters() = 0; + + // method to obtain a sting identifying the current status of the job + virtual const char* status() = 0; inline bool running() const; @@ -99,17 +106,7 @@ public: }; // pfn for iterate callback - typedef void (*pfn_iterate_t)( - uint64_t, - const char*, - const char*, - const char*, - const char*, - const char*, - bool, - time_t, - time_t, - void*); + typedef void (*pfn_iterate_t)(class job_t*, void*); public: void* operator new(size_t sz); @@ -144,6 +141,11 @@ public: // data passed when the job was scheduled void iterate_jobs(pfn_iterate_t callback, void* extra) const; + // lock the bjm, this prevents anyone from running, cancelling or iterating + // jobs in the bjm. 
+ inline void lock(); + inline void unlock(); + private: static void* thread_func(void* v); @@ -170,6 +172,15 @@ extern job_manager_t* _job_manager; bool initialize(); bool destroy(); +inline void job_manager_t::lock() { + assert_debug(!_mutex.is_owned_by_me()); + _mutex.lock(); +} +inline void job_manager_t::unlock() { + assert_debug(_mutex.is_owned_by_me()); + _mutex.unlock(); +} + inline void job_manager_t::job_t::run() { if (!_cancelled) { _running = true; diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc index e69a7899b45..b3d77eef2d9 100644 --- a/storage/tokudb/tokudb_information_schema.cc +++ b/storage/tokudb/tokudb_information_schema.cc @@ -1085,7 +1085,7 @@ ST_FIELD_INFO background_job_status_field_info[] = { {"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE }, {"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, - {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, + {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1095,15 +1095,7 @@ struct background_job_status_extra { }; void background_job_status_callback( - uint64_t id, - const char* database_name, - const char* table_name, - const char* type, - const char* params, - const char* status, - bool user_scheduled, - time_t scheduled_time, - time_t started_time, + tokudb::background::job_manager_t::job_t* job, void* extra) { background_job_status_extra* e = @@ -1111,24 +1103,33 @@ void background_job_status_callback( THD* thd = e->thd; TABLE* table = e->table; + const char* tmp = NULL; - table->field[0]->store(id, false); - table->field[1]->store( - database_name, - strlen(database_name), - system_charset_info); - table->field[2]->store(table_name, strlen(table_name), system_charset_info); - table->field[3]->store(type, strlen(type), system_charset_info); - table->field[4]->store(params, strlen(params), system_charset_info); - if (user_scheduled) + table->field[0]->store(job->id(), false); + + tmp = job->database(); + table->field[1]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->table(); + table->field[2]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->type(); + table->field[3]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->parameters(); + table->field[4]->store(tmp, strlen(tmp), system_charset_info); + + if (job->user_scheduled()) table->field[5]->store("USER", strlen("USER"), system_charset_info); else table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info); - field_store_time_t(table->field[6], scheduled_time); - field_store_time_t(table->field[7], started_time); - if (status[0] != '\0') { - table->field[8]->store(status, strlen(status), system_charset_info); + field_store_time_t(table->field[6], job->scheduled_time()); + field_store_time_t(table->field[7], job->started_time()); + + tmp = job->status(); + if (tmp && tmp[0] != '\0') { + table->field[8]->store(tmp, strlen(tmp), system_charset_info); table->field[8]->set_notnull(); } else { table->field[8]->store(NULL, 0, system_charset_info); diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc index c2a70cce7aa..bce81f95ead 100644 --- a/storage/xtradb/btr/btr0btr.cc +++ b/storage/xtradb/btr/btr0btr.cc @@ -80,7 +80,7 @@ btr_corruption_report( buf_block_get_zip_size(block), BUF_PAGE_PRINT_NO_CRASH); } - 
buf_page_print(buf_block_get_frame_fast(block), 0, 0); + buf_page_print(buf_nonnull_block_get_frame(block), 0, 0); } #ifndef UNIV_HOTBACKUP @@ -827,11 +827,12 @@ btr_height_get( /* S latches the page */ root_block = btr_root_block_get(index, RW_S_LATCH, mtr); + ut_ad(root_block); // The index must not be corrupted if (root_block) { - height = btr_page_get_level(buf_block_get_frame_fast(root_block), mtr); - + height = btr_page_get_level(buf_nonnull_block_get_frame(root_block), + mtr); /* Release the S latch on the root page. */ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX); #ifdef UNIV_SYNC_DEBUG @@ -2912,7 +2913,7 @@ btr_attach_half_pages( } /* Get the level of the split pages */ - level = btr_page_get_level(buf_block_get_frame_fast(block), mtr); + level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr); ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block), mtr)); @@ -4289,8 +4290,10 @@ btr_discard_page( /* Decide the page which will inherit the locks */ - left_page_no = btr_page_get_prev(buf_block_get_frame_fast(block), mtr); - right_page_no = btr_page_get_next(buf_block_get_frame_fast(block), mtr); + left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block), + mtr); + right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block), + mtr); if (left_page_no != FIL_NULL) { merge_block = btr_block_get(space, zip_size, left_page_no, diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index a5ce3f3f983..873edec62b4 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -305,6 +305,8 @@ buf_flush_init_flush_rbt(void) buf_flush_list_mutex_enter(buf_pool); + ut_ad(buf_pool->flush_rbt == NULL); + /* Create red black tree for speedy insertions in flush list. */ buf_pool->flush_rbt = rbt_create( sizeof(buf_page_t*), buf_flush_block_cmp); diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index f21fd560235..c13d4583fef 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -736,7 +736,7 @@ dict_stats_copy( if (dst_idx->type & DICT_FTS) { continue; } - dict_stats_empty_index(dst_idx); + dict_stats_empty_index(dst_idx, true); } else { continue; } diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 81f26b27662..93df92e6e63 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1787,6 +1787,9 @@ fil_close_all_files(void) { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); @@ -1826,6 +1829,9 @@ fil_close_log_files( { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 5e008b37b8d..0507be04412 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -265,13 +265,15 @@ FTS auxiliary INDEX table and clear the cache at the end. 
@param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -3567,7 +3569,7 @@ fts_add_doc_by_id( DBUG_EXECUTE_IF( "fts_instrument_sync_debug", - fts_sync(cache->sync, true, true); + fts_sync(cache->sync, true, true, false); ); DEBUG_SYNC_C("fts_instrument_sync_request"); @@ -4379,13 +4381,11 @@ fts_sync_index( } /** Check if index cache has been synced completely -@param[in,out] sync sync state @param[in,out] index_cache index cache @return true if index is synced, otherwise false. */ static bool fts_sync_index_check( - fts_sync_t* sync, fts_index_cache_t* index_cache) { const ib_rbt_node_t* rbt_node; @@ -4408,14 +4408,36 @@ fts_sync_index_check( return(true); } -/*********************************************************************//** -Commit the SYNC, change state of processed doc ids etc. +/** Reset synced flag in index cache when rollback +@param[in,out] index_cache index cache */ +static +void +fts_sync_index_reset( + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + fts_node_t* fts_node; + fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes)); + + fts_node->synced = false; + } +} + +/** Commit the SYNC, change state of processed doc ids etc. +@param[in,out] sync sync state @return DB_SUCCESS if all OK */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t fts_sync_commit( -/*============*/ - fts_sync_t* sync) /*!< in: sync state */ + fts_sync_t* sync) { dberr_t error; trx_t* trx = sync->trx; @@ -4468,6 +4490,8 @@ fts_sync_commit( (double) n_nodes/ (double) elapsed_time); } + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); return(error); @@ -4490,6 +4514,10 @@ fts_sync_rollback( index_cache = static_cast<fts_index_cache_t*>( ib_vector_get(cache->indexes, i)); + /* Reset synced flag so nodes will not be skipped + in the next sync, see fts_sync_write_words(). */ + fts_sync_index_reset(index_cache); + for (j = 0; fts_index_selector[j].value; ++j) { if (index_cache->ins_graph[j] != NULL) { @@ -4515,6 +4543,9 @@ fts_sync_rollback( rw_lock_x_unlock(&cache->lock); fts_sql_rollback(trx); + + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); } @@ -4523,13 +4554,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { ulint i; dberr_t error = DB_SUCCESS; @@ -4558,6 +4591,12 @@ fts_sync( DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); + /* When sync in background, we hold dict operation lock + to prevent DDL like DROP INDEX, etc. 
*/ + if (has_dict) { + sync->trx->dict_operation_lock_mode = RW_S_LATCH; + } + begin_sync: if (cache->total_size > fts_max_cache_size) { /* Avoid the case: sync never finish when @@ -4598,7 +4637,7 @@ begin_sync: ib_vector_get(cache->indexes, i)); if (index_cache->index->to_be_dropped - || fts_sync_index_check(sync, index_cache)) { + || fts_sync_index_check(index_cache)) { continue; } @@ -4613,6 +4652,7 @@ end_sync: } rw_lock_x_lock(&cache->lock); + sync->interrupted = false; sync->in_progress = false; os_event_set(sync->event); rw_lock_x_unlock(&cache->lock); @@ -4636,20 +4676,23 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { dberr_t err = DB_SUCCESS; ut_ad(table->fts); if (!dict_table_is_discarded(table) && table->fts->cache) { - err = fts_sync(table->fts->cache->sync, unlock_cache, wait); + err = fts_sync(table->fts->cache->sync, + unlock_cache, wait, has_dict); } return(err); diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index d9f2532578e..ea937c20752 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -2986,7 +2986,7 @@ fts_optimize_sync_table( if (table) { if (dict_table_has_fts_index(table) && table->fts->cache) { - fts_sync_table(table, true, false); + fts_sync_table(table, true, false, true); } dict_table_close(table, FALSE, FALSE); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 14870659b0e..320b900d019 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -864,6 +864,19 @@ innobase_is_fake_change( THD* thd) __attribute__((unused)); /*!< in: MySQL thread handle of the user for whom the transaction is being committed */ +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List<FOREIGN_KEY_INFO>* f_key_list); /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code @@ -8398,6 +8411,7 @@ dberr_t ha_innobase::innobase_lock_autoinc(void) /*====================================*/ { + DBUG_ENTER("ha_innobase::innobase_lock_autoinc"); dberr_t error = DB_SUCCESS; ut_ad(!srv_read_only_mode); @@ -8437,6 +8451,8 @@ ha_innobase::innobase_lock_autoinc(void) /* Fall through to old style locking. 
*/ case AUTOINC_OLD_STYLE_LOCKING: + DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", + ut_ad(0);); error = row_lock_table_autoinc_for_mysql(prebuilt); if (error == DB_SUCCESS) { @@ -8450,7 +8466,7 @@ ha_innobase::innobase_lock_autoinc(void) ut_error; } - return(error); + DBUG_RETURN(error); } /********************************************************************//** @@ -14469,7 +14485,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (prebuilt->table->fts && prebuilt->table->fts->cache && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table, false, true); + fts_sync_table(prebuilt->table, false, true, false); fts_optimize_table(prebuilt->table); } return(HA_ADMIN_OK); @@ -14686,7 +14702,14 @@ ha_innobase::check( prebuilt->select_lock_type = LOCK_NONE; - if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + bool check_result + = row_check_index_for_mysql(prebuilt, index, &n_rows); + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!(index->type & DICT_CLUSTERED)) { + check_result = false; + }); + if (!check_result) { innobase_format_name( index_name, sizeof index_name, index->name, TRUE); @@ -15013,6 +15036,75 @@ get_foreign_key_info( return(pf_key_info); } +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys */ +static +void +fill_foreign_key_list(THD* thd, + const dict_table_t* table, + List<FOREIGN_KEY_INFO>* f_key_list) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); ++it) { + + dict_foreign_t* foreign = *it; + + FOREIGN_KEY_INFO* pf_key_info + = get_foreign_key_info(thd, foreign); + if (pf_key_info) { + f_key_list->push_back(pf_key_info); + } + } +} + +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List<FOREIGN_KEY_INFO>* f_key_list) +{ + ut_a(strlen(path) <= FN_REFLEN); + char norm_name[FN_REFLEN + 1]; + normalize_table_name(norm_name, path); + + trx_t* parent_trx = check_trx_exists(thd); + parent_trx->op_info = "getting list of referencing foreign keys"; + trx_search_latch_release_if_reserved(parent_trx); + + mutex_enter(&dict_sys->mutex); + + dict_table_t* table + = dict_table_open_on_name(norm_name, TRUE, FALSE, + static_cast<dict_err_ignore_t>( + DICT_ERR_IGNORE_INDEX_ROOT + | DICT_ERR_IGNORE_CORRUPT)); + if (!table) { + mutex_exit(&dict_sys->mutex); + return(HA_ERR_NO_SUCH_TABLE); + } + + fill_foreign_key_list(thd, table, f_key_list); + + dict_table_close(table, TRUE, FALSE); + + mutex_exit(&dict_sys->mutex); + parent_trx->op_info = ""; + return(0); +} + /*******************************************************************//** Gets the list of foreign keys in this table. 
@return always 0, that is, always succeeds */ @@ -15065,9 +15157,6 @@ ha_innobase::get_parent_foreign_key_list( THD* thd, /*!< in: user thread handle */ List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - ut_a(prebuilt != NULL); update_thd(ha_thd()); @@ -15076,20 +15165,7 @@ ha_innobase::get_parent_foreign_key_list( trx_search_latch_release_if_reserved(prebuilt->trx); mutex_enter(&(dict_sys->mutex)); - - for (dict_foreign_set::iterator it - = prebuilt->table->referenced_set.begin(); - it != prebuilt->table->referenced_set.end(); - ++it) { - - foreign = *it; - - pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } - + fill_foreign_key_list(thd, prebuilt->table, f_key_list); mutex_exit(&(dict_sys->mutex)); prebuilt->trx->op_info = ""; @@ -18892,7 +18968,6 @@ innodb_track_changed_pages_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming bool */ { - static bool enabled_on_startup = false; long long intbuf = 0; if (value->val_int(value, &intbuf)) { @@ -18900,8 +18975,7 @@ innodb_track_changed_pages_validate( return 1; } - if (srv_track_changed_pages || enabled_on_startup) { - enabled_on_startup = true; + if (srv_redo_log_thread_started) { *reinterpret_cast<ulong*>(save) = static_cast<ulong>(intbuf); return 0; diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index d0e26f1352c..d96ff377b4a 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. Copyrigth (c) 2014, 2016, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under @@ -2935,15 +2935,26 @@ i_s_fts_deleted_generic_fill( DBUG_RETURN(0); } - deleted = fts_doc_ids_create(); + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + + DBUG_RETURN(0); + } else if (!dict_table_has_fts_index(user_table)) { + dict_table_close(user_table, FALSE, FALSE); + + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } + deleted = fts_doc_ids_create(); + trx = trx_allocate_for_background(); trx->op_info = "Select for FTS DELETE TABLE"; @@ -2971,6 +2982,8 @@ i_s_fts_deleted_generic_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3342,6 +3355,12 @@ i_s_fts_index_cache_fill( DBUG_RETURN(0); } + if (user_table->fts == NULL || user_table->fts->cache == NULL) { + dict_table_close(user_table, FALSE, FALSE); + + DBUG_RETURN(0); + } + cache = user_table->fts->cache; ut_a(cache); @@ -3775,10 +3794,15 @@ i_s_fts_index_table_fill( DBUG_RETURN(0); } + /* Prevent DDL to drop fts aux tables. 
*/ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3791,6 +3815,8 @@ i_s_fts_index_table_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3925,14 +3951,21 @@ i_s_fts_config_fill( fields = table->field; + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table)) { dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3988,6 +4021,8 @@ i_s_fts_config_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index f599997be02..6924481af49 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1084,10 +1084,20 @@ buf_block_get_frame( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ MY_ATTRIBUTE((pure)); -# define buf_block_get_frame_fast(block) buf_block_get_frame(block) + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( + const buf_block_t* block) /*!< in: pointer to the control block */ + MY_ATTRIBUTE((pure)); + #else /* UNIV_DEBUG */ # define buf_block_get_frame(block) (block ? (block)->frame : 0) -# define buf_block_get_frame_fast(block) (block)->frame +# define buf_nonnull_block_get_frame(block) ((block)->frame) #endif /* UNIV_DEBUG */ /*********************************************************************//** Gets the space id of a block. diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 7b1c66f2a05..20721b28ef2 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -744,6 +744,19 @@ buf_block_get_frame( SRV_CORRUPT_TABLE_CHECK(block, return(0);); + return(buf_nonnull_block_get_frame(block)); +} + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( +/*========================*/ + const buf_block_t* block) /*!< in: pointer to the control block */ +{ switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -768,6 +781,7 @@ buf_block_get_frame( ok: return((buf_frame_t*) block->frame); } + #endif /* UNIV_DEBUG */ /*********************************************************************//** diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index 68d4d333245..87b5787d416 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -840,13 +840,15 @@ FTS auxiliary INDEX table and clear the cache at the end. 
@param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d95adf00814..f60cfde1264 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -225,8 +225,10 @@ extern os_event_t srv_checkpoint_completed_event; log tracking iteration */ extern os_event_t srv_redo_log_tracked_event; -/** srv_redo_log_follow_thread spawn flag */ -extern bool srv_redo_log_thread_started; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ +extern bool srv_redo_log_thread_started; /* If the last data file is auto-extended, we add this many pages to it at a time */ @@ -344,6 +346,10 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; + +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ extern my_bool srv_track_changed_pages; extern ulonglong srv_max_bitmap_file_size; diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 5320776c042..a42b8b8bc25 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 31 +#define INNODB_VERSION_BUGFIX 32 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 77.0 +#define PERCONA_INNODB_VERSION 78.1 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 0b5d27b8fd1..411fed91ac5 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -3752,7 +3752,7 @@ loop: /* Wake the log tracking thread which will then immediatelly quit because of srv_shutdown_state value */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } @@ -3831,7 +3831,7 @@ loop: srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Signal the log following thread to quit */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 63f1ef39568..167d46e2ae8 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -1788,20 +1788,20 @@ log_online_purge_changed_page_bitmaps( lsn = LSN_MAX; } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { /* User requests might happen with both enabled and disabled tracking */ mutex_enter(&log_bmp_sys->mutex); } if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) { - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { mutex_exit(&log_bmp_sys->mutex); } return TRUE; } - 
if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) { + if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) { /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); @@ -1834,7 +1834,7 @@ log_online_purge_changed_page_bitmaps( } } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { if (lsn > log_bmp_sys->end_lsn) { lsn_t new_file_lsn; if (lsn == LSN_MAX) { @@ -1845,9 +1845,7 @@ log_online_purge_changed_page_bitmaps( new_file_lsn = log_bmp_sys->end_lsn; } if (!log_online_rotate_bitmap_file(new_file_lsn)) { - /* If file create failed, signal the log - tracking thread to quit next time it wakes - up. */ + /* If file create failed, stop log tracking */ srv_track_changed_pages = FALSE; } } diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 759687e3fe5..092c2ed88dc 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -392,12 +392,6 @@ recv_sys_init( } #ifndef UNIV_HOTBACKUP - /* Initialize red-black tree for fast insertions into the - flush_list during recovery process. - As this initialization is done while holding the buffer pool - mutex we perform it before acquiring recv_sys->mutex. */ - buf_flush_init_flush_rbt(); - mutex_enter(&(recv_sys->mutex)); recv_sys->heap = mem_heap_create_typed(256, @@ -490,9 +484,6 @@ recv_sys_debug_free(void) recv_sys->last_block_buf_start = NULL; mutex_exit(&(recv_sys->mutex)); - - /* Free up the flush_rbt. */ - buf_flush_free_flush_rbt(); } # endif /* UNIV_LOG_DEBUG */ @@ -3140,6 +3131,11 @@ recv_recovery_from_checkpoint_start_func( byte* log_hdr_buf_base = reinterpret_cast<byte *> (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); dberr_t err; + + /* Initialize red-black tree for fast insertions into the + flush_list during recovery process. */ + buf_flush_init_flush_rbt(); + ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr); log_hdr_buf = static_cast<byte *> @@ -3568,6 +3564,9 @@ recv_recovery_from_checkpoint_finish(void) #ifndef UNIV_LOG_DEBUG recv_sys_debug_free(); #endif + /* Free up the flush_rbt. */ + buf_flush_free_flush_rbt(); + /* Roll back any recovered data dictionary transactions, so that the data dictionary tables will be free of any locks. The data dictionary latch should guarantee that there is at diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index f5967ede3e7..3d7a5d2ef5d 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -2177,7 +2177,7 @@ wait_again: /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ err = fts_sync_table(const_cast<dict_table_t*>(new_table), - false, true); + false, true, false); if (err == DB_SUCCESS) { fts_update_next_doc_id( diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 1e0d21d4a9e..7c2e549e188 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2016, MariaDB Corporation. 
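For reference while reading the FTS hunks above: fts_sync() and fts_sync_table() now take a fourth has_dict argument stating whether the caller holds the dict operation lock, and every call site in this diff passes it explicitly. A minimal sketch of the resulting calling convention follows; the helper name is invented, and only the declarations visible in this diff (storage/xtradb/include/fts0fts.h) are assumed.

#include "fts0fts.h"

/* Hypothetical helper, not part of the patch: it mirrors the two call
   patterns changed in this diff.  The background optimizer
   (fts_optimize_sync_table) passes unlock_cache=true, wait=false,
   has_dict=true because background sync runs with the dict operation
   lock held, which fts_sync() records in
   sync->trx->dict_operation_lock_mode; the foreground callers
   (ha_innobase::optimize, row0merge.cc) pass false, true, false. */
static
dberr_t
fts_sync_table_sketch(
	dict_table_t*	table,
	bool		in_background)
{
	return(fts_sync_table(table,
			      /* unlock_cache */ in_background,
			      /* wait */ !in_background,
			      /* has_dict */ in_background));
}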
@@ -1499,7 +1499,10 @@ srv_mon_set_module_control( module */ set_current_module = FALSE; } else if (module_id == MONITOR_ALL_COUNTER) { - continue; + if (!(innodb_counter_info[ix].monitor_type + & MONITOR_GROUP_MODULE)) { + continue; + } } else { /* Hitting the next module, stop */ break; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index a836442eb70..f9c75ffe576 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -219,6 +219,9 @@ UNIV_INTERN char** srv_data_file_names = NULL; /* size in database pages */ UNIV_INTERN ulint* srv_data_file_sizes = NULL; +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ UNIV_INTERN my_bool srv_track_changed_pages = FALSE; UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; @@ -848,6 +851,9 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event; UNIV_INTERN os_event_t srv_redo_log_tracked_event; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ UNIV_INTERN bool srv_redo_log_thread_started = false; /*********************************************************************//** @@ -2546,13 +2552,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)( os_event_wait(srv_checkpoint_completed_event); os_event_reset(srv_checkpoint_completed_event); -#ifdef UNIV_DEBUG - if (!srv_track_changed_pages) { - continue; - } -#endif - - if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { + if (srv_track_changed_pages + && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { if (!log_online_follow_redo_log()) { /* TODO: sync with I_S log tracking status? */ ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index cae85f38c12..f2bcf69bbc6 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1,5 +1,5 @@ /* Copyright (c) 2003, 2013, Oracle and/or its affiliates - Copyright (c) 2009, 2014, SkySQL Ab. + Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index e4eb2832dff..900e2d3500a 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2013, Monty Program Ab + Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index e5cdbfd3ce8..54f9ff55e3b 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -308,7 +308,7 @@ case "$mode" in then # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. - $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null 2>&1 & + $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null & wait_for_ready; return_value=$? 
# Make lock for RedHat / SuSE diff --git a/tests/async_queries.c b/tests/async_queries.c index 76e884e6a69..a8889fc8d5a 100644 --- a/tests/async_queries.c +++ b/tests/async_queries.c @@ -425,7 +425,7 @@ main(int argc, char *argv[]) event_dispatch(); - free(sds); + my_free(sds); mysql_library_end(); diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt index 0535a486d57..1682bae6986 100644 --- a/win/packaging/CMakeLists.txt +++ b/win/packaging/CMakeLists.txt @@ -24,10 +24,13 @@ ENDIF() SET(MANUFACTURER "MariaDB Corporation Ab") -FIND_PATH(WIX_DIR heat.exe - "$ENV{ProgramFiles}/WiX Toolset v3.9/bin" - "$ENV{ProgramFiles}/WiX Toolset v3.10/bin" -) +SET(WIX_BIN_PATHS) +FOREACH(WIX_VER 3.9 3.10 3.11) + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin") + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin") +ENDFOREACH() + +FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS}) SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB") IF(CMAKE_SIZEOF_VOID_P EQUAL 4) SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3") diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in index c2ab648a6db..1f847a39695 100644 --- a/win/packaging/create_msi.cmake.in +++ b/win/packaging/create_msi.cmake.in @@ -434,6 +434,7 @@ EXECUTE_PROCESS( IF(SIGNCODE) EXECUTE_PROCESS( COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS} + /d ${CPACK_PACKAGE_FILE_NAME}.msi ${CPACK_PACKAGE_FILE_NAME}.msi ) ENDIF()
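Earlier in this diff, tokudb_background.h replaces the nine-argument iterate callback and the buffer-filling job_t::status() with per-field accessors and a pfn_iterate_t that receives the job_t itself. A hypothetical callback under the new signature, shown only to make that interface change concrete (the namespace, accessors and _job_manager pointer come from the diff; everything else, including the output format, is invented):

#include <stdio.h>
#include "tokudb_background.h"

/* Illustration only: dump each live background job, consuming the new
   job_t accessors the same way background_job_status_callback in
   tokudb_information_schema.cc does. */
static void dump_background_job(
    tokudb::background::job_manager_t::job_t* job,
    void* extra) {
    FILE* out = static_cast<FILE*>(extra);
    /* status() may be NULL or empty; the I_S code guards it the same way */
    const char* status = job->status();
    fprintf(out,
            "job %llu %s.%s type=%s params=%s status=%s\n",
            (unsigned long long)job->id(),
            job->database(),
            job->table(),
            job->type(),
            job->parameters(),
            status && status[0] ? status : "(none)");
}

/* Usage, mirroring how the information_schema table drives its callback:
   tokudb::background::_job_manager->iterate_jobs(dump_background_job, stderr);
*/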