-rw-r--r--  CREDITS | 1
-rw-r--r--  VERSION | 2
-rw-r--r--  client/mysql.cc | 56
-rw-r--r--  client/mysqldump.c | 60
-rw-r--r--  client/mysqltest.cc | 4
-rw-r--r--  cmake/cpack_rpm.cmake | 3
-rw-r--r--  cmake/package_name.cmake | 4
-rw-r--r--  extra/innochecksum.cc | 55
-rw-r--r--  extra/yassl/README | 18
-rw-r--r--  extra/yassl/certs/dsa-cert.pem | 38
-rw-r--r--  extra/yassl/include/openssl/ssl.h | 2
-rw-r--r--  extra/yassl/src/ssl.cpp | 60
-rw-r--r--  extra/yassl/taocrypt/include/aes.hpp | 58
-rw-r--r--  extra/yassl/taocrypt/include/integer.hpp | 3
-rw-r--r--  extra/yassl/taocrypt/src/aes.cpp | 172
-rw-r--r--  extra/yassl/taocrypt/src/asn.cpp | 24
-rw-r--r--  extra/yassl/taocrypt/src/dsa.cpp | 16
-rw-r--r--  extra/yassl/taocrypt/src/integer.cpp | 5
-rw-r--r--  extra/yassl/taocrypt/test/test.cpp | 3
-rw-r--r--  extra/yassl/testsuite/test.hpp | 2
-rw-r--r--  include/byte_order_generic_x86.h | 10
-rw-r--r--  include/byte_order_generic_x86_64.h | 8
-rw-r--r--  include/my_global.h | 3
-rw-r--r--  include/my_sys.h | 4
-rw-r--r--  libmysql/libmysql.c | 5
-rw-r--r--  mysql-test/extra/binlog_tests/database.test | 2
-rw-r--r--  mysql-test/include/index_merge2.inc | 1
-rw-r--r--  mysql-test/include/search_pattern_in_file.inc | 15
-rw-r--r--  mysql-test/lib/My/CoreDump.pm | 6
-rw-r--r--  mysql-test/lib/My/Platform.pm | 49
-rw-r--r--  mysql-test/lib/mtr_cases.pm | 4
-rw-r--r--  mysql-test/lib/mtr_io.pl | 9
-rw-r--r--  mysql-test/lib/mtr_report.pm | 3
-rwxr-xr-x  mysql-test/mysql-test-run.pl | 10
-rw-r--r--  mysql-test/r/alter_table.result | 55
-rw-r--r--  mysql-test/r/contributors.result | 1
-rw-r--r--  mysql-test/r/create_or_replace.result | 11
-rw-r--r--  mysql-test/r/ctype_utf32.result | 3
-rw-r--r--  mysql-test/r/drop.result | 6
-rw-r--r--  mysql-test/r/group_min_max_innodb.result | 16
-rw-r--r--  mysql-test/r/index_merge_innodb.result | 3
-rw-r--r--  mysql-test/r/index_merge_myisam.result | 3
-rw-r--r--  mysql-test/r/information_schema.result | 8
-rw-r--r--  mysql-test/r/lowercase_fs_on.result | 1
-rw-r--r--  mysql-test/r/merge.result | 17
-rw-r--r--  mysql-test/r/mysql.result | 8
-rw-r--r--  mysql-test/r/mysql_not_windows.result | 6
-rw-r--r--  mysql-test/r/mysqldump-nl.result | 126
-rw-r--r--  mysql-test/r/mysqldump.result | 3
-rw-r--r--  mysql-test/r/mysqltest.result | 6
-rw-r--r--  mysql-test/r/named_pipe.result | 1
-rw-r--r--  mysql-test/r/ps.result | 33
-rw-r--r--  mysql-test/r/selectivity.result | 71
-rw-r--r--  mysql-test/r/selectivity_innodb.result | 113
-rw-r--r--  mysql-test/r/type_uint.result | 19
-rw-r--r--  mysql-test/r/view.result | 1
-rw-r--r--  mysql-test/r/wait_timeout_not_windows.result | 1
-rw-r--r--  mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result | 1
-rw-r--r--  mysql-test/suite/innodb/r/innodb_bug54044.result | 3
-rw-r--r--  mysql-test/suite/innodb/r/system_tables.result | 8
-rw-r--r--  mysql-test/suite/innodb/t/innodb_bug54044.test | 6
-rw-r--r--  mysql-test/suite/innodb/t/system_tables.test | 12
-rw-r--r--  mysql-test/suite/perfschema/r/aggregate.result | 121
-rw-r--r--  mysql-test/suite/perfschema/t/aggregate.test | 197
-rw-r--r--  mysql-test/suite/plugins/r/server_audit.result | 3
-rw-r--r--  mysql-test/suite/plugins/r/thread_pool_server_audit.result | 3
-rw-r--r--  mysql-test/suite/rpl/r/rpl_checksum.result | 1
-rw-r--r--  mysql-test/suite/rpl/r/rpl_gtid_errorlog.result | 2
-rw-r--r--  mysql-test/suite/rpl/r/rpl_stop_slave_error.result | 6
-rw-r--r--  mysql-test/suite/rpl/t/rpl_drop_db.test | 4
-rw-r--r--  mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt | 1
-rw-r--r--  mysql-test/suite/rpl/t/rpl_stop_slave_error.test | 17
-rw-r--r--  mysql-test/t/alter_table.test | 25
-rw-r--r--  mysql-test/t/create_or_replace.test | 12
-rw-r--r--  mysql-test/t/ctype_utf32.test | 5
-rw-r--r--  mysql-test/t/drop.test | 9
-rw-r--r--  mysql-test/t/group_min_max_innodb.test | 13
-rw-r--r--  mysql-test/t/information_schema.test | 8
-rw-r--r--  mysql-test/t/merge.test | 13
-rw-r--r--  mysql-test/t/mysql.test | 8
-rw-r--r--  mysql-test/t/mysql_not_windows.test | 9
-rw-r--r--  mysql-test/t/mysqldump-nl.test | 38
-rw-r--r--  mysql-test/t/mysqltest.test | 9
-rw-r--r--  mysql-test/t/ps.test | 29
-rw-r--r--  mysql-test/t/selectivity.test | 52
-rw-r--r--  mysql-test/t/selectivity_innodb.test | 25
-rw-r--r--  mysql-test/t/type_uint.test | 7
-rw-r--r--  mysql-test/unstable-tests | 92
-rw-r--r--  mysql-test/valgrind.supp | 121
-rw-r--r--  mysys/my_fopen.c | 6
-rw-r--r--  mysys/my_redel.c | 7
-rw-r--r--  mysys/my_static.c | 1
-rw-r--r--  plugin/feedback/utils.cc | 13
-rw-r--r--  plugin/server_audit/server_audit.c | 5
-rw-r--r--  scripts/mysqld_safe.sh | 6
-rw-r--r--  sql/contributors.h | 1
-rw-r--r--  sql/field.cc | 2
-rw-r--r--  sql/item.cc | 23
-rw-r--r--  sql/item_subselect.cc | 4
-rw-r--r--  sql/log.cc | 2
-rw-r--r--  sql/mysqld.cc | 1
-rw-r--r--  sql/net_serv.cc | 4
-rw-r--r--  sql/opt_range.cc | 9
-rw-r--r--  sql/parse_file.h | 6
-rw-r--r--  sql/signal_handler.cc | 4
-rw-r--r--  sql/slave.cc | 9
-rw-r--r--  sql/sql_admin.cc | 14
-rw-r--r--  sql/sql_base.cc | 1
-rw-r--r--  sql/sql_class.cc | 2
-rw-r--r--  sql/sql_class.h | 8
-rw-r--r--  sql/sql_db.cc | 26
-rw-r--r--  sql/sql_parse.cc | 12
-rw-r--r--  sql/sql_plugin.cc | 151
-rw-r--r--  sql/sql_select.cc | 30
-rw-r--r--  sql/sql_statistics.cc | 15
-rw-r--r--  sql/sql_statistics.h | 5
-rw-r--r--  sql/sql_table.cc | 4
-rw-r--r--  sql/table_cache.cc | 2
-rw-r--r--  sql/threadpool_common.cc | 98
-rw-r--r--  storage/connect/JdbcInterface.java | 16
-rw-r--r--  storage/connect/filamdbf.cpp | 86
-rw-r--r--  storage/connect/filamdbf.h | 2
-rw-r--r--  storage/connect/ha_connect.cc | 84
-rw-r--r--  storage/connect/jdbconn.cpp | 249
-rw-r--r--  storage/connect/jdbconn.h | 1
-rw-r--r--  storage/connect/json.cpp | 2
-rw-r--r--  storage/connect/reldef.cpp | 8
-rw-r--r--  storage/connect/tabjdbc.cpp | 19
-rw-r--r--  storage/innobase/dict/dict0stats.cc | 29
-rw-r--r--  storage/innobase/fts/fts0fts.cc | 31
-rw-r--r--  storage/innobase/handler/ha_innodb.cc | 9
-rw-r--r--  storage/innobase/handler/handler0alter.cc | 9
-rw-r--r--  storage/innobase/handler/i_s.cc | 2
-rw-r--r--  storage/innobase/include/fts0fts.h | 10
-rw-r--r--  storage/innobase/include/univ.i | 2
-rw-r--r--  storage/innobase/row/row0log.cc | 14
-rw-r--r--  storage/innobase/row/row0mysql.cc | 34
-rw-r--r--  storage/oqgraph/graphcore.cc | 2
-rw-r--r--  storage/oqgraph/oqgraph_shim.h | 48
-rw-r--r--  storage/perfschema/ha_perfschema.cc | 2
-rw-r--r--  storage/tokudb/CMakeLists.txt | 2
-rw-r--r--  storage/tokudb/PerconaFT/buildheader/make_tdb.cc | 7
-rw-r--r--  storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake | 4
-rw-r--r--  storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake | 3
-rw-r--r--  storage/tokudb/PerconaFT/ft/CMakeLists.txt | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc | 16
-rw-r--r--  storage/tokudb/PerconaFT/ft/cachetable/cachetable.h | 6
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft-flusher.cc | 4
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft-ops.cc | 366
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft-ops.h | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft-recount-rows.cc | 29
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft.cc | 17
-rw-r--r--  storage/tokudb/PerconaFT/ft/ft.h | 6
-rw-r--r--  storage/tokudb/PerconaFT/ft/loader/loader-internal.h | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/loader/loader.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/logger/logformat.cc | 9
-rw-r--r--  storage/tokudb/PerconaFT/ft/logger/recover.cc | 78
-rw-r--r--  storage/tokudb/PerconaFT/ft/node.cc | 125
-rw-r--r--  storage/tokudb/PerconaFT/ft/node.h | 1
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc | 473
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/block_allocator.h | 162
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc | 224
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/block_table.cc | 632
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/block_table.h | 143
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/compress.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc | 350
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc | 69
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc | 833
-rw-r--r--  storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h | 355
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc | 126
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc | 380
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc | 403
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc | 281
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc | 231
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc | 831
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/ft-test.cc | 11
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc | 4
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc | 6
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc (renamed from storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h) | 86
-rw-r--r--  storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc | 103
-rw-r--r--  storage/tokudb/PerconaFT/ft/txn/roll.cc | 120
-rw-r--r--  storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc | 23
-rw-r--r--  storage/tokudb/PerconaFT/ft/ule.cc | 4
-rw-r--r--  storage/tokudb/PerconaFT/portability/CMakeLists.txt | 3
-rw-r--r--  storage/tokudb/PerconaFT/portability/file.cc | 6
-rw-r--r--  storage/tokudb/PerconaFT/portability/huge_page_detection.cc | 6
-rw-r--r--  storage/tokudb/PerconaFT/portability/memory.cc | 9
-rw-r--r--  storage/tokudb/PerconaFT/portability/memory.h | 4
-rw-r--r--  storage/tokudb/PerconaFT/portability/portability.cc | 9
-rw-r--r--  storage/tokudb/PerconaFT/portability/tests/test-max-data.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/portability/tests/test-xid.cc | 9
-rw-r--r--  storage/tokudb/PerconaFT/portability/toku_config.h.in | 2
-rw-r--r--  storage/tokudb/PerconaFT/portability/toku_portability.h | 2
-rw-r--r--  storage/tokudb/PerconaFT/portability/toku_time.h | 8
-rw-r--r--  storage/tokudb/PerconaFT/src/indexer-internal.h | 2
-rw-r--r--  storage/tokudb/PerconaFT/src/indexer-undo-do.cc | 4
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/CMakeLists.txt | 42
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test | 2
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc | 153
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc | 4
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/test_stress0.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc | 3
-rw-r--r--  storage/tokudb/PerconaFT/src/ydb-internal.h | 5
-rw-r--r--  storage/tokudb/PerconaFT/src/ydb.cc | 50
-rw-r--r--  storage/tokudb/PerconaFT/src/ydb_db.cc | 99
-rw-r--r--  storage/tokudb/PerconaFT/src/ydb_db.h | 16
-rw-r--r--  storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess | 449
-rw-r--r--  storage/tokudb/PerconaFT/tools/CMakeLists.txt | 3
-rw-r--r--  storage/tokudb/PerconaFT/tools/ba_replay.cc | 629
-rw-r--r--  storage/tokudb/PerconaFT/tools/ftverify.cc | 2
-rw-r--r--  storage/tokudb/PerconaFT/tools/tokuftdump.cc | 1
-rw-r--r--  storage/tokudb/PerconaFT/util/tests/x1764-test.cc | 2
-rw-r--r--  storage/tokudb/ha_tokudb.cc | 26
-rw-r--r--  storage/tokudb/ha_tokudb_admin.cc | 278
-rw-r--r--  storage/tokudb/hatoku_defines.h | 7
-rw-r--r--  storage/tokudb/hatoku_hton.cc | 1
-rw-r--r--  storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result | 51
-rw-r--r--  storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test | 4
-rw-r--r--  storage/tokudb/mysql-test/tokudb/disabled.def | 1
-rw-r--r--  storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc | 1
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/background_job_manager.result | 2
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result | 10
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/dir_per_db.result | 180
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result | 12
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/row_format.result | 51
-rw-r--r--  storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result | 1
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt | 1
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test | 16
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/dir_per_db.test | 76
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc | 9
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test | 29
-rw-r--r--  storage/tokudb/mysql-test/tokudb/t/row_format.test | 41
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/r/db938.result | 2
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/db938.test | 4
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test | 26
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test | 26
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test | 26
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test | 41
-rw-r--r--  storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test | 41
-rw-r--r--  storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc | 1
-rw-r--r--  storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test | 4
-rw-r--r--  storage/tokudb/tokudb_background.cc | 27
-rw-r--r--  storage/tokudb/tokudb_background.h | 49
-rw-r--r--  storage/tokudb/tokudb_information_schema.cc | 47
-rw-r--r--  storage/tokudb/tokudb_sysvars.cc | 14
-rw-r--r--  storage/tokudb/tokudb_sysvars.h | 1
-rw-r--r--  storage/xtradb/btr/btr0btr.cc | 16
-rw-r--r--  storage/xtradb/btr/btr0cur.cc | 4
-rw-r--r--  storage/xtradb/buf/buf0buf.cc | 4
-rw-r--r--  storage/xtradb/buf/buf0dblwr.cc | 2
-rw-r--r--  storage/xtradb/buf/buf0flu.cc | 7
-rw-r--r--  storage/xtradb/dict/dict0boot.cc | 4
-rw-r--r--  storage/xtradb/dict/dict0crea.cc | 583
-rw-r--r--  storage/xtradb/dict/dict0dict.cc | 158
-rw-r--r--  storage/xtradb/dict/dict0load.cc | 159
-rw-r--r--  storage/xtradb/dict/dict0stats.cc | 29
-rw-r--r--  storage/xtradb/fil/fil0fil.cc | 8
-rw-r--r--  storage/xtradb/fts/fts0fts.cc | 98
-rw-r--r--  storage/xtradb/fts/fts0opt.cc | 2
-rw-r--r--  storage/xtradb/handler/ha_innodb.cc | 571
-rw-r--r--  storage/xtradb/handler/ha_innodb.h | 37
-rw-r--r--  storage/xtradb/handler/handler0alter.cc | 75
-rw-r--r--  storage/xtradb/handler/i_s.cc | 41
-rw-r--r--  storage/xtradb/handler/xtradb_i_s.cc | 354
-rw-r--r--  storage/xtradb/handler/xtradb_i_s.h | 2
-rw-r--r--  storage/xtradb/ibuf/ibuf0ibuf.cc | 4
-rw-r--r--  storage/xtradb/include/buf0buf.h | 14
-rw-r--r--  storage/xtradb/include/buf0buf.ic | 14
-rw-r--r--  storage/xtradb/include/data0type.h | 14
-rw-r--r--  storage/xtradb/include/data0type.ic | 16
-rw-r--r--  storage/xtradb/include/dict0boot.h | 32
-rw-r--r--  storage/xtradb/include/dict0crea.h | 91
-rw-r--r--  storage/xtradb/include/dict0dict.h | 46
-rw-r--r--  storage/xtradb/include/dict0load.h | 29
-rw-r--r--  storage/xtradb/include/fts0fts.h | 14
-rw-r--r--  storage/xtradb/include/os0thread.h | 15
-rw-r--r--  storage/xtradb/include/rem0types.h | 3
-rw-r--r--  storage/xtradb/include/row0mysql.h | 85
-rw-r--r--  storage/xtradb/include/srv0srv.h | 15
-rw-r--r--  storage/xtradb/include/univ.i | 4
-rw-r--r--  storage/xtradb/log/log0log.cc | 24
-rw-r--r--  storage/xtradb/log/log0online.cc | 45
-rw-r--r--  storage/xtradb/log/log0recv.cc | 17
-rw-r--r--  storage/xtradb/mach/mach0data.cc | 13
-rw-r--r--  storage/xtradb/os/os0thread.cc | 33
-rw-r--r--  storage/xtradb/rem/rem0rec.cc | 23
-rw-r--r--  storage/xtradb/row/row0ftsort.cc | 2
-rw-r--r--  storage/xtradb/row/row0log.cc | 14
-rw-r--r--  storage/xtradb/row/row0merge.cc | 20
-rw-r--r--  storage/xtradb/row/row0mysql.cc | 634
-rw-r--r--  storage/xtradb/row/row0sel.cc | 45
-rw-r--r--  storage/xtradb/srv/srv0mon.cc | 7
-rw-r--r--  storage/xtradb/srv/srv0srv.cc | 15
-rw-r--r--  storage/xtradb/srv/srv0start.cc | 6
-rw-r--r--  strings/ctype-ucs2.c | 2
-rw-r--r--  strings/ctype-utf8.c | 2
-rw-r--r--  support-files/mysql.server.sh | 2
-rw-r--r--  tests/async_queries.c | 2
-rw-r--r--  win/packaging/CMakeLists.txt | 11
-rw-r--r--  win/packaging/create_msi.cmake.in | 1
307 files changed, 10671 insertions, 5195 deletions
diff --git a/CREDITS b/CREDITS
index f0e6de7f08f..35ab4d48a8f 100644
--- a/CREDITS
+++ b/CREDITS
@@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016)
Acronis http://acronis.com (2016)
Nexedi https://www.nexedi.com (2016)
Automattic https://automattic.com (2014 - 2016)
+Tencent Game DBA http://tencentdba.com/about (2016)
Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016)
Virtuozzo https://virtuozzo.com (2016)
diff --git a/VERSION b/VERSION
index e748b1bda54..a82a4e4d77d 100644
--- a/VERSION
+++ b/VERSION
@@ -1,3 +1,3 @@
MYSQL_VERSION_MAJOR=10
MYSQL_VERSION_MINOR=0
-MYSQL_VERSION_PATCH=27
+MYSQL_VERSION_PATCH=28
diff --git a/client/mysql.cc b/client/mysql.cc
index 89f9a75ec11..4b20e4d98cb 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -245,7 +245,8 @@ static void end_pager();
static void init_tee(const char *);
static void end_tee();
static const char* construct_prompt();
-static char *get_arg(char *line, my_bool get_next_arg);
+enum get_arg_mode { CHECK, GET, GET_NEXT};
+static char *get_arg(char *line, get_arg_mode mode);
static void init_username();
static void add_int_to_prompt(int toadd);
static int get_result_width(MYSQL_RES *res);
@@ -2257,7 +2258,7 @@ static COMMANDS *find_command(char *name)
if (!my_strnncoll(&my_charset_latin1, (uchar*) name, len,
(uchar*) commands[i].name, len) &&
(commands[i].name[len] == '\0') &&
- (!end || commands[i].takes_params))
+ (!end || (commands[i].takes_params && get_arg(name, CHECK))))
{
index= i;
break;
@@ -3177,7 +3178,7 @@ com_charset(String *buffer __attribute__((unused)), char *line)
char buff[256], *param;
CHARSET_INFO * new_cs;
strmake_buf(buff, line);
- param= get_arg(buff, 0);
+ param= get_arg(buff, GET);
if (!param || !*param)
{
return put_info("Usage: \\C charset_name | charset charset_name",
@@ -4263,12 +4264,12 @@ com_connect(String *buffer, char *line)
#ifdef EXTRA_DEBUG
tmp[1]= 0;
#endif
- tmp= get_arg(buff, 0);
+ tmp= get_arg(buff, GET);
if (tmp && *tmp)
{
my_free(current_db);
current_db= my_strdup(tmp, MYF(MY_WME));
- tmp= get_arg(buff, 1);
+ tmp= get_arg(buff, GET_NEXT);
if (tmp)
{
my_free(current_host);
@@ -4371,7 +4372,7 @@ com_delimiter(String *buffer __attribute__((unused)), char *line)
char buff[256], *tmp;
strmake_buf(buff, line);
- tmp= get_arg(buff, 0);
+ tmp= get_arg(buff, GET);
if (!tmp || !*tmp)
{
@@ -4402,7 +4403,7 @@ com_use(String *buffer __attribute__((unused)), char *line)
bzero(buff, sizeof(buff));
strmake_buf(buff, line);
- tmp= get_arg(buff, 0);
+ tmp= get_arg(buff, GET);
if (!tmp || !*tmp)
{
put_info("USE must be followed by a database name", INFO_ERROR);
@@ -4487,23 +4488,22 @@ com_nowarnings(String *buffer __attribute__((unused)),
}
/*
- Gets argument from a command on the command line. If get_next_arg is
- not defined, skips the command and returns the first argument. The
- line is modified by adding zero to the end of the argument. If
- get_next_arg is defined, then the function searches for end of string
- first, after found, returns the next argument and adds zero to the
- end. If you ever wish to use this feature, remember to initialize all
- items in the array to zero first.
+  Gets an argument from a command on the command line. If mode is not
+  GET_NEXT, skips the command and returns the first argument. The line is
+  modified by adding a zero to the end of the argument. If mode is GET_NEXT,
+  the function first searches for the end of the string and, once found,
+  returns the next argument and adds a zero to the end. If you ever wish to
+  use this feature, remember to initialize all items in the array to zero first.
*/
-char *get_arg(char *line, my_bool get_next_arg)
+static char *get_arg(char *line, get_arg_mode mode)
{
char *ptr, *start;
- my_bool quoted= 0, valid_arg= 0;
+ bool short_cmd= false;
char qtype= 0;
ptr= line;
- if (get_next_arg)
+ if (mode == GET_NEXT)
{
for (; *ptr; ptr++) ;
if (*(ptr + 1))
@@ -4514,7 +4514,7 @@ char *get_arg(char *line, my_bool get_next_arg)
/* skip leading white spaces */
while (my_isspace(charset_info, *ptr))
ptr++;
- if (*ptr == '\\') // short command was used
+ if ((short_cmd= *ptr == '\\')) // short command was used
ptr+= 2;
else
while (*ptr &&!my_isspace(charset_info, *ptr)) // skip command
@@ -4527,24 +4527,28 @@ char *get_arg(char *line, my_bool get_next_arg)
if (*ptr == '\'' || *ptr == '\"' || *ptr == '`')
{
qtype= *ptr;
- quoted= 1;
ptr++;
}
for (start=ptr ; *ptr; ptr++)
{
- if (*ptr == '\\' && ptr[1]) // escaped character
+ if ((*ptr == '\\' && ptr[1]) || // escaped character
+ (!short_cmd && qtype && *ptr == qtype && ptr[1] == qtype)) // quote
{
- // Remove the backslash
- strmov_overlapp(ptr, ptr+1);
+ // Remove (or skip) the backslash (or a second quote)
+ if (mode != CHECK)
+ strmov_overlapp(ptr, ptr+1);
+ else
+ ptr++;
}
- else if ((!quoted && *ptr == ' ') || (quoted && *ptr == qtype))
+ else if (*ptr == (qtype ? qtype : ' '))
{
- *ptr= 0;
+ qtype= 0;
+ if (mode != CHECK)
+ *ptr= 0;
break;
}
}
- valid_arg= ptr != start;
- return valid_arg ? start : NullS;
+ return ptr != start && !qtype ? start : NullS;
}
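
A sketch of how the three modes of the reworked get_arg() combine in a caller
such as com_connect() above (an illustration only, not part of the patch):

    char buff[256];
    strmake_buf(buff, line);               /* work on a private copy */
    if (get_arg(buff, CHECK))              /* non-destructive: well-formed arg? */
    {
      char *db=   get_arg(buff, GET);      /* first argument, NUL-terminated */
      char *host= get_arg(buff, GET_NEXT); /* argument following it, if any */
    }

CHECK leaves the line untouched and returns NullS for an unterminated quote,
which is what lets the find_command() change above match a command only when a
well-formed argument follows it.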
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 153761ed510..64ed21ac7fc 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -575,9 +575,7 @@ static int dump_all_tablespaces();
static int dump_tablespaces_for_tables(char *db, char **table_names, int tables);
static int dump_tablespaces_for_databases(char** databases);
static int dump_tablespaces(char* ts_where);
-static void print_comment(FILE *sql_file, my_bool is_error, const char *format,
- ...);
-
+static void print_comment(FILE *, my_bool, const char *, ...);
/*
Print the supplied message if in verbose mode
@@ -655,6 +653,30 @@ static void short_usage(FILE *f)
}
+/** Returns a string fixed up to be safely printed inside a -- comment,
+
+  that is, any newline in it gets prefixed with "-- "
+*/
+static const char *fix_for_comment(const char *ident)
+{
+ static char buf[1024];
+ char c, *s= buf;
+
+ while ((c= *s++= *ident++))
+ {
+ if (s >= buf + sizeof(buf) - 10)
+ {
+ strmov(s, "...");
+ break;
+ }
+ if (c == '\n')
+ s= strmov(s, "-- ");
+ }
+
+ return buf;
+}
+
+
static void write_header(FILE *sql_file, char *db_name)
{
if (opt_xml)
@@ -677,8 +699,8 @@ static void write_header(FILE *sql_file, char *db_name)
DUMP_VERSION, MYSQL_SERVER_VERSION, SYSTEM_TYPE,
MACHINE_TYPE);
print_comment(sql_file, 0, "-- Host: %s Database: %s\n",
- current_host ? current_host : "localhost",
- db_name ? db_name : "");
+ fix_for_comment(current_host ? current_host : "localhost"),
+ fix_for_comment(db_name ? db_name : ""));
print_comment(sql_file, 0,
"-- ------------------------------------------------------\n"
);
@@ -2224,7 +2246,8 @@ static uint dump_events_for_db(char *db)
/* nice comments */
print_comment(sql_file, 0,
- "\n--\n-- Dumping events for database '%s'\n--\n", db);
+ "\n--\n-- Dumping events for database '%s'\n--\n",
+ fix_for_comment(db));
/*
not using "mysql_query_with_error_report" because we may have not
@@ -2436,7 +2459,8 @@ static uint dump_routines_for_db(char *db)
/* nice comments */
print_comment(sql_file, 0,
- "\n--\n-- Dumping routines for database '%s'\n--\n", db);
+ "\n--\n-- Dumping routines for database '%s'\n--\n",
+ fix_for_comment(db));
/*
not using "mysql_query_with_error_report" because we may have not
@@ -2731,11 +2755,11 @@ static uint get_table_structure(char *table, char *db, char *table_type,
if (strcmp (table_type, "VIEW") == 0) /* view */
print_comment(sql_file, 0,
"\n--\n-- Temporary table structure for view %s\n--\n\n",
- result_table);
+ fix_for_comment(result_table));
else
print_comment(sql_file, 0,
"\n--\n-- Table structure for table %s\n--\n\n",
- result_table);
+ fix_for_comment(result_table));
if (opt_drop)
{
@@ -2977,7 +3001,7 @@ static uint get_table_structure(char *table, char *db, char *table_type,
print_comment(sql_file, 0,
"\n--\n-- Table structure for table %s\n--\n\n",
- result_table);
+ fix_for_comment(result_table));
if (opt_drop)
fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", result_table);
if (!opt_xml)
@@ -3684,21 +3708,21 @@ static void dump_table(char *table, char *db)
{
print_comment(md_result_file, 0,
"\n--\n-- Dumping data for table %s\n--\n",
- result_table);
+ fix_for_comment(result_table));
dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM ");
dynstr_append_checked(&query_string, result_table);
if (where)
{
- print_comment(md_result_file, 0, "-- WHERE: %s\n", where);
+ print_comment(md_result_file, 0, "-- WHERE: %s\n", fix_for_comment(where));
dynstr_append_checked(&query_string, " WHERE ");
dynstr_append_checked(&query_string, where);
}
if (order_by)
{
- print_comment(md_result_file, 0, "-- ORDER BY: %s\n", order_by);
+ print_comment(md_result_file, 0, "-- ORDER BY: %s\n", fix_for_comment(order_by));
dynstr_append_checked(&query_string, " ORDER BY ");
dynstr_append_checked(&query_string, order_by);
@@ -4208,7 +4232,7 @@ static int dump_tablespaces(char* ts_where)
if (first)
{
print_comment(md_result_file, 0, "\n--\n-- Logfile group: %s\n--\n",
- row[0]);
+ fix_for_comment(row[0]));
fprintf(md_result_file, "\nCREATE");
}
@@ -4277,7 +4301,8 @@ static int dump_tablespaces(char* ts_where)
first= 1;
if (first)
{
- print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", row[0]);
+ print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n",
+ fix_for_comment(row[0]));
fprintf(md_result_file, "\nCREATE");
}
else
@@ -4481,7 +4506,8 @@ static int init_dumping(char *database, int init_func(char*))
char *qdatabase= quote_name(database,quoted_database_buf,opt_quoted);
print_comment(md_result_file, 0,
- "\n--\n-- Current Database: %s\n--\n", qdatabase);
+ "\n--\n-- Current Database: %s\n--\n",
+ fix_for_comment(qdatabase));
/* Call the view or table specific function */
init_func(qdatabase);
@@ -5672,7 +5698,7 @@ static my_bool get_view_structure(char *table, char* db)
print_comment(sql_file, 0,
"\n--\n-- Final view structure for view %s\n--\n\n",
- result_table);
+ fix_for_comment(result_table));
/* Table might not exist if this view was dumped with --tab. */
fprintf(sql_file, "/*!50001 DROP TABLE IF EXISTS %s*/;\n", opt_quoted_table);
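
To make the effect of fix_for_comment() concrete, here is a sketch (not part
of the patch) of what an identifier containing a newline now produces in the
dump header:

    const char *name= "t1\nSELECT 1; --";   /* hostile table name */
    print_comment(sql_file, 0,
                  "\n--\n-- Table structure for table %s\n--\n\n",
                  fix_for_comment(name));
    /*
      -- Table structure for table t1
      -- SELECT 1; --        <-- the injected line stays inside a comment
    */

Note that fix_for_comment() returns a pointer into a static buffer, so each
result must be consumed before the next call.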
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index 66bcb6462e7..dede6527d11 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -3373,10 +3373,6 @@ void do_exec(struct st_command *command)
#endif
#endif
- /* exec command is interpreted externally and will not take newlines */
- while(replace(&ds_cmd, "\n", 1, " ", 1) == 0)
- ;
-
DBUG_PRINT("info", ("Executing '%s' as '%s'",
command->first_argument, ds_cmd.str));
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index 174548502d8..00f21c1cd8b 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -221,6 +221,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
"perl(mtr_io.pl)"
"perl(mtr_match)"
"perl(mtr_misc.pl)"
+ "perl(mtr_gcov.pl)"
+ "perl(mtr_gprof.pl)"
+ "perl(mtr_process.pl)"
"perl(mtr_report)"
"perl(mtr_results)"
"perl(mtr_unique)")
diff --git a/cmake/package_name.cmake b/cmake/package_name.cmake
index 87db39d68d4..30f5199441f 100644
--- a/cmake/package_name.cmake
+++ b/cmake/package_name.cmake
@@ -30,6 +30,10 @@ IF(NOT VERSION)
SET(64BIT 1)
ENDIF()
+ IF(NOT 64BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^mips64")
+ SET(DEFAULT_MACHINE "mips")
+ ENDIF()
+
IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
SET(NEED_DASH_BETWEEN_PLATFORM_AND_MACHINE 0)
SET(DEFAULT_PLATFORM "win")
diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc
index 6018a4884ea..c09458630c8 100644
--- a/extra/innochecksum.cc
+++ b/extra/innochecksum.cc
@@ -243,10 +243,9 @@ int main(int argc, char **argv)
time_t lastt; /* last time */
ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield;
/* ulints for checksum storage */
- struct stat st; /* for stat, if you couldn't guess */
unsigned long long int size; /* size of file (has to be 64 bits) */
ulint pages; /* number of pages in file */
- off_t offset= 0;
+ long long offset= 0;
int fd;
printf("InnoDB offline file checksum utility.\n");
@@ -269,6 +268,47 @@ int main(int argc, char **argv)
goto error;
}
+#ifdef _WIN32
+ /* Switch off OS file buffering for the file. */
+
+ HANDLE h = CreateFile(filename, GENERIC_READ,
+ FILE_SHARE_READ|FILE_SHARE_WRITE, 0,
+ OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, 0);
+
+  if (h == INVALID_HANDLE_VALUE)
+ {
+    fprintf(stderr, "Error; can't open file\n");
+ goto error;
+ }
+
+ if (!GetFileSizeEx(h, (LARGE_INTEGER *)&size))
+ {
+ fprintf(stderr, "Error; GetFileSize() failed\n");
+ goto error;
+ }
+
+ fd = _open_osfhandle ((intptr_t) h, _O_RDONLY);
+ if (fd < 0)
+ {
+ fprintf(stderr, "Error; _open_osfhandle() failed\n");
+ goto error;
+ }
+
+ f = _fdopen(fd, "rb");
+ if (!f)
+ {
+ fprintf(stderr, "Error; fdopen() failed\n");
+ goto error;
+ }
+
+ /*
+    Disable stdio buffering (FILE_FLAG_NO_BUFFERING requires properly aligned
+    IO buffers, which stdio does not guarantee).
+ */
+ setvbuf(f, NULL, _IONBF, 0);
+
+#else
+ struct stat st;
/* stat the file to get size and page count */
if (stat(filename, &st))
{
@@ -279,6 +319,8 @@ int main(int argc, char **argv)
/* Open the file for reading */
f= fopen(filename, "rb");
+#endif
+
if (f == NULL)
{
fprintf(stderr, "Error; %s cannot be opened", filename);
@@ -323,7 +365,7 @@ int main(int argc, char **argv)
}
else if (verbose)
{
- printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages);
+ printf("file %s = %llu bytes (%lu pages)...\n", filename, size, (ulong)pages);
if (do_one_page)
printf("InnoChecksum; checking page %lu\n", do_page);
else
@@ -348,9 +390,12 @@ int main(int argc, char **argv)
goto error;
}
- offset= (off_t)start_page * (off_t)physical_page_size;
-
+ offset= (longlong)start_page * (longlong)physical_page_size;
+#ifdef _WIN32
+ if (_lseeki64(fd, offset, SEEK_SET) != offset)
+#else
if (lseek(fd, offset, SEEK_SET) != offset)
+#endif
{
perror("Error; Unable to seek to necessary offset");
goto error;
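
The seek part of the change, as a standalone sketch (assumption: MSVC, where
off_t is only 32 bits wide, hence the _lseeki64 branch):

    static int seek_to_offset(int fd, long long offset)
    {
    #ifdef _WIN32
      return _lseeki64(fd, offset, SEEK_SET) == offset ? 0 : -1;
    #else
      return lseek(fd, (off_t)offset, SEEK_SET) == offset ? 0 : -1;
    #endif
    }

Widening offset to long long keeps the page arithmetic correct for tablespace
files larger than 2GB on platforms with a 32-bit off_t.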
diff --git a/extra/yassl/README b/extra/yassl/README
index b5eb88824fb..a3d4f60f561 100644
--- a/extra/yassl/README
+++ b/extra/yassl/README
@@ -12,6 +12,24 @@ before calling SSL_new();
*** end Note ***
+yaSSL Release notes, version 2.4.2 (9/22/2016)
+ This release of yaSSL fixes a medium security vulnerability. A fix for
+ potential AES side channel leaks is included that a local user monitoring
+ the same CPU core cache could exploit. VM users, hyper-threading users,
+ and users where potential attackers have access to the CPU cache will need
+ to update if they utilize AES.
+
+  DSA padding fixes for unusual sizes are included as well. Users with DSA
+  certificates should update.
+
+yaSSL Release notes, version 2.4.0 (5/20/2016)
+ This release of yaSSL fixes the OpenSSL compatibility function
+ SSL_CTX_load_verify_locations() when using the path directory to allow
+ unlimited path sizes. Minor Windows build fixes are included.
+ No high level security fixes in this version but we always recommend
+ updating.
+
+
yaSSL Release notes, version 2.3.9b (2/03/2016)
This release of yaSSL fixes the OpenSSL compatibility function
X509_NAME_get_index_by_NID() to use the actual index of the common name
diff --git a/extra/yassl/certs/dsa-cert.pem b/extra/yassl/certs/dsa-cert.pem
index 10d533edc88..10794cbee73 100644
--- a/extra/yassl/certs/dsa-cert.pem
+++ b/extra/yassl/certs/dsa-cert.pem
@@ -1,22 +1,22 @@
-----BEGIN CERTIFICATE-----
-MIIDqzCCA2ugAwIBAgIJAMGqrgDU6DyhMAkGByqGSM44BAMwgY4xCzAJBgNVBAYT
+MIIDrzCCA2+gAwIBAgIJAK1zRM7YFcNjMAkGByqGSM44BAMwgZAxCzAJBgNVBAYT
AlVTMQ8wDQYDVQQIDAZPcmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQK
-DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wu
-Y29tMR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tMB4XDTEzMDQyMjIw
-MDk0NFoXDTE2MDExNzIwMDk0NFowgY4xCzAJBgNVBAYTAlVTMQ8wDQYDVQQIDAZP
-cmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQKDAd3b2xmU1NMMRAwDgYD
-VQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wuY29tMR8wHQYJKoZIhvcN
-AQkBFhBpbmZvQHdvbGZzc2wuY29tMIIBuDCCASwGByqGSM44BAEwggEfAoGBAL1R
-7koy4IrH6sbh6nDEUUPPKgfhxxLCWCVexF2+qzANEr+hC9M002haJXFOfeS9DyoO
-WFbL0qMZOuqv+22CaHnoUWl7q3PjJOAI3JH0P54ZyUPuU1909RzgTdIDp5+ikbr7
-KYjnltL73FQVMbjTZQKthIpPn3MjYcF+4jp2W2zFAhUAkcntYND6MGf+eYzIJDN2
-L7SonHUCgYEAklpxErfqznIZjVvqqHFaq+mgAL5J8QrKVmdhYZh/Y8z4jCjoCA8o
-TDoFKxf7s2ZzgaPKvglaEKiYqLqic9qY78DYJswzQMLFvjsF4sFZ+pYCBdWPQI4N
-PgxCiznK6Ce+JH9ikSBvMvG+tevjr2UpawDIHX3+AWYaZBZwKADAaboDgYUAAoGB
-AJ3LY89yHyvQ/TsQ6zlYbovjbk/ogndsMqPdNUvL4RuPTgJP/caaDDa0XJ7ak6A7
-TJ+QheLNwOXoZPYJC4EGFSDAXpYniGhbWIrVTCGe6lmZDfnx40WXS0kk3m/DHaC0
-3ElLAiybxVGxyqoUfbT3Zv1JwftWMuiqHH5uADhdXuXVo1AwTjAdBgNVHQ4EFgQU
-IJjk416o4v8qpH9LBtXlR9v8gccwHwYDVR0jBBgwFoAUIJjk416o4v8qpH9LBtXl
-R9v8gccwDAYDVR0TBAUwAwEB/zAJBgcqhkjOOAQDAy8AMCwCFCjGKIdOSV12LcTu
-k08owGM6YkO1AhQe+K173VuaO/OsDNsxZlKpyH8+1g==
+DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRgwFgYDVQQDDA93d3cud29sZnNz
+bC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb20wHhcNMTYwOTIy
+MjEyMzA0WhcNMjIwMzE1MjEyMzA0WjCBkDELMAkGA1UEBhMCVVMxDzANBgNVBAgM
+Bk9yZWdvbjERMA8GA1UEBwwIUG9ydGxhbmQxEDAOBgNVBAoMB3dvbGZTU0wxEDAO
+BgNVBAsMB3Rlc3RpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqG
+SIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCAbgwggEsBgcqhkjOOAQBMIIBHwKB
+gQC9Ue5KMuCKx+rG4epwxFFDzyoH4ccSwlglXsRdvqswDRK/oQvTNNNoWiVxTn3k
+vQ8qDlhWy9KjGTrqr/ttgmh56FFpe6tz4yTgCNyR9D+eGclD7lNfdPUc4E3SA6ef
+opG6+ymI55bS+9xUFTG402UCrYSKT59zI2HBfuI6dltsxQIVAJHJ7WDQ+jBn/nmM
+yCQzdi+0qJx1AoGBAJJacRK36s5yGY1b6qhxWqvpoAC+SfEKylZnYWGYf2PM+Iwo
+6AgPKEw6BSsX+7Nmc4Gjyr4JWhComKi6onPamO/A2CbMM0DCxb47BeLBWfqWAgXV
+j0CODT4MQos5yugnviR/YpEgbzLxvrXr469lKWsAyB19/gFmGmQWcCgAwGm6A4GF
+AAKBgQCdy2PPch8r0P07EOs5WG6L425P6IJ3bDKj3TVLy+Ebj04CT/3Gmgw2tFye
+2pOgO0yfkIXizcDl6GT2CQuBBhUgwF6WJ4hoW1iK1UwhnupZmQ358eNFl0tJJN5v
+wx2gtNxJSwIsm8VRscqqFH2092b9ScH7VjLoqhx+bgA4XV7l1aNQME4wHQYDVR0O
+BBYEFCCY5ONeqOL/KqR/SwbV5Ufb/IHHMB8GA1UdIwQYMBaAFCCY5ONeqOL/KqR/
+SwbV5Ufb/IHHMAwGA1UdEwQFMAMBAf8wCQYHKoZIzjgEAwMvADAsAhQRYSCVN/Ge
+agV3mffU3qNZ92fI0QIUPH7Jp+iASI7U1ocaYDc10qXGaGY=
-----END CERTIFICATE-----
diff --git a/extra/yassl/include/openssl/ssl.h b/extra/yassl/include/openssl/ssl.h
index c95eb1ed887..9ec99b46c1f 100644
--- a/extra/yassl/include/openssl/ssl.h
+++ b/extra/yassl/include/openssl/ssl.h
@@ -34,7 +34,7 @@
#include "rsa.h"
-#define YASSL_VERSION "2.3.9b"
+#define YASSL_VERSION "2.4.2"
#if defined(__cplusplus)
diff --git a/extra/yassl/src/ssl.cpp b/extra/yassl/src/ssl.cpp
index 57542f174c9..7069140dcda 100644
--- a/extra/yassl/src/ssl.cpp
+++ b/extra/yassl/src/ssl.cpp
@@ -162,7 +162,7 @@ int read_file(SSL_CTX* ctx, const char* file, int format, CertType type)
TaoCrypt::DSA_PrivateKey dsaKey;
dsaKey.Initialize(dsaSource);
- if (rsaSource.GetError().What()) {
+ if (dsaSource.GetError().What()) {
// neither worked
ret = SSL_FAILURE;
}
@@ -785,40 +785,67 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file,
WIN32_FIND_DATA FindFileData;
HANDLE hFind;
- char name[MAX_PATH + 1]; // directory specification
- strncpy(name, path, MAX_PATH - 3);
- strncat(name, "\\*", 3);
+ const int DELIMITER_SZ = 2;
+ const int DELIMITER_STAR_SZ = 3;
+ int pathSz = (int)strlen(path);
+ int nameSz = pathSz + DELIMITER_STAR_SZ + 1; // plus 1 for terminator
+ char* name = NEW_YS char[nameSz]; // directory specification
+ memset(name, 0, nameSz);
+ strncpy(name, path, nameSz - DELIMITER_STAR_SZ - 1);
+ strncat(name, "\\*", DELIMITER_STAR_SZ);
hFind = FindFirstFile(name, &FindFileData);
- if (hFind == INVALID_HANDLE_VALUE) return SSL_BAD_PATH;
+ if (hFind == INVALID_HANDLE_VALUE) {
+ ysArrayDelete(name);
+ return SSL_BAD_PATH;
+ }
do {
- if (FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) {
- strncpy(name, path, MAX_PATH - 2 - HALF_PATH);
- strncat(name, "\\", 2);
- strncat(name, FindFileData.cFileName, HALF_PATH);
+ if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ int curSz = (int)strlen(FindFileData.cFileName);
+ if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) {
+ ysArrayDelete(name);
+ // plus 1 for terminator
+ nameSz = pathSz + curSz + DELIMITER_SZ + 1;
+ name = NEW_YS char[nameSz];
+ }
+ memset(name, 0, nameSz);
+ strncpy(name, path, nameSz - curSz - DELIMITER_SZ - 1);
+ strncat(name, "\\", DELIMITER_SZ);
+ strncat(name, FindFileData.cFileName,
+ nameSz - pathSz - DELIMITER_SZ - 1);
ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA);
}
} while (ret == SSL_SUCCESS && FindNextFile(hFind, &FindFileData));
+ ysArrayDelete(name);
FindClose(hFind);
#else // _WIN32
-
- const int MAX_PATH = 260;
-
DIR* dir = opendir(path);
if (!dir) return SSL_BAD_PATH;
struct dirent* entry;
struct stat buf;
- char name[MAX_PATH + 1];
+ const int DELIMITER_SZ = 1;
+ int pathSz = (int)strlen(path);
+ int nameSz = pathSz + DELIMITER_SZ + 1; //plus 1 for null terminator
+ char* name = NEW_YS char[nameSz]; // directory specification
while (ret == SSL_SUCCESS && (entry = readdir(dir))) {
- strncpy(name, path, MAX_PATH - 1 - HALF_PATH);
- strncat(name, "/", 1);
- strncat(name, entry->d_name, HALF_PATH);
+ int curSz = (int)strlen(entry->d_name);
+ if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) {
+ ysArrayDelete(name);
+ nameSz = pathSz + DELIMITER_SZ + curSz + 1;
+ name = NEW_YS char[nameSz];
+ }
+ memset(name, 0, nameSz);
+ strncpy(name, path, nameSz - curSz - 1);
+ strncat(name, "/", DELIMITER_SZ);
+ strncat(name, entry->d_name, nameSz - pathSz - DELIMITER_SZ - 1);
+
if (stat(name, &buf) < 0) {
+ ysArrayDelete(name);
closedir(dir);
return SSL_BAD_STAT;
}
@@ -827,6 +854,7 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file,
ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA);
}
+ ysArrayDelete(name);
closedir(dir);
#endif
diff --git a/extra/yassl/taocrypt/include/aes.hpp b/extra/yassl/taocrypt/include/aes.hpp
index 01763033156..bccf6e73fc7 100644
--- a/extra/yassl/taocrypt/include/aes.hpp
+++ b/extra/yassl/taocrypt/include/aes.hpp
@@ -60,6 +60,7 @@ private:
static const word32 Te[5][256];
static const word32 Td[5][256];
+ static const byte CTd4[256];
static const word32* Te0;
static const word32* Te1;
@@ -80,11 +81,68 @@ private:
void ProcessAndXorBlock(const byte*, const byte*, byte*) const;
+ word32 PreFetchTe() const;
+ word32 PreFetchTd() const;
+ word32 PreFetchCTd4() const;
+
AES(const AES&); // hide copy
AES& operator=(const AES&); // and assign
};
+#if defined(__x86_64__) || defined(_M_X64) || \
+ (defined(__ILP32__) && (__ILP32__ >= 1))
+ #define TC_CACHE_LINE_SZ 64
+#else
+ /* default cache line size */
+ #define TC_CACHE_LINE_SZ 32
+#endif
+
+inline word32 AES::PreFetchTe() const
+{
+ word32 x = 0;
+
+ /* 4 tables of 256 entries */
+ for (int i = 0; i < 4; i++) {
+ /* each entry is 4 bytes */
+ for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) {
+ x &= Te[i][j];
+ }
+ }
+
+ return x;
+}
+
+
+inline word32 AES::PreFetchTd() const
+{
+ word32 x = 0;
+
+ /* 4 tables of 256 entries */
+ for (int i = 0; i < 4; i++) {
+ /* each entry is 4 bytes */
+ for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) {
+ x &= Td[i][j];
+ }
+ }
+
+ return x;
+}
+
+
+inline word32 AES::PreFetchCTd4() const
+{
+ word32 x = 0;
+ int i;
+
+ for (i = 0; i < 256; i += TC_CACHE_LINE_SZ) {
+ x &= CTd4[i];
+ }
+
+ return x;
+}
+
+
typedef BlockCipher<ENCRYPTION, AES, ECB> AES_ECB_Encryption;
typedef BlockCipher<DECRYPTION, AES, ECB> AES_ECB_Decryption;
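
The PreFetch helpers above implement a common cache-timing mitigation: before
any key-dependent table lookup, load one word from every cache line of each
table so the whole table is resident and lookups hit the cache uniformly. The
same pattern in isolation (a sketch; the helper name is hypothetical):

    static inline word32 prefetch_table(const word32* t, int entries)
    {
        word32 x = 0;
        for (int i = 0; i < entries; i += TC_CACHE_LINE_SZ / sizeof(word32))
            x &= t[i];        /* the load pulls the line into cache */
        return x;             /* always 0, so "s0 |= ..." is a data no-op */
    }

Folding the result into live state (s0 |= PreFetchTe() in aes.cpp below) keeps
the compiler from discarding the loads as dead code.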
diff --git a/extra/yassl/taocrypt/include/integer.hpp b/extra/yassl/taocrypt/include/integer.hpp
index 75a3ee3d3df..05fe189fd58 100644
--- a/extra/yassl/taocrypt/include/integer.hpp
+++ b/extra/yassl/taocrypt/include/integer.hpp
@@ -119,6 +119,9 @@ namespace TaoCrypt {
+#ifdef _WIN32
+ #undef max // avoid name clash
+#endif
// general MAX
template<typename T> inline
const T& max(const T& a, const T& b)
diff --git a/extra/yassl/taocrypt/src/aes.cpp b/extra/yassl/taocrypt/src/aes.cpp
index e47765b87d0..2321c72554c 100644
--- a/extra/yassl/taocrypt/src/aes.cpp
+++ b/extra/yassl/taocrypt/src/aes.cpp
@@ -109,10 +109,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
{
temp = rk[3];
rk[4] = rk[0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+ (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
rcon_[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
@@ -128,10 +128,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
{
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+ (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
rcon_[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -149,10 +149,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
{
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+ (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
rcon_[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
@@ -161,10 +161,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
break;
temp = rk[11];
rk[12] = rk[ 4] ^
- (Te4[GETBYTE(temp, 3)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 0)] & 0x000000ff);
+ (Te2[GETBYTE(temp, 3)] & 0xff000000) ^
+ (Te3[GETBYTE(temp, 2)] & 0x00ff0000) ^
+ (Te0[GETBYTE(temp, 1)] & 0x0000ff00) ^
+ (Te1[GETBYTE(temp, 0)] & 0x000000ff);
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
@@ -191,25 +191,25 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
for (i = 1; i < rounds_; i++) {
rk += 4;
rk[0] =
- Td0[Te4[GETBYTE(rk[0], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[0], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[0], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[0], 0)] & 0xff];
+ Td0[Te1[GETBYTE(rk[0], 3)] & 0xff] ^
+ Td1[Te1[GETBYTE(rk[0], 2)] & 0xff] ^
+ Td2[Te1[GETBYTE(rk[0], 1)] & 0xff] ^
+ Td3[Te1[GETBYTE(rk[0], 0)] & 0xff];
rk[1] =
- Td0[Te4[GETBYTE(rk[1], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[1], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[1], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[1], 0)] & 0xff];
+ Td0[Te1[GETBYTE(rk[1], 3)] & 0xff] ^
+ Td1[Te1[GETBYTE(rk[1], 2)] & 0xff] ^
+ Td2[Te1[GETBYTE(rk[1], 1)] & 0xff] ^
+ Td3[Te1[GETBYTE(rk[1], 0)] & 0xff];
rk[2] =
- Td0[Te4[GETBYTE(rk[2], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[2], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[2], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[2], 0)] & 0xff];
+ Td0[Te1[GETBYTE(rk[2], 3)] & 0xff] ^
+ Td1[Te1[GETBYTE(rk[2], 2)] & 0xff] ^
+ Td2[Te1[GETBYTE(rk[2], 1)] & 0xff] ^
+ Td3[Te1[GETBYTE(rk[2], 0)] & 0xff];
rk[3] =
- Td0[Te4[GETBYTE(rk[3], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[3], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[3], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[3], 0)] & 0xff];
+ Td0[Te1[GETBYTE(rk[3], 3)] & 0xff] ^
+ Td1[Te1[GETBYTE(rk[3], 2)] & 0xff] ^
+ Td2[Te1[GETBYTE(rk[3], 1)] & 0xff] ^
+ Td3[Te1[GETBYTE(rk[3], 0)] & 0xff];
}
}
}
@@ -244,6 +244,7 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock,
s2 ^= rk[2];
s3 ^= rk[3];
+ s0 |= PreFetchTe();
/*
* Nr - 1 full rounds:
*/
@@ -312,28 +313,28 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock,
*/
s0 =
- (Te4[GETBYTE(t0, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t3, 0)] & 0x000000ff) ^
+ (Te2[GETBYTE(t0, 3)] & 0xff000000) ^
+ (Te3[GETBYTE(t1, 2)] & 0x00ff0000) ^
+ (Te0[GETBYTE(t2, 1)] & 0x0000ff00) ^
+ (Te1[GETBYTE(t3, 0)] & 0x000000ff) ^
rk[0];
s1 =
- (Te4[GETBYTE(t1, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t0, 0)] & 0x000000ff) ^
+ (Te2[GETBYTE(t1, 3)] & 0xff000000) ^
+ (Te3[GETBYTE(t2, 2)] & 0x00ff0000) ^
+ (Te0[GETBYTE(t3, 1)] & 0x0000ff00) ^
+ (Te1[GETBYTE(t0, 0)] & 0x000000ff) ^
rk[1];
s2 =
- (Te4[GETBYTE(t2, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t1, 0)] & 0x000000ff) ^
+ (Te2[GETBYTE(t2, 3)] & 0xff000000) ^
+ (Te3[GETBYTE(t3, 2)] & 0x00ff0000) ^
+ (Te0[GETBYTE(t0, 1)] & 0x0000ff00) ^
+ (Te1[GETBYTE(t1, 0)] & 0x000000ff) ^
rk[2];
s3 =
- (Te4[GETBYTE(t3, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t2, 0)] & 0x000000ff) ^
+ (Te2[GETBYTE(t3, 3)] & 0xff000000) ^
+ (Te3[GETBYTE(t0, 2)] & 0x00ff0000) ^
+ (Te0[GETBYTE(t1, 1)] & 0x0000ff00) ^
+ (Te1[GETBYTE(t2, 0)] & 0x000000ff) ^
rk[3];
@@ -358,6 +359,8 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock,
s2 ^= rk[2];
s3 ^= rk[3];
+ s0 |= PreFetchTd();
+
/*
* Nr - 1 full rounds:
*/
@@ -423,29 +426,32 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock,
* apply last round and
* map cipher state to byte array block:
*/
+
+ t0 |= PreFetchCTd4();
+
s0 =
- (Td4[GETBYTE(t0, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t1, 0)] & 0x000000ff) ^
+ ((word32)CTd4[GETBYTE(t0, 3)] << 24) ^
+ ((word32)CTd4[GETBYTE(t3, 2)] << 16) ^
+ ((word32)CTd4[GETBYTE(t2, 1)] << 8) ^
+ ((word32)CTd4[GETBYTE(t1, 0)]) ^
rk[0];
s1 =
- (Td4[GETBYTE(t1, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t2, 0)] & 0x000000ff) ^
+ ((word32)CTd4[GETBYTE(t1, 3)] << 24) ^
+ ((word32)CTd4[GETBYTE(t0, 2)] << 16) ^
+ ((word32)CTd4[GETBYTE(t3, 1)] << 8) ^
+ ((word32)CTd4[GETBYTE(t2, 0)]) ^
rk[1];
s2 =
- (Td4[GETBYTE(t2, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t3, 0)] & 0x000000ff) ^
+ ((word32)CTd4[GETBYTE(t2, 3)] << 24 ) ^
+ ((word32)CTd4[GETBYTE(t1, 2)] << 16 ) ^
+ ((word32)CTd4[GETBYTE(t0, 1)] << 8 ) ^
+ ((word32)CTd4[GETBYTE(t3, 0)]) ^
rk[2];
s3 =
- (Td4[GETBYTE(t3, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t0, 0)] & 0x000000ff) ^
+ ((word32)CTd4[GETBYTE(t3, 3)] << 24) ^
+ ((word32)CTd4[GETBYTE(t2, 2)] << 16) ^
+ ((word32)CTd4[GETBYTE(t1, 1)] << 8) ^
+ ((word32)CTd4[GETBYTE(t0, 0)]) ^
rk[3];
gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
@@ -1826,18 +1832,52 @@ const word32 AES::Td[5][256] = {
}
};
+const byte AES::CTd4[256] =
+{
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+
const word32* AES::Te0 = AES::Te[0];
const word32* AES::Te1 = AES::Te[1];
const word32* AES::Te2 = AES::Te[2];
const word32* AES::Te3 = AES::Te[3];
-const word32* AES::Te4 = AES::Te[4];
const word32* AES::Td0 = AES::Td[0];
const word32* AES::Td1 = AES::Td[1];
const word32* AES::Td2 = AES::Td[2];
const word32* AES::Td3 = AES::Td[3];
-const word32* AES::Td4 = AES::Td[4];
diff --git a/extra/yassl/taocrypt/src/asn.cpp b/extra/yassl/taocrypt/src/asn.cpp
index 0474e7c21d5..80bcd612d27 100644
--- a/extra/yassl/taocrypt/src/asn.cpp
+++ b/extra/yassl/taocrypt/src/asn.cpp
@@ -1219,17 +1219,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
}
word32 rLen = GetLength(source);
if (rLen != 20) {
- if (rLen == 21) { // zero at front, eat
+        while (rLen > 20 && source.remaining() > 0) { // zeros at front, eat
source.next();
--rLen;
}
- else if (rLen == 19) { // add zero to front so 20 bytes
+        if (rLen < 20) { // add zeros to front so 20 bytes
+ word32 tmpLen = rLen;
+ while (tmpLen < 20) {
decoded[0] = 0;
decoded++;
+ tmpLen++;
}
- else {
- source.SetError(DSA_SZ_E);
- return 0;
}
}
memcpy(decoded, source.get_buffer() + source.get_index(), rLen);
@@ -1242,17 +1242,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
}
word32 sLen = GetLength(source);
if (sLen != 20) {
- if (sLen == 21) {
- source.next(); // zero at front, eat
+ while (sLen > 20 && source.remaining() > 0) {
+            source.next(); // zeros at front, eat
--sLen;
}
- else if (sLen == 19) {
- decoded[rLen] = 0; // add zero to front so 20 bytes
+        if (sLen < 20) { // add zeros to front so 20 bytes
+ word32 tmpLen = sLen;
+ while (tmpLen < 20) {
+ decoded[rLen] = 0;
decoded++;
+ tmpLen++;
}
- else {
- source.SetError(DSA_SZ_E);
- return 0;
}
}
memcpy(decoded + rLen, source.get_buffer() + source.get_index(), sLen);
diff --git a/extra/yassl/taocrypt/src/dsa.cpp b/extra/yassl/taocrypt/src/dsa.cpp
index 72221441b2b..fda01881df5 100644
--- a/extra/yassl/taocrypt/src/dsa.cpp
+++ b/extra/yassl/taocrypt/src/dsa.cpp
@@ -172,6 +172,7 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig,
const Integer& q = key_.GetSubGroupOrder();
const Integer& g = key_.GetSubGroupGenerator();
const Integer& x = key_.GetPrivatePart();
+ byte* tmpPtr = sig; // initial signature output
Integer k(rng, 1, q - 1);
@@ -187,22 +188,23 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig,
return (word32) -1;
int rSz = r_.ByteCount();
+ int tmpSz = rSz;
- if (rSz == 19) {
- sig[0] = 0;
- sig++;
+ while (tmpSz++ < SHA::DIGEST_SIZE) {
+ *sig++ = 0;
}
r_.Encode(sig, rSz);
+ sig = tmpPtr + SHA::DIGEST_SIZE; // advance sig output to s
int sSz = s_.ByteCount();
+ tmpSz = sSz;
- if (sSz == 19) {
- sig[rSz] = 0;
- sig++;
+ while (tmpSz++ < SHA::DIGEST_SIZE) {
+ *sig++ = 0;
}
- s_.Encode(sig + rSz, sSz);
+ s_.Encode(sig, sSz);
return 40;
}
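
The loops above generalize the old hard-coded 19-byte special case: r and s
are now each left-padded with zero bytes into a fixed 20-byte
(SHA::DIGEST_SIZE) slot. Equivalent logic as a sketch (hypothetical helper;
assumes v.ByteCount() <= 20):

    static void encode_fixed20(const Integer& v, byte* out)
    {
        int sz = v.ByteCount();
        memset(out, 0, 20 - sz);         /* leading zero padding */
        v.Encode(out + (20 - sz), sz);   /* big-endian magnitude */
    }
    /* Sign() now effectively does: encode_fixed20(r_, sig);
       encode_fixed20(s_, sig + 20); return 40; */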
diff --git a/extra/yassl/taocrypt/src/integer.cpp b/extra/yassl/taocrypt/src/integer.cpp
index fb8d9276bd9..dd8425396ed 100644
--- a/extra/yassl/taocrypt/src/integer.cpp
+++ b/extra/yassl/taocrypt/src/integer.cpp
@@ -193,8 +193,9 @@ DWord() {}
"a" (a), "rm" (b) : "cc");
#elif defined(__mips64)
- __asm__("dmultu %2,%3" : "=d" (r.halfs_.high), "=l" (r.halfs_.low)
- : "r" (a), "r" (b));
+ unsigned __int128 t = (unsigned __int128) a * b;
+ r.halfs_.high = t >> 64;
+ r.halfs_.low = (word) t;
#elif defined(_M_IX86)
// for testing
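
The mips64 inline assembly is replaced by compiler-generated 128-bit
arithmetic. The same multiply as a free-standing sketch (requires a compiler
that provides unsigned __int128, e.g. GCC or Clang on a 64-bit target):

    static inline void mul_64x64(word a, word b, word& hi, word& lo)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        hi = (word)(t >> 64);
        lo = (word)t;
    }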
diff --git a/extra/yassl/taocrypt/test/test.cpp b/extra/yassl/taocrypt/test/test.cpp
index c23d981924d..b07a9eb9f29 100644
--- a/extra/yassl/taocrypt/test/test.cpp
+++ b/extra/yassl/taocrypt/test/test.cpp
@@ -1281,6 +1281,9 @@ int dsa_test()
if (!verifier.Verify(digest, decoded))
return -90;
+ if (!verifier.Verify(digest, signature))
+ return -91;
+
return 0;
}
diff --git a/extra/yassl/testsuite/test.hpp b/extra/yassl/testsuite/test.hpp
index 5374edd0e2a..a65a212cf99 100644
--- a/extra/yassl/testsuite/test.hpp
+++ b/extra/yassl/testsuite/test.hpp
@@ -22,7 +22,6 @@
#define yaSSL_TEST_HPP
#include "runtime.hpp"
-#include "openssl/ssl.h" /* openssl compatibility test */
#include "error.hpp"
#include <stdio.h>
#include <stdlib.h>
@@ -56,6 +55,7 @@
#endif
#define SOCKET_T int
#endif /* _WIN32 */
+#include "openssl/ssl.h" /* openssl compatibility test */
#ifdef _MSC_VER
diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h
index 0a71a17829b..a97dd0f43a3 100644
--- a/include/byte_order_generic_x86.h
+++ b/include/byte_order_generic_x86.h
@@ -27,19 +27,9 @@
((uint32) (uchar) (A)[0])))
#define sint4korr(A) (*((const long *) (A)))
#define uint2korr(A) (*((const uint16 *) (A)))
-
-/*
- Attention: Please, note, uint3korr reads 4 bytes (not 3)!
- It means, that you have to provide enough allocated space.
-*/
-#if defined(HAVE_valgrind) && !defined(_WIN32)
#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
(((uint32) ((uchar) (A)[1])) << 8) +\
(((uint32) ((uchar) (A)[2])) << 16))
-#else
-#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF)
-#endif
-
#define uint4korr(A) (*((const uint32 *) (A)))
#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
(((uint32) ((uchar) (A)[1])) << 8) +\
diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h
index b6b0c5d8ea5..8c7493965a9 100644
--- a/include/byte_order_generic_x86_64.h
+++ b/include/byte_order_generic_x86_64.h
@@ -27,17 +27,9 @@
((uint32) (uchar) (A)[0])))
#define sint4korr(A) (int32) (*((int32 *) (A)))
#define uint2korr(A) (uint16) (*((uint16 *) (A)))
-/*
- Attention: Please, note, uint3korr reads 4 bytes (not 3)!
- It means, that you have to provide enough allocated space.
-*/
-#if defined(HAVE_valgrind) && !defined(_WIN32)
#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
(((uint32) ((uchar) (A)[1])) << 8) +\
(((uint32) ((uchar) (A)[2])) << 16))
-#else
-#define uint3korr(A) (uint32) (*((unsigned int *) (A)) & 0xFFFFFF)
-#endif
#define uint4korr(A) (uint32) (*((uint32 *) (A)))
#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
(((uint32) ((uchar) (A)[1])) << 8) +\
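
Both x86 headers previously read a full four bytes for uint3korr() in
non-valgrind builds ((*(const unsigned int*)(A)) & 0xFFFFFF), touching one
byte past the three the caller is required to allocate. An illustration of
the difference:

    uchar buf[3]= { 0x01, 0x02, 0x03 };  /* exactly 3 bytes allocated */
    uint32 v= uint3korr(buf);            /* now reads only buf[0..2] */
    /* the old fast path also read buf[3]; v == 0x030201 either way */

Making the byte-wise form unconditional trades a little speed for a read that
is always within bounds.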
diff --git a/include/my_global.h b/include/my_global.h
index 191e08d9218..6222467901e 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -888,8 +888,7 @@ typedef long long my_ptrdiff_t;
and related routines are refactored.
*/
-#define my_offsetof(TYPE, MEMBER) \
- ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
+#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)
#define NullS (char *) 0
diff --git a/include/my_sys.h b/include/my_sys.h
index a0b7f4cc554..f6bf57e50a4 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2010, 2013, Monty Program Ab.
+ Copyright (c) 2010, 2016, Monty Program Ab.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -271,7 +271,7 @@ extern my_bool my_use_symdir;
extern ulong my_default_record_cache_size;
extern my_bool my_disable_locking, my_disable_async_io,
my_disable_flush_key_blocks, my_disable_symlinks;
-extern my_bool my_disable_sync;
+extern my_bool my_disable_sync, my_disable_copystat_in_redel;
extern char wild_many,wild_one,wild_prefix;
extern const char *charsets_dir;
extern my_bool timed_mutexes;
diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c
index 446f1da0b0c..3a08ea26b1d 100644
--- a/libmysql/libmysql.c
+++ b/libmysql/libmysql.c
@@ -450,8 +450,9 @@ void read_user_name(char *name)
void read_user_name(char *name)
{
- char *str=getenv("USER"); /* ODBC will send user variable */
- strmake(name,str ? str : "ODBC", USERNAME_LENGTH);
+ DWORD len= USERNAME_LENGTH;
+ if (!GetUserName(name, &len))
+ strmov(name,"UNKNOWN_USER");
}
#endif
diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test
index 82e8b396357..17f8e069fa3 100644
--- a/mysql-test/extra/binlog_tests/database.test
+++ b/mysql-test/extra/binlog_tests/database.test
@@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';
# Use '/' instead of '\' in the error message. On windows platform, dir is
# formed with '\'.
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
+--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /93/39/ /17/39/ /247/39/ /File exists/Directory not empty/
--error 1010
DROP DATABASE testing_1;
let $wait_binlog_event= DROP TABLE IF EXIST;
diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc
index c50a45a9923..03afa49d323 100644
--- a/mysql-test/include/index_merge2.inc
+++ b/mysql-test/include/index_merge2.inc
@@ -341,6 +341,7 @@ while ($1)
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
# to test the bug, the following must use "sort_union":
--replace_column 9 REF
diff --git a/mysql-test/include/search_pattern_in_file.inc b/mysql-test/include/search_pattern_in_file.inc
index 0d09cdcd36e..84237026ed0 100644
--- a/mysql-test/include/search_pattern_in_file.inc
+++ b/mysql-test/include/search_pattern_in_file.inc
@@ -60,12 +60,12 @@
perl;
use strict;
- my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set";
- my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set";
- my $search_range= $ENV{'SEARCH_RANGE'};
+ my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set";
+ my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set";
+ my $search_range= $ENV{'SEARCH_RANGE'};
my $file_content;
$search_range= 50000 unless $search_range =~ /-?[0-9]+/;
- open(FILE, "$search_file") or die("Unable to open '$search_file': $!\n");
+ open(FILE, '<', $search_file) or die("Unable to open '$search_file': $!\n");
if ($search_range >= 0) {
read(FILE, $file_content, $search_range, 0);
} else {
@@ -75,7 +75,10 @@ perl;
read(FILE, $file_content, -$search_range, 0);
}
close(FILE);
- if ( not $file_content =~ m{$search_pattern} ) {
- die("# ERROR: The file '$search_file' does not contain the expected pattern $search_pattern\n->$file_content<-\n");
+ $search_file =~ s{^.*?([^/\\]+)$}{$1};
+ if ($file_content =~ m{$search_pattern}) {
+ print "FOUND /$search_pattern/ in $search_file\n"
+ } else {
+ print "NOT FOUND /$search_pattern/ in $search_file\n"
}
EOF
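After this change the include inspects only a window of the file — the first SEARCH_RANGE bytes when the range is positive, the last when it is negative — and reports FOUND/NOT FOUND instead of dying. A rough C sketch of that head/tail windowing, using a hypothetical log name:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Read up to |range| bytes from the head (range > 0) or tail
     * (range < 0) of a file, mirroring how SEARCH_RANGE is interpreted
     * in the Perl above. */
    static char *read_window(const char *path, long range, size_t *out_len)
    {
        FILE *f = fopen(path, "rb");
        if (!f) return NULL;
        size_t want = (size_t) (range < 0 ? -range : range);
        if (range < 0 && fseek(f, range, SEEK_END) != 0)
            rewind(f);                   /* file shorter than the window */
        char *buf = malloc(want + 1);
        if (!buf) { fclose(f); return NULL; }
        *out_len = fread(buf, 1, want, f);
        buf[*out_len] = '\0';
        fclose(f);
        return buf;
    }

    int main(void)
    {
        size_t n;
        char *head = read_window("mysqld.err", 50000, &n); /* hypothetical */
        if (head) {
            printf("%s /pattern/ in mysqld.err\n",
                   strstr(head, "pattern") ? "FOUND" : "NOT FOUND");
            free(head);
        }
        return 0;
    }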
diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm
index 0e90967ef95..f9f7b3d8d4b 100644
--- a/mysql-test/lib/My/CoreDump.pm
+++ b/mysql-test/lib/My/CoreDump.pm
@@ -261,11 +261,7 @@ sub show {
# On Windows, rely on cdb to be there...
if (IS_WINDOWS)
{
- # Starting cdb is unsafe when used with --parallel > 1 option
- if ( $parallel < 2 )
- {
- _cdb($core_name);
- }
+ _cdb($core_name);
return;
}
diff --git a/mysql-test/lib/My/Platform.pm b/mysql-test/lib/My/Platform.pm
index 1776f1008da..110cf8a20e0 100644
--- a/mysql-test/lib/My/Platform.pm
+++ b/mysql-test/lib/My/Platform.pm
@@ -24,7 +24,7 @@ use File::Path;
use base qw(Exporter);
our @EXPORT= qw(IS_CYGWIN IS_WINDOWS IS_WIN32PERL
native_path posix_path mixed_path
- check_socket_path_length process_alive);
+ check_socket_path_length process_alive open_for_append);
BEGIN {
if ($^O eq "cygwin") {
@@ -161,4 +161,51 @@ sub process_alive {
}
+
+use Symbol qw( gensym );
+
+use if $^O eq 'MSWin32', 'Win32API::File', qw( CloseHandle CreateFile GetOsFHandle OsFHandleOpen OPEN_ALWAYS FILE_APPEND_DATA
+ FILE_SHARE_READ FILE_SHARE_WRITE FILE_SHARE_DELETE );
+use if $^O eq 'MSWin32', 'Win32::API';
+
+use constant WIN32API_FILE_NULL => [];
+
+# Open a file for append
+# On Windows we use CreateFile with FILE_APPEND_DATA
+# to ensure that writes are atomic, not interleaved
+# with writes by other processes.
+sub open_for_append
+{
+ my ($file) = @_;
+ my $fh = gensym();
+
+ if (IS_WIN32PERL)
+ {
+ my $handle;
+ if (!($handle = CreateFile(
+ $file,
+ FILE_APPEND_DATA(),
+ FILE_SHARE_READ()|FILE_SHARE_WRITE()|FILE_SHARE_DELETE(),
+ WIN32API_FILE_NULL,
+ OPEN_ALWAYS(), # Create the file if it doesn't exist.
+ 0,
+ WIN32API_FILE_NULL,
+ )))
+ {
+ return undef;
+ }
+
+ if (!OsFHandleOpen($fh, $handle, 'wat'))
+ {
+ CloseHandle($handle);
+ return undef;
+ }
+ return $fh;
+ }
+
+ open($fh,">>",$file) or return undef;
+ return $fh;
+}
+
+
1;
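The helper above leans on a Win32 guarantee: a handle opened with FILE_APPEND_DATA (and without FILE_WRITE_DATA) makes every write an atomic append, so the OS picks end-of-file at write time and log lines from parallel mtr workers cannot interleave mid-record. A minimal C sketch of the same mechanism, with a hypothetical log file name:

    #ifdef _WIN32
    #include <windows.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* FILE_APPEND_DATA without FILE_WRITE_DATA: each WriteFile is an
         * atomic append, regardless of how many processes share the file. */
        HANDLE h = CreateFileA("mtr.log", FILE_APPEND_DATA,
                               FILE_SHARE_READ | FILE_SHARE_WRITE |
                               FILE_SHARE_DELETE,
                               NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
        if (h == INVALID_HANDLE_VALUE)
            return 1;

        const char *line = "worker 1: test passed\n";
        DWORD written;
        WriteFile(h, line, (DWORD) strlen(line), &written, NULL);
        CloseHandle(h);
        return 0;
    }
    #endif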
diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm
index 124aff92895..5ec7553674c 100644
--- a/mysql-test/lib/mtr_cases.pm
+++ b/mysql-test/lib/mtr_cases.pm
@@ -60,8 +60,6 @@ use My::Test;
use My::Find;
use My::Suite;
-require "mtr_misc.pl";
-
# locate plugin suites, depending on whether it's a build tree or installed
my @plugin_suitedirs;
my $plugin_suitedir_regex;
@@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) {
$file_to_tags{$file}= $tags;
$file_to_master_opts{$file}= $master_opts;
$file_to_slave_opts{$file}= $slave_opts;
- $file_combinations{$file}= [ uniq(@combinations) ];
+ $file_combinations{$file}= [ ::uniq(@combinations) ];
$file_in_overlay{$file} = 1 if $in_overlay;
return @{$tags};
}
diff --git a/mysql-test/lib/mtr_io.pl b/mysql-test/lib/mtr_io.pl
index 8c2803f0427..0de4d9612ac 100644
--- a/mysql-test/lib/mtr_io.pl
+++ b/mysql-test/lib/mtr_io.pl
@@ -21,6 +21,7 @@
use strict;
use Carp;
+use My::Platform;
sub mtr_fromfile ($);
sub mtr_tofile ($@);
@@ -45,10 +46,10 @@ sub mtr_fromfile ($) {
sub mtr_tofile ($@) {
my $file= shift;
-
- open(FILE,">>",$file) or mtr_error("can't open file \"$file\": $!");
- print FILE join("", @_);
- close FILE;
+ my $fh= open_for_append $file;
+ mtr_error("can't open file \"$file\": $!") unless defined($fh);
+ print $fh join("", @_);
+ close $fh;
}
diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm
index 9ab82c454ed..97ace54f0fb 100644
--- a/mysql-test/lib/mtr_report.pm
+++ b/mysql-test/lib/mtr_report.pm
@@ -34,7 +34,6 @@ use mtr_match;
use My::Platform;
use POSIX qw[ _exit ];
use IO::Handle qw[ flush ];
-require "mtr_io.pl";
use mtr_results;
my $tot_real_time= 0;
@@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
my $timer_str= "";
if ( $timer and -f "$::opt_vardir/log/timer" )
{
- $timer_str= mtr_fromfile("$::opt_vardir/log/timer");
+ $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
$tinfo->{timer}= $timer_str;
resfile_test_info('duration', $timer_str) if $::opt_resfile;
}
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index 752b62fb915..b6263415d9d 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -102,11 +102,11 @@ use mtr_results;
use IO::Socket::INET;
use IO::Select;
-require "lib/mtr_process.pl";
-require "lib/mtr_io.pl";
-require "lib/mtr_gcov.pl";
-require "lib/mtr_gprof.pl";
-require "lib/mtr_misc.pl";
+require "mtr_process.pl";
+require "mtr_io.pl";
+require "mtr_gcov.pl";
+require "mtr_gprof.pl";
+require "mtr_misc.pl";
$SIG{INT}= sub { mtr_error("Got ^C signal"); };
$SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result
index e572fdb197c..2e371ac6ae6 100644
--- a/mysql-test/r/alter_table.result
+++ b/mysql-test/r/alter_table.result
@@ -2021,3 +2021,58 @@ ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id);
Warnings:
Note 1061 Multiple primary key defined
DROP TABLE t1;
+#
+# MDEV-11126 Crash while altering persistent virtual column
+#
+CREATE TABLE `tab1` (
+`id` bigint(20) NOT NULL AUTO_INCREMENT,
+`field2` set('option1','option2','option3','option4') NOT NULL,
+`field3` set('option1','option2','option3','option4','option5') NOT NULL,
+`field4` set('option1','option2','option3','option4') NOT NULL,
+`field5` varchar(32) NOT NULL,
+`field6` varchar(32) NOT NULL,
+`field7` varchar(32) NOT NULL,
+`field8` varchar(32) NOT NULL,
+`field9` int(11) NOT NULL DEFAULT '1',
+`field10` varchar(16) NOT NULL,
+`field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+`v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+PRIMARY KEY (`id`)
+) DEFAULT CHARSET=latin1;
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128);
+SHOW CREATE TABLE `tab1`;
+Table Create Table
+tab1 CREATE TABLE `tab1` (
+ `id` bigint(20) NOT NULL AUTO_INCREMENT,
+ `field2` set('option1','option2','option3','option4') NOT NULL,
+ `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+ `field4` set('option1','option2','option3','option4') NOT NULL,
+ `field5` varchar(32) NOT NULL,
+ `field6` varchar(32) NOT NULL,
+ `field7` varchar(32) NOT NULL,
+ `field8` varchar(32) NOT NULL,
+ `field9` int(11) NOT NULL DEFAULT '1',
+ `field10` varchar(16) NOT NULL,
+ `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+ `v_col` varchar(128) DEFAULT NULL,
+ PRIMARY KEY (`id`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT;
+SHOW CREATE TABLE `tab1`;
+Table Create Table
+tab1 CREATE TABLE `tab1` (
+ `id` bigint(20) NOT NULL AUTO_INCREMENT,
+ `field2` set('option1','option2','option3','option4') NOT NULL,
+ `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+ `field4` set('option1','option2','option3','option4') NOT NULL,
+ `field5` varchar(32) NOT NULL,
+ `field6` varchar(32) NOT NULL,
+ `field7` varchar(32) NOT NULL,
+ `field8` varchar(32) NOT NULL,
+ `field9` int(11) NOT NULL DEFAULT '1',
+ `field10` varchar(16) NOT NULL,
+ `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+ `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+ PRIMARY KEY (`id`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE `tab1`;
diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result
index 918ceaa496f..f3f5e227d3a 100644
--- a/mysql-test/r/contributors.result
+++ b/mysql-test/r/contributors.result
@@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation
Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation
Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation
Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation
+Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation
Google USA Sponsoring encryption, parallel replication and GTID
Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction
diff --git a/mysql-test/r/create_or_replace.result b/mysql-test/r/create_or_replace.result
index 3a894e9fcb1..a43dc2eaca4 100644
--- a/mysql-test/r/create_or_replace.result
+++ b/mysql-test/r/create_or_replace.result
@@ -442,3 +442,14 @@ KILL QUERY con_id;
ERROR 70100: Query execution was interrupted
drop table t1;
DROP TABLE t2;
+#
+# MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc()
+#
+CREATE TABLE t1(a INT);
+CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test';
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+LOCK TABLE t1 WRITE;
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+UNLOCK TABLES;
+DROP FUNCTION f1;
+DROP TABLE t1;
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 0ec89a50c0f..3269e6c900e 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -1658,6 +1658,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
1
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+c
+FFFF0000
#
# End of 5.5 tests
#
diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result
index c23ffbe327b..c25ae9e3055 100644
--- a/mysql-test/r/drop.result
+++ b/mysql-test/r/drop.result
@@ -209,3 +209,9 @@ INSERT INTO table1 VALUES (1);
ERROR 42S02: Unknown table 't.notable'
DROP TABLE table1,table2;
# End BUG#34750
+#
+# MDEV-11105 Table named 'db' has weird side effect.
+#
+CREATE DATABASE mysqltest;
+CREATE TABLE mysqltest.db(id INT);
+DROP DATABASE mysqltest;
diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result
index 77c74fbc041..2803107b97e 100644
--- a/mysql-test/r/group_min_max_innodb.result
+++ b/mysql-test/r/group_min_max_innodb.result
@@ -286,3 +286,19 @@ F 28 28
F 29 29
F 30 30
DROP TABLE t0,t1,t2;
+#
+# MDEV-MariaDB daemon leaks memory with specific query
+#
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+`language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+translation_resources serialized_c
+NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+drop table t1,t2;
diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result
index 5202c79f3c7..5bf56e213ab 100644
--- a/mysql-test/r/index_merge_innodb.result
+++ b/mysql-test/r/index_merge_innodb.result
@@ -311,6 +311,9 @@ set @d=@d*2;
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result
index fcd5eebefa4..c63ed132662 100644
--- a/mysql-test/r/index_merge_myisam.result
+++ b/mysql-test/r/index_merge_myisam.result
@@ -1146,6 +1146,9 @@ set @d=@d*2;
alter table t1 add index i2(key2);
alter table t1 add index i3(key3);
update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result
index 9d61850fae6..852b520d1fc 100644
--- a/mysql-test/r/information_schema.result
+++ b/mysql-test/r/information_schema.result
@@ -1004,19 +1004,19 @@ show grants;
Grants for user3@localhost
GRANT USAGE ON *.* TO 'user3'@'localhost'
GRANT SELECT ON `mysqltest`.* TO 'user3'@'localhost'
-select * from information_schema.column_privileges where grantee like '%user%'
+select * from information_schema.column_privileges where grantee like '\'user%'
order by grantee;
GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME COLUMN_NAME PRIVILEGE_TYPE IS_GRANTABLE
'user1'@'localhost' def mysqltest t1 f1 SELECT NO
-select * from information_schema.table_privileges where grantee like '%user%'
+select * from information_schema.table_privileges where grantee like '\'user%'
order by grantee;
GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME PRIVILEGE_TYPE IS_GRANTABLE
'user2'@'localhost' def mysqltest t2 SELECT NO
-select * from information_schema.schema_privileges where grantee like '%user%'
+select * from information_schema.schema_privileges where grantee like '\'user%'
order by grantee;
GRANTEE TABLE_CATALOG TABLE_SCHEMA PRIVILEGE_TYPE IS_GRANTABLE
'user3'@'localhost' def mysqltest SELECT NO
-select * from information_schema.user_privileges where grantee like '%user%'
+select * from information_schema.user_privileges where grantee like '\'user%'
order by grantee;
GRANTEE TABLE_CATALOG PRIVILEGE_TYPE IS_GRANTABLE
'user1'@'localhost' def USAGE NO
diff --git a/mysql-test/r/lowercase_fs_on.result b/mysql-test/r/lowercase_fs_on.result
index a090f46cfbf..b844b3f77dd 100644
--- a/mysql-test/r/lowercase_fs_on.result
+++ b/mysql-test/r/lowercase_fs_on.result
@@ -1,3 +1,4 @@
#
# Bug#20198490 : LOWER_CASE_TABLE_NAMES=0 ON WINDOWS LEADS TO PROBLEMS
#
+FOUND /\[ERROR\] The server option \'lower_case_table_names\' is configured to use case sensitive table names/ in my_restart.err
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index 89aaf48219e..31edbc0fdce 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -3832,6 +3832,23 @@ test.m1 repair error Corrupt
# Clean-up.
drop tables m1, t1, t4;
drop view t3;
+#
+# MDEV-10424 - Assertion `ticket == __null' failed in
+# MDL_request::set_type
+#
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
End of 5.5 tests
#
# Additional coverage for refactoring which is made as part
diff --git a/mysql-test/r/mysql.result b/mysql-test/r/mysql.result
index cb705d285fe..dd0129df0d9 100644
--- a/mysql-test/r/mysql.result
+++ b/mysql-test/r/mysql.result
@@ -512,6 +512,14 @@ DROP DATABASE connected_db;
create database `aa``bb````cc`;
DATABASE()
aa`bb``cc
+DATABASE()
+test
+DATABASE()
+aa`bb``cc
+DATABASE()
+test
+DATABASE()
+aa`bb``cc
drop database `aa``bb````cc`;
a
>>\ndelimiter\n<<
diff --git a/mysql-test/r/mysql_not_windows.result b/mysql-test/r/mysql_not_windows.result
index d5670a1a9ca..1df62d9a12d 100644
--- a/mysql-test/r/mysql_not_windows.result
+++ b/mysql-test/r/mysql_not_windows.result
@@ -3,3 +3,9 @@ a
1
End of tests
+1
+1
+2
+2
+X
+3
diff --git a/mysql-test/r/mysqldump-nl.result b/mysql-test/r/mysqldump-nl.result
new file mode 100644
index 00000000000..6de439bdf3c
--- /dev/null
+++ b/mysql-test/r/mysqldump-nl.result
@@ -0,0 +1,126 @@
+create database `mysqltest1
+1tsetlqsym`;
+use `mysqltest1
+1tsetlqsym`;
+create table `t1
+1t` (`foobar
+raboof` int);
+create view `v1
+1v` as select * from `t1
+1t`;
+create procedure sp() select * from `v1
+1v`;
+flush tables;
+use test;
+
+--
+-- Current Database: `mysqltest1
+-- 1tsetlqsym`
+--
+
+/*!40000 DROP DATABASE IF EXISTS `mysqltest1
+1tsetlqsym`*/;
+
+CREATE DATABASE /*!32312 IF NOT EXISTS*/ `mysqltest1
+1tsetlqsym` /*!40100 DEFAULT CHARACTER SET latin1 */;
+
+USE `mysqltest1
+1tsetlqsym`;
+
+--
+-- Table structure for table `t1
+-- 1t`
+--
+
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `t1
+1t` (
+ `foobar
+raboof` int(11) DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+
+--
+-- Dumping data for table `t1
+-- 1t`
+--
+
+--
+-- Temporary table structure for view `v1
+-- 1v`
+--
+
+SET @saved_cs_client = @@character_set_client;
+SET character_set_client = utf8;
+/*!50001 CREATE TABLE `v1
+1v` (
+ `foobar
+raboof` tinyint NOT NULL
+) ENGINE=MyISAM */;
+SET character_set_client = @saved_cs_client;
+
+--
+-- Dumping routines for database 'mysqltest1
+-- 1tsetlqsym'
+--
+/*!50003 SET @saved_cs_client = @@character_set_client */ ;
+/*!50003 SET @saved_cs_results = @@character_set_results */ ;
+/*!50003 SET @saved_col_connection = @@collation_connection */ ;
+/*!50003 SET character_set_client = latin1 */ ;
+/*!50003 SET character_set_results = latin1 */ ;
+/*!50003 SET collation_connection = latin1_swedish_ci */ ;
+/*!50003 SET @saved_sql_mode = @@sql_mode */ ;
+/*!50003 SET sql_mode = '' */ ;
+DELIMITER ;;
+CREATE DEFINER=`root`@`localhost` PROCEDURE `sp`()
+select * from `v1
+1v` ;;
+DELIMITER ;
+/*!50003 SET sql_mode = @saved_sql_mode */ ;
+/*!50003 SET character_set_client = @saved_cs_client */ ;
+/*!50003 SET character_set_results = @saved_cs_results */ ;
+/*!50003 SET collation_connection = @saved_col_connection */ ;
+
+--
+-- Current Database: `mysqltest1
+-- 1tsetlqsym`
+--
+
+USE `mysqltest1
+1tsetlqsym`;
+
+--
+-- Final view structure for view `v1
+-- 1v`
+--
+
+/*!50001 DROP TABLE IF EXISTS `v1
+1v`*/;
+/*!50001 SET @saved_cs_client = @@character_set_client */;
+/*!50001 SET @saved_cs_results = @@character_set_results */;
+/*!50001 SET @saved_col_connection = @@collation_connection */;
+/*!50001 SET character_set_client = latin1 */;
+/*!50001 SET character_set_results = latin1 */;
+/*!50001 SET collation_connection = latin1_swedish_ci */;
+/*!50001 CREATE ALGORITHM=UNDEFINED */
+/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */
+/*!50001 VIEW `v1
+1v` AS select `t1
+1t`.`foobar
+raboof` AS `foobar
+raboof` from `t1
+1t` */;
+/*!50001 SET character_set_client = @saved_cs_client */;
+/*!50001 SET character_set_results = @saved_cs_results */;
+/*!50001 SET collation_connection = @saved_col_connection */;
+show tables from `mysqltest1
+1tsetlqsym`;
+Tables_in_mysqltest1
+1tsetlqsym
+t1
+1t
+v1
+1v
+drop database `mysqltest1
+1tsetlqsym`;
diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
index b6de51c8b03..cb3c28f42cd 100644
--- a/mysql-test/r/mysqldump.result
+++ b/mysql-test/r/mysqldump.result
@@ -5236,9 +5236,6 @@ SET @@global.log_output="TABLE";
SET @@global.general_log='OFF';
SET @@global.slow_query_log='OFF';
DROP DATABASE mysql;
-Warnings:
-Error 1146 Table 'mysql.proc' doesn't exist
-Error 1146 Table 'mysql.event' doesn't exist
SHOW CREATE TABLE mysql.general_log;
Table Create Table
general_log CREATE TABLE `general_log` (
diff --git a/mysql-test/r/mysqltest.result b/mysql-test/r/mysqltest.result
index e258b1d156f..fa054d457f9 100644
--- a/mysql-test/r/mysqltest.result
+++ b/mysql-test/r/mysqltest.result
@@ -269,12 +269,6 @@ source database
echo message echo message
mysqltest: At line 1: Missing argument in exec
-1
-1
-2
-2
-X
-3
MySQL
"MySQL"
MySQL: The
diff --git a/mysql-test/r/named_pipe.result b/mysql-test/r/named_pipe.result
index ddd48f0ba91..43fb44beece 100644
--- a/mysql-test/r/named_pipe.result
+++ b/mysql-test/r/named_pipe.result
@@ -2154,3 +2154,4 @@ Privat (Private Nutzung) Mobilfunk
Warnings:
Warning 1052 Column 'kundentyp' in group statement is ambiguous
drop table t1;
+FOUND /\[ERROR\] Create named pipe failed/ in second-mysqld.err
diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result
index 517e2d23915..eb5c8ca9377 100644
--- a/mysql-test/r/ps.result
+++ b/mysql-test/r/ps.result
@@ -4076,4 +4076,35 @@ id value
deallocate prepare stmt;
SET SESSION sql_mode = @save_sql_mode;
DROP TABLE t1,t2;
-# End of 10.0 tests
+#
+# MDEV-8833: Crash of server on prepared statement with
+# conversion to semi-join
+#
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+# End of 5.5 tests
diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index 620bdc6bd50..8fb5cd17c51 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1446,3 +1446,74 @@ a b i
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP TABLE t1,t2;
set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-11096: range condition over column without statistical data
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'e';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
+select * from t1 where col1 > 'b' and col1 < 'e';
+col1
+c
+d
+drop table t1;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-9628: unindexed blob column without min-max statistics
+# with optimizer_use_condition_selectivity=3
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+select * from t1 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 8 28.57 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd'))
+select * from t2 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended
+select * from t2 where col1 > 'b' and col1 < 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd'))
+select * from t2 where col1 < 'b' and col1 > 'd';
+col1
+explain extended
+select * from t2 where col1 < 'b' and col1 > 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
+Warnings:
+Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0
+drop table t1,t2;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index 0acbb465ba8..3d15131dbb5 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -802,9 +802,9 @@ insert into t2 values (2),(3);
explain extended
select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 1 0.00 Using where
+1 PRIMARY t1 ALL NULL NULL NULL NULL 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1 100.00
-2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 0.00
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 100.00
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3))
select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
@@ -1450,6 +1450,77 @@ a b i
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP TABLE t1,t2;
set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-11096: range condition over column without statistical data
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'e';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
+select * from t1 where col1 > 'b' and col1 < 'e';
+col1
+c
+d
+drop table t1;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-9628: unindexed blob column without min-max statistics
+# with optimizer_use_condition_selectivity=3
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+select * from t1 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 8 28.57 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd'))
+select * from t2 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended
+select * from t2 where col1 > 'b' and col1 < 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd'))
+select * from t2 where col1 < 'b' and col1 > 'd';
+col1
+explain extended
+select * from t2 where col1 < 'b' and col1 > 'd';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
+Warnings:
+Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0
+drop table t1,t2;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
set @tmp_ust= @@use_stat_tables;
set @tmp_oucs= @@optimizer_use_condition_selectivity;
@@ -1536,6 +1607,44 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na
parent_id child_group_id child_user_id id lower_group_name directory_id id
drop table t1,t2,t3;
#
+# MDEV-9187: duplicate of bug mdev-9628
+#
+set use_stat_tables = preferably;
+set optimizer_use_condition_selectivity=3;
+CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+SELECT * FROM t1 WHERE f1 < 'm';
+f1
+foo
+bar
+EXPLAIN EXTENDED
+SELECT * FROM t1 WHERE f1 < 'm';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 3 72.09 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1` from `test`.`t1` where (`test`.`t1`.`f1` < 'm')
+CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+SELECT * FROM t2 WHERE f1 <> 'qux';
+f1
+foo
+bar
+EXPLAIN EXTENDED
+SELECT * FROM t2 WHERE f1 <> 'qux';
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`f1` AS `f1` from `test`.`t2` where (`test`.`t2`.`f1` <> 'qux')
+DROP TABLE t1,t2;
+#
# End of 10.0 tests
#
set use_stat_tables= @tmp_ust;
diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result
index 10aa2f2f393..c970f2ff896 100644
--- a/mysql-test/r/type_uint.result
+++ b/mysql-test/r/type_uint.result
@@ -14,6 +14,25 @@ this
0
4294967295
drop table t1;
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+coalesce(a,b) coalesce(b,a)
+1 2
+18446744073709551615 16777215
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `a` bigint(20) unsigned DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t2;
+a
+1
+18446744073709551615
+2
+16777215
+drop table t1, t2;
#
# Start of 10.0 tests
#
diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result
index 52c379d03af..924b3a11fef 100644
--- a/mysql-test/r/view.result
+++ b/mysql-test/r/view.result
@@ -5432,6 +5432,7 @@ DROP FUNCTION f1;
DROP VIEW v1;
DROP TABLE t1, t2;
create view v1 as select 1;
+FOUND /mariadb-version/ in v1.frm
drop view v1;
#
# MDEV-7260: Crash in get_best_combination when executing multi-table
diff --git a/mysql-test/r/wait_timeout_not_windows.result b/mysql-test/r/wait_timeout_not_windows.result
index df70aa99221..867787a8ed3 100644
--- a/mysql-test/r/wait_timeout_not_windows.result
+++ b/mysql-test/r/wait_timeout_not_windows.result
@@ -1,3 +1,4 @@
set global log_warnings=2;
set @@wait_timeout=1;
+FOUND /Aborted.*Got timeout reading communication packets/ in mysqld.1.err
set global log_warnings=@@log_warnings;
diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
index cc2a0373444..07e13008e27 100644
--- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
+++ b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
@@ -33,6 +33,7 @@ INSERT INTO t1 VALUES(1,'X',1);
SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
SELECT b FROM t1 LIMIT 3;
ERROR HY000: Lost connection to MySQL server during query
+FOUND /Wrote log record for ibuf update in place operation/ in my_restart.err
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result
index d80c451c841..7d6133adb74 100644
--- a/mysql-test/suite/innodb/r/innodb_bug54044.result
+++ b/mysql-test/suite/innodb/r/innodb_bug54044.result
@@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` (
`IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1
DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+CREATE TABLE tmp ENGINE = INNODB
+AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
SHOW CREATE TABLE tmp;
Table Create Table
tmp CREATE TABLE `tmp` (
diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result
new file mode 100644
index 00000000000..79a24f7e455
--- /dev/null
+++ b/mysql-test/suite/innodb/r/system_tables.result
@@ -0,0 +1,8 @@
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+starttime
+2008-08-12 02:43:00
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test
index aa19c51018c..61a09375ae1 100644
--- a/mysql-test/suite/innodb/t/innodb_bug54044.test
+++ b/mysql-test/suite/innodb/t/innodb_bug54044.test
@@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB
SHOW CREATE TABLE table_54044;
DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+# This 'create table' should pass since it uses a Field_string of size 0.
+
+CREATE TABLE tmp ENGINE = INNODB
+ AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
SHOW CREATE TABLE tmp;
DROP TABLE tmp;
@@ -23,4 +26,3 @@ FLUSH TABLES;
--error 1005
CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1;
DROP TABLE t1;
-
diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test
new file mode 100644
index 00000000000..90cb8c59fbd
--- /dev/null
+++ b/mysql-test/suite/innodb/t/system_tables.test
@@ -0,0 +1,12 @@
+--source include/have_innodb.inc
+
+#
+# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to a crash
+#
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+--source include/restart_mysqld.inc
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result
deleted file mode 100644
index c8fa1cc2b24..00000000000
--- a/mysql-test/suite/perfschema/r/aggregate.result
+++ /dev/null
@@ -1,121 +0,0 @@
-"General cleanup"
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-drop table if exists t1;
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-create table t1 (
-id INT PRIMARY KEY,
-b CHAR(100) DEFAULT 'initial value')
-ENGINE=MyISAM;
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-update performance_schema.setup_instruments SET enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-set @dump_all=FALSE;
-"Verifying file aggregate consistency"
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-"Verifying waits aggregate consistency (instance)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT)
-"Verifying waits aggregate consistency (thread)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT)
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-drop table test.t1;
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test
deleted file mode 100644
index fe30a7b8697..00000000000
--- a/mysql-test/suite/perfschema/t/aggregate.test
+++ /dev/null
@@ -1,197 +0,0 @@
-# Tests for PERFORMANCE_SCHEMA
-# Verify that statistics aggregated by different criteria are consistent.
-
---source include/not_embedded.inc
---source include/have_perfschema.inc
-
---echo "General cleanup"
-
-# MDEV-7187 - test fails sporadically in buildbot
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Cleanup statistics
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-
-# Start recording data
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
- set enabled = 'YES', timed = 'YES';
-
-
-create table t1 (
- id INT PRIMARY KEY,
- b CHAR(100) DEFAULT 'initial value')
- ENGINE=MyISAM;
-
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-
-# Stop recording data, so the select below don't add noise.
-update performance_schema.setup_instruments SET enabled = 'NO';
-# Disable all consumers, for long standing waits
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Helper to debug
-set @dump_all=FALSE;
-
-# Note that in general:
-# - COUNT/SUM/MAX(file_summary_by_event_name) >=
-# COUNT/SUM/MAX(file_summary_by_instance).
-# - MIN(file_summary_by_event_name) <=
-# MIN(file_summary_by_instance).
-# There will be equality only when file instances are not removed,
-# aka when a file is not deleted from the file system,
-# because doing so removes a row in file_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-# COUNT/SUM/MAX(events_waits_summary_by_instance)
-# - MIN(events_waits_summary_global_by_event_name) <=
-# MIN(events_waits_summary_by_instance)
-# There will be equality only when an instrument instance
-# is not removed, which is next to impossible to predictably guarantee
-# in the server.
-# For example, a MyISAM table removed from the table cache
-# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock.
-# Another example, a thread terminating will cause a mysql_mutex_destroy
-# on sql/LOCK_delete
-# Both cause a row to be deleted from events_waits_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name)
-# - MIN(events_waits_summary_global_by_event_name) <=
-# MIN(events_waits_summary_by_thread_by_event_name)
-# There will be equality only when no thread is removed,
-# that is if no thread disconnects, or no sub thread (for example insert
-# delayed) ever completes.
-# A thread completing will cause rows in
-# events_waits_summary_by_thread_by_event_name to be removed.
-
---echo "Verifying file aggregate consistency"
-
-# Since the code generating the load in this test does:
-# - create table
-# - insert
-# - does not cause temporary tables to be used
-# we can test for equality here for file aggregates.
-
-# If any of these queries returns data, the test failed.
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (instance)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (thread)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-
-
-# Cleanup
-
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
- set enabled = 'YES', timed = 'YES';
-
-drop table test.t1;
-
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
-
diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result
index 83b88ed0480..ceb75176b43 100644
--- a/mysql-test/suite/plugins/r/server_audit.result
+++ b/mysql-test/suite/plugins/r/server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users
-server_audit_loc_info
server_audit_logging OFF
server_audit_mode 0
server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 0
server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 1
server_audit_output_type file
diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
index 83b88ed0480..ceb75176b43 100644
--- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result
+++ b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users
-server_audit_loc_info
server_audit_logging OFF
server_audit_mode 0
server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 0
server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
server_audit_file_rotate_size 1000000
server_audit_file_rotations 9
server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
server_audit_logging ON
server_audit_mode 1
server_audit_output_type file
diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result
index 94d215e596a..9e37fbf40b1 100644
--- a/mysql-test/suite/rpl/r/rpl_checksum.result
+++ b/mysql-test/suite/rpl/r/rpl_checksum.result
@@ -143,6 +143,7 @@ SET debug_dbug= @old_dbug;
INSERT INTO t4 VALUES (2);
include/wait_for_slave_sql_error.inc [errno=1590]
Last_SQL_Error = 'The incident LOST_EVENTS occurred on the master. Message: error writing to the binary log'
+FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: error writing to the binary log, Internal MariaDB error code: 1590/ in mysqld.2.err
SELECT * FROM t4 ORDER BY a;
a
1
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
index 204615201d9..e247ea9c2a7 100644
--- a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
@@ -38,5 +38,7 @@ a
3
4
5
+FOUND /Slave SQL: Error 'Duplicate entry .* on query\. .*Query: '.*', Gtid 0-1-100, Internal MariaDB error code:|Slave SQL: Could not execute Write_rows.*table test.t1; Duplicate entry.*, Gtid 0-1-100, Internal MariaDB error/ in mysqld.2.err
+FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: <none>, Internal MariaDB error code: 1590/ in mysqld.2.err
DROP TABLE t1;
include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_stop_slave_error.result b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result
new file mode 100644
index 00000000000..2bd372a9a91
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result
@@ -0,0 +1,6 @@
+include/master-slave.inc
+[connection master]
+include/stop_slave.inc
+NOT FOUND /Error reading packet from server: Lost connection/ in slave_log.err
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test
index a67850a66dd..f66187b12f5 100644
--- a/mysql-test/suite/rpl/t/rpl_drop_db.test
+++ b/mysql-test/suite/rpl/t/rpl_drop_db.test
@@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1);
select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt';
create table mysqltest1.t2 (n int);
create table mysqltest1.t3 (n int);
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty"
--error 1010
drop database mysqltest1;
use mysqltest1;
@@ -30,7 +30,7 @@ while ($1)
}
--enable_query_log
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty"
--error 1010
drop database mysqltest1;
use mysqltest1;
diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt
new file mode 100644
index 00000000000..32c4527a915
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt
@@ -0,0 +1 @@
+--log-error=$MYSQLTEST_VARDIR/tmp/slave_log.err
diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error.test b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test
new file mode 100644
index 00000000000..a88981c15c4
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test
@@ -0,0 +1,17 @@
+#
+# MDEV-8345 STOP SLAVE should not cause an ERROR to be logged to the error log
+#
+source include/have_binlog_format_mixed.inc; # don't repeat the test three times
+source include/master-slave.inc;
+
+connection master;
+sync_slave_with_master;
+source include/stop_slave.inc;
+let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/slave_log.err;
+let SEARCH_PATTERN=Error reading packet from server: Lost connection;
+let SEARCH_RANGE= -50000;
+source include/search_pattern_in_file.inc;
+
+source include/start_slave.inc;
+source include/rpl_end.inc;
+
diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test
index 05d915ec478..d2b8a6082a6 100644
--- a/mysql-test/t/alter_table.test
+++ b/mysql-test/t/alter_table.test
@@ -1712,3 +1712,28 @@ CREATE TABLE t1 (
ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id);
DROP TABLE t1;
+--echo #
+--echo # MDEV-11126 Crash while altering persistent virtual column
+--echo #
+
+CREATE TABLE `tab1` (
+ `id` bigint(20) NOT NULL AUTO_INCREMENT,
+ `field2` set('option1','option2','option3','option4') NOT NULL,
+ `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+ `field4` set('option1','option2','option3','option4') NOT NULL,
+ `field5` varchar(32) NOT NULL,
+ `field6` varchar(32) NOT NULL,
+ `field7` varchar(32) NOT NULL,
+ `field8` varchar(32) NOT NULL,
+ `field9` int(11) NOT NULL DEFAULT '1',
+ `field10` varchar(16) NOT NULL,
+ `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+ `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=latin1;
+
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128);
+SHOW CREATE TABLE `tab1`;
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT;
+SHOW CREATE TABLE `tab1`;
+DROP TABLE `tab1`;
diff --git a/mysql-test/t/create_or_replace.test b/mysql-test/t/create_or_replace.test
index 7bba2b341c0..b37417f39d0 100644
--- a/mysql-test/t/create_or_replace.test
+++ b/mysql-test/t/create_or_replace.test
@@ -386,3 +386,15 @@ drop table t1;
# Cleanup
#
DROP TABLE t2;
+
+--echo #
+--echo # MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc()
+--echo #
+CREATE TABLE t1(a INT);
+CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test';
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+LOCK TABLE t1 WRITE;
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+UNLOCK TABLES;
+DROP FUNCTION f1;
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index e6583f990ca..600880d6be5 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
+#
+# potential signedness issue
+#
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+
--echo #
--echo # End of 5.5 tests
--echo #
diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test
index d9784bc819a..a3e96953bac 100644
--- a/mysql-test/t/drop.test
+++ b/mysql-test/t/drop.test
@@ -313,3 +313,12 @@ INSERT INTO table1 VALUES (1);
DROP TABLE table1,table2;
--echo # End BUG#34750
+
+--echo #
+--echo # MDEV-11105 Table named 'db' has weird side effect.
+--echo #
+
+CREATE DATABASE mysqltest;
+CREATE TABLE mysqltest.db(id INT);
+DROP DATABASE mysqltest;
+
diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test
index 6967f847147..91e0bd3279f 100644
--- a/mysql-test/t/group_min_max_innodb.test
+++ b/mysql-test/t/group_min_max_innodb.test
@@ -230,3 +230,16 @@ eval EXPLAIN $query;
eval $query;
DROP TABLE t0,t1,t2;
+
+--echo #
+--echo # MDEV-10595: MariaDB daemon leaks memory with specific query
+--echo #
+
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+ `language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+ `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+drop table t1,t2;
diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test
index 30ae29844c2..ecfaa651076 100644
--- a/mysql-test/t/information_schema.test
+++ b/mysql-test/t/information_schema.test
@@ -612,13 +612,13 @@ select * from information_schema.schema_privileges order by grantee;
select * from information_schema.user_privileges order by grantee;
show grants;
connection con4;
-select * from information_schema.column_privileges where grantee like '%user%'
+select * from information_schema.column_privileges where grantee like '\'user%'
order by grantee;
-select * from information_schema.table_privileges where grantee like '%user%'
+select * from information_schema.table_privileges where grantee like '\'user%'
order by grantee;
-select * from information_schema.schema_privileges where grantee like '%user%'
+select * from information_schema.schema_privileges where grantee like '\'user%'
order by grantee;
-select * from information_schema.user_privileges where grantee like '%user%'
+select * from information_schema.user_privileges where grantee like '\'user%'
order by grantee;
show grants;
connection default;
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index 77e896c7c05..0cf37a24f8e 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -2880,6 +2880,19 @@ drop tables m1, t1, t4;
drop view t3;
+--echo #
+--echo # MDEV-10424 - Assertion `ticket == __null' failed in
+--echo # MDL_request::set_type
+--echo #
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+EXECUTE stmt;
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
+
+
--echo End of 5.5 tests
diff --git a/mysql-test/t/mysql.test b/mysql-test/t/mysql.test
index 2b4b1e69ab6..263e1103e8b 100644
--- a/mysql-test/t/mysql.test
+++ b/mysql-test/t/mysql.test
@@ -586,8 +586,16 @@ DROP DATABASE connected_db;
# USE and names with backticks
#
--write_file $MYSQLTEST_VARDIR/tmp/backticks.sql
+\u aa`bb``cc
+SELECT DATABASE();
+USE test
+SELECT DATABASE();
USE aa`bb``cc
SELECT DATABASE();
+USE test
+SELECT DATABASE();
+USE `aa``bb````cc`
+SELECT DATABASE();
EOF
create database `aa``bb````cc`;
--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/backticks.sql
diff --git a/mysql-test/t/mysql_not_windows.test b/mysql-test/t/mysql_not_windows.test
index 66853677f7b..591de74cbbf 100644
--- a/mysql-test/t/mysql_not_windows.test
+++ b/mysql-test/t/mysql_not_windows.test
@@ -13,3 +13,12 @@
--echo
--echo End of tests
+
+# Multi-line exec
+exec $MYSQL \
+ test -e "select 1";
+exec $MYSQL test -e "select
+ 2";
+let $query = select 3
+ as X;
+exec $MYSQL test -e "$query";
diff --git a/mysql-test/t/mysqldump-nl.test b/mysql-test/t/mysqldump-nl.test
new file mode 100644
index 00000000000..311996e77c3
--- /dev/null
+++ b/mysql-test/t/mysqldump-nl.test
@@ -0,0 +1,38 @@
+#
+# New lines in identifiers
+#
+
+# embedded server doesn't support external clients
+--source include/not_embedded.inc
+# cmd.exe doesn't like new lines on the command line
+--source include/not_windows.inc
+
+create database `mysqltest1
+1tsetlqsym`;
+use `mysqltest1
+1tsetlqsym`;
+
+create table `t1
+1t` (`foobar
+raboof` int);
+create view `v1
+1v` as select * from `t1
+1t`;
+
+create procedure sp() select * from `v1
+1v`;
+
+flush tables;
+use test;
+
+exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1
+1tsetlqsym';
+
+exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1
+1tsetlqsym' | $MYSQL;
+
+show tables from `mysqltest1
+1tsetlqsym`;
+
+drop database `mysqltest1
+1tsetlqsym`;
diff --git a/mysql-test/t/mysqltest.test b/mysql-test/t/mysqltest.test
index ae59c713c3d..e85d793b628 100644
--- a/mysql-test/t/mysqltest.test
+++ b/mysql-test/t/mysqltest.test
@@ -741,15 +741,6 @@ echo ;
--error 1
--exec echo "--exec " | $MYSQL_TEST 2>&1
-# Multi-line exec
-exec $MYSQL
- test -e "select 1";
-exec $MYSQL test -e "select
- 2";
-let $query = select 3
- as X;
-exec $MYSQL test -e "$query";
-
# ----------------------------------------------------------------------------
# Test let command
# ----------------------------------------------------------------------------
diff --git a/mysql-test/t/ps.test b/mysql-test/t/ps.test
index c45b27281a7..a6b238b84c6 100644
--- a/mysql-test/t/ps.test
+++ b/mysql-test/t/ps.test
@@ -3653,5 +3653,32 @@ deallocate prepare stmt;
SET SESSION sql_mode = @save_sql_mode;
DROP TABLE t1,t2;
+--echo #
+--echo # MDEV-8833: Crash of server on prepared statement with
+--echo # conversion to semi-join
+--echo #
+
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+EXECUTE stmt;
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+
---echo # End of 10.0 tests
+--echo # End of 5.5 tests
diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
index c46ff69295f..8efc5216ba0 100644
--- a/mysql-test/t/selectivity.test
+++ b/mysql-test/t/selectivity.test
@@ -970,6 +970,58 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit
DROP TABLE t1,t2;
+set use_stat_tables=@save_use_stat_tables;
+
+--echo #
+--echo # Bug mdev-11096: range condition over column without statistical data
+--echo #
+
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'e';
+select * from t1 where col1 > 'b' and col1 < 'e';
+
+drop table t1;
+
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+
+--echo #
+--echo # Bug mdev-9628: unindexed blob column without min-max statistics
+--echo # with optimizer_use_condition_selectivity=3
+--echo #
+
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+
+select * from t1 where col1 > 'b' and col1 < 'd';
+explain extended
+select * from t1 where col1 > 'b' and col1 < 'd';
+
+select * from t2 where col1 > 'b' and col1 < 'd';
+explain extended
+select * from t2 where col1 > 'b' and col1 < 'd';
+
+select * from t2 where col1 < 'b' and col1 > 'd';
+explain extended
+select * from t2 where col1 < 'b' and col1 > 'd';
+
+drop table t1,t2;
+
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/t/selectivity_innodb.test b/mysql-test/t/selectivity_innodb.test
index d6a77eac600..25aa0abbc3b 100644
--- a/mysql-test/t/selectivity_innodb.test
+++ b/mysql-test/t/selectivity_innodb.test
@@ -110,6 +110,31 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na
drop table t1,t2,t3;
--echo #
+--echo # MDEV-9187: duplicate of bug mdev-9628
+--echo #
+
+set use_stat_tables = preferably;
+set optimizer_use_condition_selectivity=3;
+
+CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t1;
+
+SELECT * FROM t1 WHERE f1 < 'm';
+EXPLAIN EXTENDED
+SELECT * FROM t1 WHERE f1 < 'm';
+
+CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t2;
+
+SELECT * FROM t2 WHERE f1 <> 'qux';
+EXPLAIN EXTENDED
+SELECT * FROM t2 WHERE f1 <> 'qux';
+
+DROP TABLE t1,t2;
+
+--echo #
--echo # End of 10.0 tests
--echo #
diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test
index 3a949c5c47a..84fca993d09 100644
--- a/mysql-test/t/type_uint.test
+++ b/mysql-test/t/type_uint.test
@@ -16,6 +16,13 @@ drop table t1;
# End of 4.1 tests
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+select * from t2;
+drop table t1, t2;
--echo #
--echo # Start of 10.0 tests
diff --git a/mysql-test/unstable-tests b/mysql-test/unstable-tests
index 6a46602eb07..3e25115599f 100644
--- a/mysql-test/unstable-tests
+++ b/mysql-test/unstable-tests
@@ -23,77 +23,66 @@
#
##############################################################################
-main.bootstrap : Modified on 2016-06-18 (MDEV-9969)
main.create_delayed : MDEV-10605 - failed with timeout
-main.create_or_replace : Modified on 2016-06-23 (MDEV-9728)
-main.ctype_recoding : Modified on 2016-06-10 (MDEV-10181)
-main.ctype_utf8 : Modified on 2016-06-21 (merge)
-main.ctype_utf8mb4 : Modified on 2016-06-21 (merge)
-main.events_1 : Modified on 2016-06-21 (MDEV-9524)
+main.ctype_utf32 : Modified on 2016-09-27 (merge)
main.func_group : Modified on 2016-08-08 (MDEV-10468)
-main.func_in : Modified on 2016-06-20 (MDEV-10020)
main.func_math : Modified on 2016-08-10 (merge)
main.func_misc : Modified on 2016-08-10 (merge)
-main.grant2 : Modified on 2016-07-18 (MDEV-8569)
-main.help : Modified on 2016-06-21 (MDEV-9524)
+main.group_min_max_innodb : Modified on 2016-08-25 (MDEV-10595)
main.host_cache_size_functionality : MDEV-10606 - sporadic failure on shutdown
main.index_intersect_innodb : MDEV-10643 - failed with timeout
-main.index_merge_innodb : MDEV-7142 - sporadic wrong execution plan
+main.index_merge_myisam : Modified on 2016-09-05 (include file changed)
+main.index_merge_innodb : Modified on 2016-09-05 (MDEV-7142)
main.information_schema_stats : Modified on 2016-07-25 (MDEV-10428)
main.innodb_mysql_lock : MDEV-7861 - sporadic lock detection failure
-main.insert_innodb : Modified on 2016-06-14 (merge from upstream)
main.loaddata : Modified on 2016-08-10 (merge)
-main.locale : Modified on 2016-06-21 (merge)
main.mdev-504 : MDEV-10607 - sporadic "can't connect"
main.mdev375 : MDEV-10607 - sporadic "can't connect"
main.merge : MDEV-10607 - sporadic "can't connect"
-main.multi_update : Modified on 2016-06-20 (MDEV-5973)
main.myisam_enable_keys-10506 : New test, added on 2016-08-10 (MDEV-10506)
main.mysqlcheck : Modified on 2016-08-10 (merge)
main.mysqldump : MDEV-10512 - sporadic assertion failure
+main.mysqlhotcopy_myisam : MDEV-10995 - test hangs on debug build
main.mysqltest : MDEV-9269 - fails on Alpha
main.named_pipe : Modified on 2016-08-02 (MDEV-10383)
-main.openssl_1 : Modified on 2016-07-11 (MDEV-10211)
-main.parser : Modified on 2016-06-21 (merge)
main.pool_of_threads : MDEV-10100 - sporadic error on detecting max connections
-main.ps_1general : Modified on 2016-07-12 (merge)
+main.ps : MDEV-11017 - sporadic wrong Prepared_stmt_count
main.range : Modified on 2016-08-10 (merge)
main.range_mrr_icp : Modified on 2016-08-10 (merge)
main.query_cache : MDEV-10611 - sporadic mutex problem
-main.shutdown : MDEV-10612 - sporadic crashes
+main.shutdown : MDEV-10563 - sporadic crashes
main.sp-prelocking : Modified on 2016-08-10 (merge)
main.sp-security : MDEV-10607 - sporadic "can't connect"
-main.ssl : MDEV-10211 - different ciphers on some platforms
-main.ssl_ca : Modified on 2016-07-11 (MDEV-10211)
-main.ssl_compress : Modified on 2016-07-11 (MDEV-10211)
-main.ssl_timeout : Modified on 2016-07-11 (MDEV-10211)
+main.ssl_compress : MDEV-11110 - valgrind failures
main.stat_tables_par_innodb : MDEV-10515 - sporadic wrong results
-main.status_user : Modified on 2016-06-20 (MDEV-8633)
main.subselect_innodb : MDEV-10614 - sporadic wrong results
-main.temp_table : Modified on 2016-06-18 (MDEV-8569)
main.type_date : Modified on 2016-08-10 (merge)
-main.type_datetime : Modified on 2016-06-16 (MDEV-9374)
+main.type_uint : Modified on 2016-09-27 (merge)
main.view : Modified on 2016-08-10 (merge)
main.xtradb_mrr : Modified on 2016-08-04 (MDEV-9946)
#----------------------------------------------------------------
-archive.archive-big : MDEV-10615 - table is marked as crashed
-archive.discover : MDEV-10510 - table is marked as crashed
+archive.archive-big : MDEV-10615 - table is marked as crashed
+archive.discover : MDEV-10510 - table is marked as crashed
+archive.mysqlhotcopy_archive : MDEV-10995 - test hangs on debug build
#----------------------------------------------------------------
binlog.binlog_commit_wait : MDEV-10150 - Error: too much time elapsed
-binlog.binlog_dmls_on_tmp_tables_readonly : New test, added on 2016-05-04 (upstream)
binlog.binlog_xa_recover : MDEV-8517 - Extra checkpoint
#----------------------------------------------------------------
connect.tbl : MDEV-9844, MDEV-10179 - sporadic crashes, valgrind warnings, wrong results
-connect.jdbc : New test, added on 2016-07-15
-connect.jdbc-new : New test, added on 2016-07-14
-connect.jdbc-oracle : New test, added on 2016-07-13
-connect.jdbc-postgresql : New test, added on 2016-07-13
+
+#----------------------------------------------------------------
+
+engines/rr_trx.* : MDEV-10998 - tests not maintained
+
+#----------------------------------------------------------------
+
+extra/binlog_tests.database : Modified on 2016-10-21 (Upstream MIPS test fixes)
#----------------------------------------------------------------
@@ -104,20 +93,19 @@ federated.federated_transactions : MDEV-10617, MDEV-10417 - Wrong checksum, time
#----------------------------------------------------------------
-funcs_1.processlist_priv_no_prot : Include file modified on 2016-07-12 (merge)
-funcs_1.processlist_priv_ps : Include file modified on 2016-07-12 (merge)
+funcs_2/charset.* : MDEV-10999 - test not maintained
#----------------------------------------------------------------
innodb.binlog_consistent : MDEV-10618 - Server fails to start
innodb.innodb-alter-table : MDEV-10619 - Testcase timeout
innodb.innodb-alter-tempfile : Modified on 2016-08-09 (MDEV-10469)
-innodb.innodb_corrupt_bit : Modified on 2016-06-21 (merge)
innodb.innodb_bug30423 : MDEV-7311 - Wrong number of rows in the plan
-innodb.innodb-fk-warnings : Modified on 2016-07-18 (MDEV-8569)
-innodb.innodb-fkcheck : Modified on 2016-06-13 (MDEV-10083)
+innodb.innodb_bug54044 : Modified on 2016-09-27 (merge)
+innodb.innodb_monitor : MDEV-10939 - Testcase timeout
innodb.innodb-wl5522 : rdiff file modified on 2016-08-10 (merge)
innodb.innodb-wl5522-debug-zip : MDEV-10427 - Warning: database page corruption
+innodb.system_tables : Added on 2016-09-23 (MDEV-10775)
#----------------------------------------------------------------
@@ -142,21 +130,16 @@ parts.partition_int_myisam : MDEV-10621 - Testcase timeout
#----------------------------------------------------------------
-perfschema.digest_table_full : Modified on 2016-06-21 (merge)
perfschema.func_file_io : MDEV-5708 - fails for s390x
perfschema.func_mutex : MDEV-5708 - fails for s390x
-perfschema.rpl_gtid_func : Modified on 2016-06-21 (merge)
-perfschema.sizing_low : Modified on 2016-04-26 (5.6.30 merge)
+perfschema.hostcache_ipv6_ssl : MDEV-10696 - crash on shutdown
perfschema.socket_summary_by_event_name_func : MDEV-10622 - Socket summary tables do not match
-perfschema.start_server_low_digest : Modified on 2016-06-21 (merge)
-perfschema.statement_digest : Modified on 2016-06-21 (merge)
-perfschema.statement_digest_consumers : Modified on 2016-06-21 (merge)
-perfschema.statement_digest_long_query : Modified on 2016-06-21 (merge)
-perfschema.table_name : New test, added on 2016-04-26 (5.6.30 merge)
+
+perfschema_stress.* : MDEV-10996 - tests not maintained
#----------------------------------------------------------------
-plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url
+plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url, MDEV-11112 - valgrind warnings
plugins.pam : Modified on 2016-08-03 (MDEV-7329)
plugins.pam_cleartext : Modified on 2016-08-03
plugins.server_audit : MDEV-9562 - crashes on sol10-sparc
@@ -164,11 +147,6 @@ plugins.thread_pool_server_audit : MDEV-9562 - crashes on sol10-sparc
#----------------------------------------------------------------
-roles.rpl_grant_revoke_current_role-8638 : New test, added on 2016-06-20 (MDEV-8638)
-roles.set_role-9614 : New test, added on 2016-05-30 (MDEV-9614)
-
-#----------------------------------------------------------------
-
rpl.last_insert_id : MDEV-10625 - warnings in error log
rpl.rpl_auto_increment : MDEV-10417 - Fails on Mips
rpl.rpl_auto_increment_bug45679 : MDEV-10417 - Fails on Mips
@@ -177,11 +155,11 @@ rpl.rpl_binlog_index : MDEV-9501 - Warning: failed registering
rpl.rpl_checksum_cache : MDEV-10626 - Testcase timeout
rpl.rpl_circular_for_4_hosts : MDEV-10627 - Testcase timeout
rpl.rpl_ddl : MDEV-10417 - Fails on Mips
+rpl.rpl_drop_db : Modified on 2016-10-21 (Upstream MIPS test fixes)
rpl.rpl_gtid_crash : MDEV-9501 - Warning: failed registering on master
rpl.rpl_gtid_master_promote : MDEV-10628 - Timeout in sync_with_master
rpl.rpl_gtid_stop_start : MDEV-10629 - Crash on shutdown
rpl.rpl_gtid_until : MDEV-10625 - warnings in error log
-rpl.rpl_ignore_table : Modified on 2016-06-22
rpl.rpl_innodb_bug30888 : MDEV-10417 - Fails on Mips
rpl.rpl_insert : MDEV-9329 - Fails on Ubuntu/s390x
rpl.rpl_insert_delayed : MDEV-9329 - Fails on Ubuntu/s390x
@@ -201,6 +179,8 @@ rpl.rpl_temporary_error2 : MDEV-10634 - Wrong number of retries
rpl.sec_behind_master-5114 : MDEV-8518 - Wrong value of Seconds_Behind_Master
rpl.rpl_skip_replication : MDEV-9268 - Fails with timeout in sync_slave_with_master on Alpha
+rpl/extra/rpl_tests.* : MDEV-10994 - tests not maintained
+
#----------------------------------------------------------------
spider.* : MDEV-9329 - tests are too memory-consuming
@@ -214,6 +194,10 @@ spider/bg.vp_fixes : MDEV-9329 - Fails on Ubuntu/s390x
#----------------------------------------------------------------
+sphinx.* : MDEV-10747 - tests are not run in buildbot, they can't be stable
+
+#----------------------------------------------------------------
+
stress.ddl_innodb : MDEV-10635 - Testcase timeout
#----------------------------------------------------------------
@@ -229,11 +213,14 @@ tokudb.background_job_manager : MDEV-10327 - Assertion failure on server
tokudb.cluster_filter_unpack_varchar : MDEV-10636 - Wrong execution plan
tokudb.* : MDEV-9891 - massive crashes on shutdown
tokudb_alter_table.* : MDEV-9891 - massive crashes on shutdown
+tokudb_backup.* : MDEV-11001 - tests don't work
tokudb_bugs.checkpoint_lock : MDEV-10637 - Wrong processlist output
tokudb_bugs.checkpoint_lock_3 : MDEV-10637 - Wrong processlist output
tokudb_bugs.* : MDEV-9891 - massive crashes on shutdown
tokudb_parts.* : MDEV-9891 - massive crashes on shutdown
-rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown, also modified on 2016-06-10 (Merge)
+tokudb_rpl_suites.* : MDEV-11001 - tests don't work
+tokudb_sys_vars.* : MDEV-11001 - tests don't work
+rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown
tokudb/tokudb_add_index.* : MDEV-9891 - massive crashes on shutdown
tokudb/tokudb_backup.* : MDEV-9891 - massive crashes on shutdown
tokudb/tokudb_mariadb.* : MDEV-9891 - massive crashes on shutdown
@@ -247,7 +234,6 @@ unit.ma_test_loghandler : MDEV-10638 - record read not ok
#----------------------------------------------------------------
-vcol.charsets : Added on 2016-06-23
vcol.not_supported : MDEV-10639 - Testcase timeout
vcol.vcol_keys_innodb : MDEV-10639 - Testcase timeout
diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp
index 1cc5d177972..5f1af1a0905 100644
--- a/mysql-test/valgrind.supp
+++ b/mysql-test/valgrind.supp
@@ -1228,6 +1228,125 @@
fun:dlopen@@GLIBC_2.2.5
}
+#
+# MDEV-11061: OpenSSL 0.9.8 problems
+#
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ obj:*/libz.so*
+ ...
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Value8
+ obj:*/libz.so*
+ ...
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Value8
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ obj:*/libssl.so.0.9.8
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Value8
+ obj:*/libssl.so.0.9.8
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ fun:memcpy
+ obj:*/libcrypto.so.0.9.8
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Value8
+ fun:memcpy
+ obj:*/libcrypto.so.0.9.8
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ fun:is_overlap
+ fun:memcpy
+ obj:*/libcrypto.so.0.9.8
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Cond
+ fun:memset
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Value8
+ fun:memset
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
+{
+ MDEV-11061: OpenSSL 0.9.8
+ Memcheck:Param
+ write(buf)
+ obj:*/libpthread-2.9.so*
+ obj:*/libcrypto.so.0.9.8
+ ...
+ obj:*/libssl.so.0.9.8
+ ...
+}
+
{
GitHub codership/galera#330
Memcheck:Leak
@@ -1316,7 +1435,7 @@
}
{
-g codership/mysql-wsrep/issues#176
+ codership/mysql-wsrep/issues#176
Memcheck:Leak
fun:_Z16wsrep_set_paramsRN6galera10ReplicatorEPKc
}
diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c
index 409f1bfc9dd..a24f5161168 100644
--- a/mysys/my_fopen.c
+++ b/mysys/my_fopen.c
@@ -102,6 +102,7 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream)
HANDLE osfh;
DBUG_ASSERT(path && stream);
+ DBUG_ASSERT(strchr(mode, 'a')); /* We use FILE_APPEND_DATA below */
/* Services don't have stdout/stderr on Windows, so _fileno returns -1. */
if (fd < 0)
@@ -112,15 +113,14 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream)
fd= _fileno(stream);
}
- if ((osfh= CreateFile(path, GENERIC_READ | GENERIC_WRITE,
+ if ((osfh= CreateFile(path, GENERIC_READ | FILE_APPEND_DATA,
FILE_SHARE_READ | FILE_SHARE_WRITE |
FILE_SHARE_DELETE, NULL,
OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL,
NULL)) == INVALID_HANDLE_VALUE)
return NULL;
- if ((handle_fd= _open_osfhandle((intptr_t)osfh,
- _O_APPEND | _O_TEXT)) == -1)
+ if ((handle_fd= _open_osfhandle((intptr_t)osfh, _O_TEXT)) == -1)
{
CloseHandle(osfh);
return NULL;
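
The switch from GENERIC_WRITE to FILE_APPEND_DATA (and dropping _O_APPEND)
deserves a note: with FILE_APPEND_DATA the kernel positions every write at
end-of-file atomically, so two processes appending to one error log cannot
interleave inside each other's records, which user-mode _O_APPEND seeking
cannot guarantee. A minimal standalone sketch of the same pattern (not the
server code; "app.log" is illustrative):

#include <windows.h>
#include <stdio.h>

int main(void)
{
  /* FILE_APPEND_DATA: the kernel places each write at EOF atomically */
  HANDLE h= CreateFile("app.log", GENERIC_READ | FILE_APPEND_DATA,
                       FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
                       NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
  DWORD written;
  if (h == INVALID_HANDLE_VALUE)
    return 1;
  /* lands at end-of-file regardless of the current file pointer */
  WriteFile(h, "one line\n", 9, &written, NULL);
  CloseHandle(h);
  return 0;
}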
diff --git a/mysys/my_redel.c b/mysys/my_redel.c
index 61e61b40791..976fc5a18c3 100644
--- a/mysys/my_redel.c
+++ b/mysys/my_redel.c
@@ -1,5 +1,5 @@
-/*
- Copyright (c) 2000, 2010, Oracle and/or its affiliates
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
+ Copyright (c) 2009, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name,
DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu",
org_name,tmp_name,MyFlags));
- if (my_copystat(org_name,tmp_name,MyFlags) < 0)
+ if (!my_disable_copystat_in_redel &&
+ my_copystat(org_name,tmp_name,MyFlags) < 0)
goto end;
if (MyFlags & MY_REDEL_MAKE_BACKUP)
{
diff --git a/mysys/my_static.c b/mysys/my_static.c
index 4aca78e30a9..9236c1395fb 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -98,3 +98,4 @@ my_bool my_disable_sync=0;
my_bool my_disable_async_io=0;
my_bool my_disable_flush_key_blocks=0;
my_bool my_disable_symlinks=0;
+my_bool my_disable_copystat_in_redel=0;
diff --git a/plugin/feedback/utils.cc b/plugin/feedback/utils.cc
index b83b69be0ce..dad3d59e76d 100644
--- a/plugin/feedback/utils.cc
+++ b/plugin/feedback/utils.cc
@@ -43,7 +43,11 @@ static const char *get_os_version_name(OSVERSIONINFOEX *ver)
{
DWORD major = ver->dwMajorVersion;
DWORD minor = ver->dwMinorVersion;
-
+ if (major == 10 && minor == 0)
+ {
+ return (ver->wProductType == VER_NT_WORKSTATION) ?
+ "Windows 10" : "Windows Server 2016";
+ }
if (major == 6 && minor == 3)
{
return (ver->wProductType == VER_NT_WORKSTATION)?
@@ -102,7 +106,12 @@ static int uname(struct utsname *buf)
if(version_str && version_str[0])
sprintf(buf->version, "%s %s",version_str, ver.szCSDVersion);
else
- sprintf(buf->version, "%s", ver.szCSDVersion);
+ {
+    /* Fallback for unknown versions, e.g. "Windows <major_ver>.<minor_ver>" */
+ sprintf(buf->version, "Windows %d.%d%s",
+ ver.dwMajorVersion, ver.dwMinorVersion,
+ (ver.wProductType == VER_NT_WORKSTATION ? "" : " Server"));
+ }
#ifdef _WIN64
strcpy(buf->machine, "x64");
diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c
index b84f2b94806..d48b6c37728 100644
--- a/plugin/server_audit/server_audit.c
+++ b/plugin/server_audit/server_audit.c
@@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit,
char locinfo_ini_value[sizeof(struct connection_info)+4];
static MYSQL_THDVAR_STR(loc_info,
- PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC,
- "Auxiliary info.", NULL, NULL,
- locinfo_ini_value);
+ PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC,
+ "Internal info", NULL, NULL, locinfo_ini_value);
static const char *syslog_facility_names[]=
{
diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh
index b8df320bf93..9d8bbce4860 100644
--- a/scripts/mysqld_safe.sh
+++ b/scripts/mysqld_safe.sh
@@ -717,6 +717,10 @@ else
logging=syslog
fi
+# close stdout and stderr, everything goes to $logging now
+exec 1>&-
+exec 2>&-
+
USER_OPTION=""
if test -w / -o "$USER" = "root"
then
@@ -747,7 +751,7 @@ if [ ! -d $mysql_unix_port_dir ]
then
if ! `mkdir -p $mysql_unix_port_dir`
then
- echo "Fatal error Can't create database directory '$mysql_unix_port'"
+ log_error "Fatal error Can't create database directory '$mysql_unix_port'"
exit 1
fi
chown $user $mysql_unix_port_dir
diff --git a/sql/contributors.h b/sql/contributors.h
index f52d3243453..0359ec54022 100644
--- a/sql/contributors.h
+++ b/sql/contributors.h
@@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= {
{"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"},
{"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"},
{"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"},
+ {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"},
/* Sponsors of important features */
{"Google", "USA", "Sponsoring encryption, parallel replication and GTID"},
diff --git a/sql/field.cc b/sql/field.cc
index eb017802da3..49989a4a4ed 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
//MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24
- MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG,
+ MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG,
//MYSQL_TYPE_DATE MYSQL_TYPE_TIME
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR,
//MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR
diff --git a/sql/item.cc b/sql/item.cc
index 6cdbccde729..21c8b3f701e 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -2743,9 +2743,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
if (context)
{
Name_resolution_context *ctx= new Name_resolution_context();
- ctx->outer_context= NULL; // We don't build a complete name resolver
- ctx->table_list= NULL; // We rely on first_name_resolution_table instead
+ if (context->select_lex == new_parent)
+ {
+ /*
+ This field was pushed in then pulled out
+ (for example left part of IN)
+ */
+ ctx->outer_context= context->outer_context;
+ }
+ else if (context->outer_context)
+ {
+ /* just pull to the upper context */
+ ctx->outer_context= context->outer_context->outer_context;
+ }
+ else
+ {
+ /* No upper context (merging Derived/VIEW where context chain ends) */
+ ctx->outer_context= NULL;
+ }
+ ctx->table_list= context->first_name_resolution_table;
ctx->select_lex= new_parent;
+ if (context->select_lex == NULL)
+ ctx->select_lex= NULL;
ctx->first_name_resolution_table= context->first_name_resolution_table;
ctx->last_name_resolution_table= context->last_name_resolution_table;
ctx->error_processor= context->error_processor;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 5cdfa427997..e70922bb5d3 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -2620,8 +2620,8 @@ static bool check_equality_for_exist2in(Item_func *func,
args[0]->all_used_tables() == OUTER_REF_TABLE_BIT)
{
 /* It is Item_field or Item_direct_view_ref */
- DBUG_ASSERT(args[0]->type() == Item::FIELD_ITEM ||
- args[0]->type() == Item::REF_ITEM);
+ DBUG_ASSERT(args[1]->type() == Item::FIELD_ITEM ||
+ args[1]->type() == Item::REF_ITEM);
*local_field= (Item_ident *)args[1];
*outer_exp= args[0];
return TRUE;
diff --git a/sql/log.cc b/sql/log.cc
index 2479208b395..7fd185aa5d3 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3144,7 +3144,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
if (! write_error)
{
write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
+ sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno);
}
}
}
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 9d33d822961..d43f14c176b 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -4084,6 +4084,7 @@ static int init_common_variables()
max_system_variables.pseudo_thread_id= (ulong)~0;
server_start_time= flush_status_time= my_time(0);
+ my_disable_copystat_in_redel= 1;
global_rpl_filter= new Rpl_filter;
binlog_filter= new Rpl_filter;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index 3e17ced92ba..f52549bfd0b 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -1,5 +1,5 @@
-/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2010, 2014, SkySQL Ab.
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+ Copyright (c) 2012, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index e0ca43e6d72..5d6891a1edf 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3345,9 +3345,16 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field *field= *field_ptr;
uint16 store_length;
+ uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
key_part->key= keys;
key_part->part= 0;
- key_part->length= (uint16) field->key_length();
+ if (field->flags & BLOB_FLAG)
+ key_part->length= max_key_part_length;
+ else
+ {
+ key_part->length= (uint16) field->key_length();
+ set_if_smaller(key_part->length, max_key_part_length);
+ }
store_length= key_part->length;
if (field->real_maybe_null())
store_length+= HA_KEY_NULL_LENGTH;
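
The new clamping rule above, in isolation (illustrative names, not the
server code): blob columns get the engine's maximum key-part length, and
every other column is capped by it, so key parts of the pseudo-indexes built
for selectivity estimation can never exceed what the handler allows:

#include <algorithm>
#include <cstdint>

static std::uint16_t pseudo_keypart_length(bool is_blob,
                                           std::uint16_t field_key_length,
                                           std::uint16_t max_key_part_length)
{
  if (is_blob)               /* blobs have no usable key length of their own */
    return max_key_part_length;
  return std::min(field_key_length, max_key_part_length);
}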
diff --git a/sql/parse_file.h b/sql/parse_file.h
index 2a0266e98b7..83a8eabcf5f 100644
--- a/sql/parse_file.h
+++ b/sql/parse_file.h
@@ -42,9 +42,9 @@ enum file_opt_type {
struct File_option
{
- LEX_STRING name; /**< Name of the option */
- int offset; /**< offset to base address of value */
- file_opt_type type; /**< Option type */
+ LEX_STRING name; /**< Name of the option */
+ my_ptrdiff_t offset; /**< offset to base address of value */
+ file_opt_type type; /**< Option type */
};
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index fd6f62fa100..c3f25848e8a 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig)
struct tm tm;
#ifdef HAVE_STACKTRACE
THD *thd;
-#endif
/*
This flag remembers if the query pointer was found invalid.
We will try and print the query at the end of the signal handler, in case
we're wrong.
*/
bool print_invalid_query_pointer= false;
+#endif
if (segfaulted)
{
@@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
"\"mlockall\" bugs.\n");
}
+#ifdef HAVE_STACKTRACE
if (print_invalid_query_pointer)
{
my_safe_printf_stderr(
@@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
my_safe_printf_stderr("\n\n");
}
+#endif
#ifdef HAVE_WRITE_CORE
if (test_flags & TEST_CORE_ON_SIGNAL)
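
The relocation of print_invalid_query_pointer is a build fix in miniature: a
variable declared under one #ifdef but used outside it either breaks the
build or draws warnings when the macro is off. The rule, as a hedged sketch
(names illustrative):

#ifdef HAVE_STACKTRACE
static bool print_invalid_query= false;   /* declared under the guard... */
#endif

void at_crash_time(void)
{
#ifdef HAVE_STACKTRACE
  if (print_invalid_query)                /* ...and only used under it */
  {
    /* dump the query here */
  }
#endif
}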
diff --git a/sql/slave.cc b/sql/slave.cc
index db6e9cbf0aa..359f4f8af9c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3126,8 +3126,13 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
*suppress_warnings= TRUE;
}
else
- sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
- mysql_error(mysql), mysql_errno(mysql));
+ {
+ if (!mi->rli.abort_slave)
+ {
+ sql_print_error("Error reading packet from server: %s (server_errno=%d)",
+ mysql_error(mysql), mysql_errno(mysql));
+ }
+ }
DBUG_RETURN(packet_error);
}
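
This is the server half of MDEV-8345 (see the new rpl_stop_slave_error test
above): a read error that is the direct consequence of the user stopping the
slave is expected, so it is no longer written to the error log. The shape of
the fix as a standalone sketch (relay_info is an illustrative stand-in, not
the real Master_info/rli types):

#include <cstdio>

struct relay_info { bool abort_slave; };  /* illustrative stand-in */

static void report_read_failure(const relay_info *rli,
                                const char *err, int err_no)
{
  if (rli->abort_slave)
    return;  /* STOP SLAVE in progress: losing the connection is expected */
  std::fprintf(stderr,
               "Error reading packet from server: %s (server_errno=%d)\n",
               err, err_no);
}

int main()
{
  relay_info rli= { true };
  report_read_failure(&rli, "Lost connection", 2013);  /* prints nothing */
  return 0;
}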
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index d32b213a838..e1c66bceedc 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -455,7 +455,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
}
thd->prepare_derived_at_open= FALSE;
- table->next_global= save_next_global;
+ /*
+ MERGE engine may adjust table->next_global chain, thus we have to
+ append save_next_global after merge children.
+ */
+ if (save_next_global)
+ {
+ TABLE_LIST *table_list_iterator= table;
+ while (table_list_iterator->next_global)
+ table_list_iterator= table_list_iterator->next_global;
+ table_list_iterator->next_global= save_next_global;
+ save_next_global->prev_global= &table_list_iterator->next_global;
+ }
+
table->next_local= save_next_local;
thd->open_options&= ~extra_open_options;
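
The chain repair above, reduced to its essentials (illustrative types, not
TABLE_LIST): walk to the tail of the list the MERGE engine may have
extended, then splice the saved suffix back on, keeping the back-pointer
consistent:

struct tlist
{
  tlist  *next;
  tlist **prev;      /* address of the 'next' field that points here */
};

static void append_suffix(tlist *head, tlist *suffix)
{
  if (!suffix)
    return;
  tlist *tail= head;
  while (tail->next)           /* skip over any appended MERGE children */
    tail= tail->next;
  tail->next= suffix;
  suffix->prev= &tail->next;
}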
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 4ff69039917..efe9ac6f7f4 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -9279,6 +9279,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list,
*/
lex->reset_n_backup_query_tables_list(&query_tables_list_backup);
thd->reset_n_backup_open_tables_state(backup);
+ thd->lex->sql_command= SQLCOM_SELECT;
if (open_and_lock_tables(thd, table_list, FALSE,
MYSQL_OPEN_IGNORE_FLUSH |
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 10779aebbda..b210a4d32dc 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -5666,9 +5666,11 @@ int THD::decide_logging_format(TABLE_LIST *tables)
{
static const char *prelocked_mode_name[] = {
"NON_PRELOCKED",
+ "LOCK_TABLES",
"PRELOCKED",
"PRELOCKED_UNDER_LOCK_TABLES",
};
+ compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last);
DBUG_PRINT("debug", ("prelocked_mode: %s",
prelocked_mode_name[locked_tables_mode]));
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 08f014d7f48..56ae659797c 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1203,7 +1203,8 @@ enum enum_locked_tables_mode
LTM_NONE= 0,
LTM_LOCK_TABLES,
LTM_PRELOCKED,
- LTM_PRELOCKED_UNDER_LOCK_TABLES
+ LTM_PRELOCKED_UNDER_LOCK_TABLES,
+ LTM_always_last
};
@@ -4373,6 +4374,11 @@ public:
save_copy_field_end= copy_field_end= NULL;
}
}
+ void free_copy_field_data()
+ {
+ for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++)
+ ptr->tmp.free();
+ }
};
class select_union :public select_result_interceptor
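
The LTM_always_last sentinel pairs with the compile_time_assert added in
sql_class.cc: adding an enumerator without a matching name string now fails
to compile instead of reading past the end of the array at run time. The
same pattern in generic C++ (illustrative names):

enum lock_mode
{
  MODE_NONE= 0,
  MODE_LOCK_TABLES,
  MODE_PRELOCKED,
  MODE_always_last             /* sentinel: count of real enumerators */
};

static const char *mode_name[]= {
  "NONE",
  "LOCK_TABLES",
  "PRELOCKED",
};

static_assert(sizeof(mode_name) / sizeof(mode_name[0]) == MODE_always_last,
              "mode_name[] is out of sync with enum lock_mode");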
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index e89c3d9e745..0a3ff64113f 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -784,7 +784,7 @@ exit:
bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
{
ulong deleted_tables= 0;
- bool error= true;
+ bool error= true, rm_mysql_schema;
char path[FN_REFLEN + 16];
MY_DIR *dirp;
uint length;
@@ -809,6 +809,18 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
length= build_table_filename(path, sizeof(path) - 1, db, "", "", 0);
strmov(path+length, MY_DB_OPT_FILE); // Append db option file name
del_dbopt(path); // Remove dboption hash entry
+ /*
+ Now remove the db.opt file.
+ The 'find_db_tables_and_rm_known_files' doesn't remove this file
+ if there exists a table with the name 'db', so let's just do it
+ separately. We know this file exists and needs to be deleted anyway.
+ */
+ if (my_delete_with_symlink(path, MYF(0)) && my_errno != ENOENT)
+ {
+ my_error(EE_DELETE, MYF(0), path, my_errno);
+ DBUG_RETURN(true);
+ }
+
path[length]= '\0'; // Remove file name
/* See if the directory exists */
@@ -835,7 +847,8 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
Disable drop of enabled log tables, must be done before name locking.
This check is only needed if we are dropping the "mysql" database.
*/
- if ((my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0))
+ if ((rm_mysql_schema=
+ (my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0)))
{
for (table= tables; table; table= table->next_local)
if (check_if_log_table(table, TRUE, "DROP"))
@@ -848,7 +861,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
lock_db_routines(thd, dbnorm))
goto exit;
- if (!in_bootstrap)
+ if (!in_bootstrap && !rm_mysql_schema)
{
for (table= tables; table; table= table->next_local)
{
@@ -893,10 +906,13 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
ha_drop_database(path);
tmp_disable_binlog(thd);
query_cache_invalidate1(thd, dbnorm);
- (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */
+ if (!rm_mysql_schema)
+ {
+ (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */
#ifdef HAVE_EVENT_SCHEDULER
- Events::drop_schema_events(thd, dbnorm);
+ Events::drop_schema_events(thd, dbnorm);
#endif
+ }
reenable_binlog(thd);
/*
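
The explicit db.opt deletion fixes MDEV-11105 (see the drop.test change
above): the generic per-table file scan skips 'db.opt' whenever the schema
contains a table literally named 'db'. The ordering, as a minimal sketch
(POSIX remove(), illustrative path handling):

#include <cerrno>
#include <cstdio>

static bool remove_db_opt(const char *path)   /* ".../db.opt" */
{
  if (std::remove(path) != 0 && errno != ENOENT)
  {
    std::perror(path);   /* a real failure must abort the DROP DATABASE */
    return true;
  }
  return false;          /* removed, or never existed: both are fine */
}

int main()
{
  return remove_db_opt("db.opt") ? 1 : 0;
}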
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index c1402b36737..4ed1b7a5323 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -3136,12 +3136,6 @@ case SQLCOM_PREPARE:
}
/*
- For CREATE TABLE we should not open the table even if it exists.
- If the table exists, we should either not create it or replace it
- */
- lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB;
-
- /*
If we are a slave, we should add OR REPLACE if we don't have
IF EXISTS. This will help a slave to recover from
CREATE TABLE OR EXISTS failures by dropping the table and
@@ -8887,12 +8881,6 @@ bool create_table_precheck(THD *thd, TABLE_LIST *tables,
if (check_fk_parent_table_access(thd, &lex->create_info, &lex->alter_info, create_table->db))
goto err;
- /*
- For CREATE TABLE we should not open the table even if it exists.
- If the table exists, we should either not create it or replace it
- */
- lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB;
-
error= FALSE;
err:
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 956e3c9e57f..14f1107be33 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -2810,6 +2810,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
}
+static size_t var_storage_size(int flags)
+{
+ switch (flags & PLUGIN_VAR_TYPEMASK) {
+ case PLUGIN_VAR_BOOL: return sizeof(my_bool);
+ case PLUGIN_VAR_INT: return sizeof(int);
+ case PLUGIN_VAR_LONG: return sizeof(long);
+ case PLUGIN_VAR_ENUM: return sizeof(long);
+ case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong);
+ case PLUGIN_VAR_SET: return sizeof(ulonglong);
+ case PLUGIN_VAR_STR: return sizeof(char*);
+ case PLUGIN_VAR_DOUBLE: return sizeof(double);
+ default: DBUG_ASSERT(0); return 0;
+ }
+}
+
+
/*
returns a bookmark for thd-local variables, creating if neccessary.
returns null for non thd-local variables.
@@ -2818,39 +2834,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
static st_bookmark *register_var(const char *plugin, const char *name,
int flags)
{
- uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size;
+ uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size;
st_bookmark *result;
char *varname, *p;
- if (!(flags & PLUGIN_VAR_THDLOCAL))
- return NULL;
-
- switch (flags & PLUGIN_VAR_TYPEMASK) {
- case PLUGIN_VAR_BOOL:
- size= sizeof(my_bool);
- break;
- case PLUGIN_VAR_INT:
- size= sizeof(int);
- break;
- case PLUGIN_VAR_LONG:
- case PLUGIN_VAR_ENUM:
- size= sizeof(long);
- break;
- case PLUGIN_VAR_LONGLONG:
- case PLUGIN_VAR_SET:
- size= sizeof(ulonglong);
- break;
- case PLUGIN_VAR_STR:
- size= sizeof(char*);
- break;
- case PLUGIN_VAR_DOUBLE:
- size= sizeof(double);
- break;
- default:
- DBUG_ASSERT(0);
- return NULL;
- };
+ DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL);
+ size= var_storage_size(flags);
varname= ((char*) my_alloca(length));
strxmov(varname + 1, plugin, "_", name, NullS);
for (p= varname + 1; *p; p++)
@@ -3052,25 +3042,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock)
*/
for (idx= 0; idx < bookmark_hash.records; idx++)
{
- sys_var_pluginvar *pi;
- sys_var *var;
st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
if (v->version <= thd->variables.dynamic_variables_version)
continue; /* already in thd->variables */
- if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
- !(pi= var->cast_pluginvar()) ||
- v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags))
- continue;
-
/* Here we do anything special that may be required of the data types */
- if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
- pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC)
+ if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
+ v->key[0] & BOOKMARK_MEMALLOC)
{
- int offset= ((thdvar_str_t *)(pi->plugin_var))->offset;
- char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset);
+ char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset);
if (*pp)
*pp= my_strdup(*pp, MYF(MY_WME|MY_FAE));
}
@@ -3331,69 +3313,58 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var)
return false;
}
-bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+static const void *var_def_ptr(st_mysql_sys_var *pv)
{
- DBUG_ASSERT(!is_readonly());
- mysql_mutex_assert_owner(&LOCK_global_system_variables);
-
- void *tgt= real_value_ptr(thd, var->type);
- const void *src= &var->save_result;
-
- if (!var->value)
- {
- switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
+ switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
case PLUGIN_VAR_INT:
- src= &((sysvar_uint_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_uint_t*) pv)->def_val;
case PLUGIN_VAR_LONG:
- src= &((sysvar_ulong_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_ulong_t*) pv)->def_val;
case PLUGIN_VAR_LONGLONG:
- src= &((sysvar_ulonglong_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_ulonglong_t*) pv)->def_val;
case PLUGIN_VAR_ENUM:
- src= &((sysvar_enum_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_enum_t*) pv)->def_val;
case PLUGIN_VAR_SET:
- src= &((sysvar_set_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_set_t*) pv)->def_val;
case PLUGIN_VAR_BOOL:
- src= &((sysvar_bool_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_bool_t*) pv)->def_val;
case PLUGIN_VAR_STR:
- src= &((sysvar_str_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_str_t*) pv)->def_val;
case PLUGIN_VAR_DOUBLE:
- src= &((sysvar_double_t*) plugin_var)->def_val;
- break;
+ return &((sysvar_double_t*) pv)->def_val;
case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_uint_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_uint_t*) pv)->def_val;
case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_ulong_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_ulong_t*) pv)->def_val;
case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_ulonglong_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_ulonglong_t*) pv)->def_val;
case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_enum_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_enum_t*) pv)->def_val;
case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_set_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_set_t*) pv)->def_val;
case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_bool_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_bool_t*) pv)->def_val;
case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_str_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_str_t*) pv)->def_val;
case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
- src= &((thdvar_double_t*) plugin_var)->def_val;
- break;
+ return &((thdvar_double_t*) pv)->def_val;
default:
DBUG_ASSERT(0);
+ return NULL;
}
- }
+}
+
+
+bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+{
+ DBUG_ASSERT(!is_readonly());
+ mysql_mutex_assert_owner(&LOCK_global_system_variables);
+
+ void *tgt= real_value_ptr(thd, var->type);
+ const void *src= &var->save_result;
+
+ if (!var->value)
+ src= var_def_ptr(plugin_var);
plugin_var->update(thd, plugin_var, tgt, src);
return false;
@@ -3749,7 +3720,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
*(int*)(opt + 1)= offset= v->offset;
if (opt->flags & PLUGIN_VAR_NOCMDOPT)
+ {
+ char *val= global_system_variables.dynamic_variables_ptr + offset;
+ if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) &&
+ (opt->flags & PLUGIN_VAR_MEMALLOC))
+ {
+ char *def_val= *(char**)var_def_ptr(opt);
+ *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL;
+ }
+ else
+ memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags));
continue;
+ }
optname= (char*) memdup_root(mem_root, v->key + 1,
(optnamelen= v->name_len) + 1);
@@ -3957,10 +3939,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
*str->value= strdup_root(mem_root, *str->value);
}
+ var= find_bookmark(plugin_name.str, o->name, o->flags);
if (o->flags & PLUGIN_VAR_NOSYSVAR)
continue;
tmp_backup[tmp->nbackups++].save(&o->name);
- if ((var= find_bookmark(plugin_name.str, o->name, o->flags)))
+ if (var)
v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp);
else
{
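
The construct_options() hunk above closes a gap for PLUGIN_VAR_NOCMDOPT
thread-local variables: since no command-line option is ever parsed for
them, their slot in the global defaults buffer previously kept whatever the
allocator left there. A distilled model of the fix (illustrative flags and
layout, not the server code):

#include <cstdio>
#include <cstring>

struct var_def { int flags; long def_val; };   /* illustrative only */

enum { VAR_LONG= 1, VAR_NOCMDOPT= 2 };

static void init_global_slot(char *global_buf, unsigned offset,
                             const var_def *v)
{
  if (v->flags & VAR_NOCMDOPT)   /* option parsing will never fill this in */
    std::memcpy(global_buf + offset, &v->def_val, sizeof(long));
}

int main()
{
  char buf[64]= {0};
  var_def v= { VAR_LONG | VAR_NOCMDOPT, 42 };
  init_global_slot(buf, 8, &v);
  long out;
  std::memcpy(&out, buf + 8, sizeof(long));
  std::printf("%ld\n", out);     /* prints 42, not garbage */
  return 0;
}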
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 063fe221403..0cfb964307d 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -9004,9 +9004,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
We need to destruct the copy_field (allocated in create_tmp_table())
before setting it to 0 if the join is not "reusable".
*/
- if (!tmp_join || tmp_join != this)
- tmp_table_param.cleanup();
- tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+ if (!tmp_join || tmp_join != this)
+ tmp_table_param.cleanup();
+ else
+ {
+ /*
+ Free data buffered in copy_fields, but keep data pointed by copy_field
+ around for next iteration (possibly stored in save_copy_fields).
+
+ It would be logically simpler to not clear copy_field
+ below, but as we have loops that runs over copy_field to
+ copy_field_end that should not be done anymore, it's simpler to
+ just clear the pointers.
+
+ Another option would be to just clear copy_field_end and not run
+ the loops if this is not set or to have tmp_table_param.cleanup()
+ to run cleanup on save_copy_field if copy_field is not set.
+ */
+ tmp_table_param.free_copy_field_data();
+ tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+ }
first_record= sort_and_group=0;
send_records= (ha_rows) 0;
@@ -11687,7 +11704,7 @@ void JOIN::join_free()
/**
Free resources of given join.
- @param fill true if we should free all resources, call with full==1
+ @param full true if we should free all resources, call with full==1
should be last, before it this function can be called with
full==0
@@ -11806,7 +11823,7 @@ void JOIN::cleanup(bool full)
/*
If we have tmp_join and 'this' JOIN is not tmp_join and
tmp_table_param.copy_field's of them are equal then we have to remove
- pointer to tmp_table_param.copy_field from tmp_join, because it qill
+ pointer to tmp_table_param.copy_field from tmp_join, because it will
be removed in tmp_table_param.cleanup().
*/
if (tmp_join &&
@@ -15710,6 +15727,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
case Item::VARBIN_ITEM:
case Item::CACHE_ITEM:
case Item::EXPR_CACHE_ITEM:
+ case Item::PARAM_ITEM:
if (make_copy_field)
{
DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -22240,7 +22258,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
err:
if (copy)
delete [] param->copy_field; // This is never 0
- param->copy_field=0;
+ param->copy_field= 0;
err2:
DBUG_RETURN(TRUE);
}
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 47a5a40ebeb..70080a6b4f1 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1003,11 +1003,13 @@ public:
switch (i) {
case COLUMN_STAT_MIN_VALUE:
+ table_field->read_stats->min_value->set_notnull();
stat_field->val_str(&val);
table_field->read_stats->min_value->store(val.ptr(), val.length(),
&my_charset_bin);
break;
case COLUMN_STAT_MAX_VALUE:
+ table_field->read_stats->max_value->set_notnull();
stat_field->val_str(&val);
table_field->read_stats->max_value->store(val.ptr(), val.length(),
&my_charset_bin);
@@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field,
{
double avg_frequency= col_stats->get_avg_frequency();
res= avg_frequency;
- /*
- psergey-todo: what does check for min_value, max_value mean?
- min/max_value are set to NULL in alloc_statistics_for_table() and
- alloc_statistics_for_table_share(). Both functions will immediately
- call create_min_max_statistical_fields_for_table and
- create_min_max_statistical_fields_for_table_share() respectively,
- which will set min/max_value to be valid pointers, unless OOM
- occurs.
- */
if (avg_frequency > 1.0 + 0.000001 &&
- col_stats->min_value && col_stats->max_value)
+ col_stats->min_max_values_are_provided())
{
Histogram *hist= &col_stats->histogram;
if (hist->is_available())
@@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field,
}
else
{
- if (col_stats->min_value && col_stats->max_value)
+ if (col_stats->min_max_values_are_provided())
{
double sel, min_mp_pos, max_mp_pos;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 46e5cef22d1..8e5f8107849 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -388,6 +388,11 @@ public:
avg_frequency= (ulong) (val * Scale_factor_avg_frequency);
}
+ bool min_max_values_are_provided()
+ {
+ return !is_null(COLUMN_STAT_MIN_VALUE) &&
+ !is_null(COLUMN_STAT_MAX_VALUE);
+ }
};
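min_max_values_are_provided() tests the null flags of the statistics record rather than the min_value/max_value pointers, which (as the removed comment notes) are always valid after allocation; what can legitimately be missing is the stored MIN/MAX data itself. The intended call-site shape, sketched:

    // Sketch: callers ask one question about the statistics row.
    if (col_stats->min_max_values_are_provided())
    {
      // Both MIN and MAX were read from mysql.column_stats (not NULL),
      // so range endpoints and the histogram may be consulted.
    }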
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 736827d3782..a2e51ca8e93 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -2464,7 +2464,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
if (table_type && table_type != view_pseudo_hton)
ha_lock_engine(thd, table_type);
- if (thd->locked_tables_mode)
+ if (thd->locked_tables_mode == LTM_LOCK_TABLES ||
+ thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES)
{
if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED))
{
@@ -6336,6 +6337,7 @@ static bool fill_alter_inplace_info(THD *thd,
(field->stored_in_db || field->vcol_info->is_in_partitioning_expr()))
{
if (is_equal == IS_EQUAL_NO ||
+ !new_field->vcol_info ||
!field->vcol_info->is_equal(new_field->vcol_info))
ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_COLUMN_VCOL;
else
diff --git a/sql/table_cache.cc b/sql/table_cache.cc
index 097f37d26d8..bdb7914c32b 100644
--- a/sql/table_cache.cc
+++ b/sql/table_cache.cc
@@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share)
}
if (--share->tdc.ref_count)
{
+ if (!share->is_view)
+ mysql_cond_broadcast(&share->tdc.COND_release);
mysql_mutex_unlock(&share->tdc.LOCK_table_share);
mysql_mutex_unlock(&LOCK_unused_shares);
DBUG_VOID_RETURN;
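Broadcasting COND_release whenever the reference count drops gives waiters blocked on the share a chance to re-check their predicate under LOCK_table_share. The waiter side of this pattern looks roughly like the following (the exact predicate depends on the caller; this is a sketch, not the server's code):

    // Sketch of a waiter paired with the broadcast above.
    mysql_mutex_lock(&share->tdc.LOCK_table_share);
    while (share->tdc.ref_count > 1)              // example predicate
      mysql_cond_wait(&share->tdc.COND_release,
                      &share->tdc.LOCK_table_share);
    mysql_mutex_unlock(&share->tdc.LOCK_table_share);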
diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc
index ae8a81b1bcd..9d263038bc9 100644
--- a/sql/threadpool_common.cc
+++ b/sql/threadpool_common.cc
@@ -73,17 +73,16 @@ struct Worker_thread_context
void save()
{
-#ifdef HAVE_PSI_INTERFACE
- psi_thread= PSI_server?PSI_server->get_thread():0;
+#ifdef HAVE_PSI_THREAD_INTERFACE
+ psi_thread = PSI_THREAD_CALL(get_thread)();
#endif
mysys_var= (st_my_thread_var *)pthread_getspecific(THR_KEY_mysys);
}
void restore()
{
-#ifdef HAVE_PSI_INTERFACE
- if (PSI_server)
- PSI_server->set_thread(psi_thread);
+#ifdef HAVE_PSI_THREAD_INTERFACE
+ PSI_THREAD_CALL(set_thread)(psi_thread);
#endif
pthread_setspecific(THR_KEY_mysys,mysys_var);
pthread_setspecific(THR_THD, 0);
@@ -92,6 +91,41 @@ struct Worker_thread_context
};
+#ifdef HAVE_PSI_INTERFACE
+
+/*
+ The following fixes the PSI "idle" instrumentation.
+ The server assumes that a connection becomes idle
+ just before net_read_packet() and switches to active after it.
+ In our setup, the server becomes idle when async socket I/O is initiated.
+*/
+
+extern void net_before_header_psi(struct st_net *net, void *user_data, size_t);
+
+static void dummy_before_header(struct st_net *, void *, size_t)
+{
+}
+
+static void re_init_net_server_extension(THD *thd)
+{
+ thd->m_net_server_extension.m_before_header = dummy_before_header;
+}
+
+#else
+
+#define re_init_net_server_extension(thd)
+
+#endif /* HAVE_PSI_INTERFACE */
+
+
+static inline void set_thd_idle(THD *thd)
+{
+ thd->net.reading_or_writing= 1;
+#ifdef HAVE_PSI_INTERFACE
+ net_before_header_psi(&thd->net, thd, 0);
+#endif
+}
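set_thd_idle() couples the legacy reading_or_writing flag with the PSI idle notification so both flip together. Its two call sites below follow one pattern, summarized here:

    // Condensed call pattern (see threadpool_add_connection and
    // threadpool_process_request below):
    //   login completed, waiting for first command  -> set_thd_idle(thd);
    //   request processed, back to waiting for data -> set_thd_idle(thd);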
+
/*
Attach/associate the connection with the OS thread,
*/
@@ -100,10 +134,10 @@ static bool thread_attach(THD* thd)
pthread_setspecific(THR_KEY_mysys,thd->mysys_var);
thd->thread_stack=(char*)&thd;
thd->store_globals();
-#ifdef HAVE_PSI_INTERFACE
- if (PSI_server)
- PSI_server->set_thread(thd->event_scheduler.m_psi);
+#ifdef HAVE_PSI_THREAD_INTERFACE
+ PSI_THREAD_CALL(set_thread)(thd->event_scheduler.m_psi);
#endif
+ mysql_socket_set_thread_owner(thd->net.vio->mysql_socket);
return 0;
}
@@ -130,39 +164,38 @@ int threadpool_add_connection(THD *thd)
}
/* Create new PSI thread for use with the THD. */
-#ifdef HAVE_PSI_INTERFACE
- if (PSI_server)
- {
- thd->event_scheduler.m_psi =
- PSI_server->new_thread(key_thread_one_connection, thd, thd->thread_id);
- }
+#ifdef HAVE_PSI_THREAD_INTERFACE
+ thd->event_scheduler.m_psi=
+ PSI_THREAD_CALL(new_thread)(key_thread_one_connection, thd, thd->thread_id);
#endif
/* Login. */
thread_attach(thd);
+ re_init_net_server_extension(thd);
ulonglong now= microsecond_interval_timer();
thd->prior_thr_create_utime= now;
thd->start_utime= now;
thd->thr_create_utime= now;
- if (!setup_connection_thread_globals(thd))
- {
- if (!thd_prepare_connection(thd))
- {
-
- /*
- Check if THD is ok, as prepare_new_connection_state()
- can fail, for example if init command failed.
- */
- if (thd_is_connection_alive(thd))
- {
- retval= 0;
- thd->net.reading_or_writing= 1;
- thd->skip_wait_timeout= true;
- }
- }
- }
+ if (setup_connection_thread_globals(thd))
+ goto end;
+
+ if (thd_prepare_connection(thd))
+ goto end;
+
+ /*
+ Check if THD is ok, as prepare_new_connection_state()
+ can fail, for example if init command failed.
+ */
+ if (!thd_is_connection_alive(thd))
+ goto end;
+
+ retval= 0;
+ thd->skip_wait_timeout= true;
+ set_thd_idle(thd);
+
+end:
worker_context.restore();
return retval;
}
@@ -244,12 +277,13 @@ int threadpool_process_request(THD *thd)
goto end;
}
+ set_thd_idle(thd);
+
vio= thd->net.vio;
if (!vio->has_data(vio))
{
/* More info on this debug sync is in sql_parse.cc*/
DEBUG_SYNC(thd, "before_do_command_net_read");
- thd->net.reading_or_writing= 1;
goto end;
}
}
diff --git a/storage/connect/JdbcInterface.java b/storage/connect/JdbcInterface.java
index f765052915d..34af8c4e013 100644
--- a/storage/connect/JdbcInterface.java
+++ b/storage/connect/JdbcInterface.java
@@ -340,6 +340,18 @@ public class JdbcInterface {
return m;
} // end of GetMaxValue
+ public String GetQuoteString() {
+ String qs = null;
+
+ try {
+ qs = dbmd.getIdentifierQuoteString();
+ } catch(SQLException se) {
+ SetErrmsg(se);
+ } // end try/catch
+
+ return qs;
+ } // end of GetQuoteString
+
public int GetColumns(String[] parms) {
int ncol = -1;
@@ -680,11 +692,11 @@ public class JdbcInterface {
return 0;
} // end of TimestampField
- public String ObjectField(int n, String name) {
+ public Object ObjectField(int n, String name) {
if (rs == null) {
System.out.println("No result set");
} else try {
- return (n > 0) ? rs.getObject(n).toString() : rs.getObject(name).toString();
+ return (n > 0) ? rs.getObject(n) : rs.getObject(name);
} catch (SQLException se) {
SetErrmsg(se);
} //end try/catch
diff --git a/storage/connect/filamdbf.cpp b/storage/connect/filamdbf.cpp
index 8afda723578..a4557facbd8 100644
--- a/storage/connect/filamdbf.cpp
+++ b/storage/connect/filamdbf.cpp
@@ -383,7 +383,7 @@ DBFBASE::DBFBASE(DBFBASE *txfp)
/* and header length. Set Records, check that Reclen is equal to lrecl and */
/* return the header length or 0 in case of error. */
/****************************************************************************/
-int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
+int DBFBASE::ScanHeader(PGLOBAL g, PSZ fn, int lrecl, int *rln, char *defpath)
{
int rc;
char filename[_MAX_PATH];
@@ -393,7 +393,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
/************************************************************************/
/* Open the input file. */
/************************************************************************/
- PlugSetPath(filename, fname, defpath);
+ PlugSetPath(filename, fn, defpath);
if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "rb")))
return 0; // Assume file does not exist
@@ -410,11 +410,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
} else if (rc == RC_FX)
return -1;
- if ((int)header.Reclen() != lrecl) {
- sprintf(g->Message, MSG(BAD_LRECL), lrecl, header.Reclen());
- return -1;
- } // endif Lrecl
-
+ *rln = (int)header.Reclen();
Records = (int)header.Records();
return (int)header.Headlen();
} // end of ScanHeader
@@ -431,9 +427,27 @@ int DBFFAM::Cardinality(PGLOBAL g)
if (!g)
return 1;
- if (!Headlen)
- if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0)
- return -1; // Error in ScanHeader
+ if (!Headlen) {
+ int rln = 0; // Record length in the file header
+
+ Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath());
+
+ if (Headlen < 0)
+ return -1; // Error in ScanHeader
+
+ if (rln && Lrecl != rln) {
+ // This always happens on some Linux platforms
+ sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln);
+
+ if (Accept) {
+ Lrecl = rln;
+ PushWarning(g, Tdbp);
+ } else
+ return -1;
+
+ } // endif rln
+
+ } // endif Headlen
// Set number of blocks for later use
Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0;
@@ -565,7 +579,13 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g)
if (Lrecl != reclen) {
sprintf(g->Message, MSG(BAD_LRECL), Lrecl, reclen);
- return true;
+
+ if (Accept) {
+ Lrecl = reclen;
+ PushWarning(g, Tdbp);
+ } else
+ return true;
+
} // endif Lrecl
hlen = HEADLEN * (n + 1) + 2;
@@ -641,8 +661,14 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g)
if ((rc = dbfhead(g, Stream, Tdbp->GetFile(g), &header)) == RC_OK) {
if (Lrecl != (int)header.Reclen()) {
sprintf(g->Message, MSG(BAD_LRECL), Lrecl, header.Reclen());
- return true;
- } // endif Lrecl
+
+ if (Accept) {
+ Lrecl = header.Reclen();
+ PushWarning(g, Tdbp);
+ } else
+ return true;
+
+ } // endif Lrecl
Records = (int)header.Records();
Headlen = (int)header.Headlen();
@@ -916,9 +942,27 @@ int DBMFAM::Cardinality(PGLOBAL g)
if (!g)
return 1;
- if (!Headlen)
- if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0)
- return -1; // Error in ScanHeader
+ if (!Headlen) {
+ int rln = 0; // Record length in the file header
+
+ Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath());
+
+ if (Headlen < 0)
+ return -1; // Error in ScanHeader
+
+ if (rln && Lrecl != rln) {
+ // This always happens on some Linux platforms
+ sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln);
+
+ if (Accept) {
+ Lrecl = rln;
+ PushWarning(g, Tdbp);
+ } else
+ return -1;
+
+ } // endif rln
+
+ } // endif Headlen
// Set number of blocks for later use
Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0;
@@ -961,8 +1005,14 @@ bool DBMFAM::AllocateBuffer(PGLOBAL g)
if (Lrecl != (int)hp->Reclen()) {
sprintf(g->Message, MSG(BAD_LRECL), Lrecl, hp->Reclen());
- return true;
- } // endif Lrecl
+
+ if (Accept) {
+ Lrecl = hp->Reclen();
+ PushWarning(g, Tdbp);
+ } else
+ return true;
+
+ } // endif Lrecl
Records = (int)hp->Records();
Headlen = (int)hp->Headlen();
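All four record-length checks in this file now share one policy: format the BAD_LRECL message first, then either adopt the length found in the file with a warning (when Accept is set) or fail with the message already prepared. Factored out, the rule would look roughly like this (hypothetical helper, not part of the tree):

    // Hypothetical helper capturing the repeated Accept/Lrecl policy.
    // Returns true on hard error; false means processing may continue.
    static bool check_lrecl(PGLOBAL g, PTDB tdbp, int &lrecl,
                            int file_reclen, bool accept)
    {
      if (file_reclen && lrecl != file_reclen) {
        sprintf(g->Message, MSG(BAD_LRECL), lrecl, file_reclen);
        if (!accept)
          return true;           // error; g->Message is already set
        lrecl = file_reclen;     // trust the file header
        PushWarning(g, tdbp);
      }
      return false;
    }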
diff --git a/storage/connect/filamdbf.h b/storage/connect/filamdbf.h
index da84d7685a8..66458a10eaa 100644
--- a/storage/connect/filamdbf.h
+++ b/storage/connect/filamdbf.h
@@ -31,7 +31,7 @@ class DllExport DBFBASE {
DBFBASE(PDBF txfp);
// Implementation
- int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath);
+ int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, int *rlen, char *defpath);
protected:
// Default constructor, not to be used
diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc
index ea6fb1b08c1..cf945a73f46 100644
--- a/storage/connect/ha_connect.cc
+++ b/storage/connect/ha_connect.cc
@@ -224,6 +224,7 @@ uint GetWorkSize(void);
void SetWorkSize(uint);
extern "C" const char *msglang(void);
+static void PopUser(PCONNECT xp);
static PCONNECT GetUser(THD *thd, PCONNECT xp);
static PGLOBAL GetPlug(THD *thd, PCONNECT& lxp);
@@ -831,34 +832,43 @@ ha_connect::~ha_connect(void)
table ? table->s->table_name.str : "<null>",
xp, xp ? xp->count : 0);
- if (xp) {
- PCONNECT p;
+ PopUser(xp);
+} // end of ha_connect destructor
- xp->count--;
- for (p= user_connect::to_users; p; p= p->next)
- if (p == xp)
- break;
+/****************************************************************************/
+/* Check whether this user can be removed. */
+/****************************************************************************/
+static void PopUser(PCONNECT xp)
+{
+ if (xp) {
+ xp->count--;
- if (p && !p->count) {
- if (p->next)
- p->next->previous= p->previous;
+ if (!xp->count) {
+ PCONNECT p;
- if (p->previous)
- p->previous->next= p->next;
- else
- user_connect::to_users= p->next;
+ for (p= user_connect::to_users; p; p= p->next)
+ if (p == xp)
+ break;
- } // endif p
+ if (p) {
+ if (p->next)
+ p->next->previous= p->previous;
- if (!xp->count) {
- PlugCleanup(xp->g, true);
- delete xp;
- } // endif count
+ if (p->previous)
+ p->previous->next= p->next;
+ else
+ user_connect::to_users= p->next;
- } // endif xp
+ } // endif p
-} // end of ha_connect destructor
+ PlugCleanup(xp->g, true);
+ delete xp;
+ } // endif count
+
+ } // endif xp
+
+} // end of PopUser
/****************************************************************************/
@@ -866,7 +876,7 @@ ha_connect::~ha_connect(void)
/****************************************************************************/
static PCONNECT GetUser(THD *thd, PCONNECT xp)
{
- if (!thd)
+ if (!thd)
return NULL;
if (xp && thd == xp->thdp)
@@ -890,7 +900,6 @@ static PCONNECT GetUser(THD *thd, PCONNECT xp)
return xp;
} // end of GetUser
-
/****************************************************************************/
/* Get the global pointer of the user of this handler. */
/****************************************************************************/
@@ -5261,7 +5270,18 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
if (!(shm= (char*)db))
db= table_s->db.str; // Default value
- // Check table type
+ // Save stack and allocation environment and prepare error return
+ if (g->jump_level == MAX_JUMP) {
+ strcpy(g->Message, MSG(TOO_MANY_JUMPS));
+ goto jer;
+ } // endif jump_level
+
+ if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) {
+ my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
+ goto err;
+ } // endif rc
+
+ // Check table type
if (ttp == TAB_UNDEF) {
topt->type= (src) ? "MYSQL" : (tab) ? "PROXY" : "DOS";
ttp= GetTypeID(topt->type);
@@ -5270,20 +5290,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
} else if (ttp == TAB_NIY) {
sprintf(g->Message, "Unsupported table type %s", topt->type);
my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
- return HA_ERR_INTERNAL_ERROR;
+ goto err;
} // endif ttp
- // Save stack and allocation environment and prepare error return
- if (g->jump_level == MAX_JUMP) {
- strcpy(g->Message, MSG(TOO_MANY_JUMPS));
- return HA_ERR_INTERNAL_ERROR;
- } // endif jump_level
-
- if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) {
- my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
- goto err;
- } // endif rc
-
if (!tab) {
if (ttp == TAB_TBL) {
// Make tab the first table of the list
@@ -5843,6 +5852,7 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
rc= init_table_share(thd, table_s, create_info, &sql);
g->jump_level--;
+ PopUser(xp);
return rc;
} // endif ok
@@ -5850,7 +5860,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
err:
g->jump_level--;
- return HA_ERR_INTERNAL_ERROR;
+ jer:
+ PopUser(xp);
+ return HA_ERR_INTERNAL_ERROR;
} // end of connect_assisted_discovery
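Moving the setjmp guard ahead of the first failure point lets every later error path simply goto err and still unwind g->jump_level, while PopUser(xp) balances the GetUser() reference on all exits, including the new jer path taken before jump_level was incremented. The control flow of the function above, reduced to its skeleton:

    // Reduced control-flow skeleton of connect_assisted_discovery().
    if (g->jump_level == MAX_JUMP) {           // no jumper slot left
      strcpy(g->Message, MSG(TOO_MANY_JUMPS));
      goto jer;                                // jump_level untouched
    }
    if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) {
      my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
      goto err;                                // longjmp from below lands here
    }
    /* ... discovery work; deep code may longjmp(g->jumper[g->jump_level], n) ... */
    g->jump_level--;
    PopUser(xp);
    return rc;
    err:
    g->jump_level--;
    jer:
    PopUser(xp);
    return HA_ERR_INTERNAL_ERROR;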
/**
diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp
index 3b8de3e975b..dca9bd0eac4 100644
--- a/storage/connect/jdbconn.cpp
+++ b/storage/connect/jdbconn.cpp
@@ -498,145 +498,6 @@ PQRYRES JDBCDrivers(PGLOBAL g, int maxres, bool info)
return qrp;
} // end of JDBCDrivers
-#if 0
-/*************************************************************************/
-/* JDBCDataSources: constructs the result blocks containing all JDBC */
-/* data sources available on the local host. */
-/* Called with info=true to have result column names. */
-/*************************************************************************/
-PQRYRES JDBCDataSources(PGLOBAL g, int maxres, bool info)
-{
- int buftyp[] ={ TYPE_STRING, TYPE_STRING };
- XFLD fldtyp[] ={ FLD_NAME, FLD_REM };
- unsigned int length[] ={ 0, 256 };
- bool b[] ={ false, true };
- int i, n = 0, ncol = 2;
- PCOLRES crp;
- PQRYRES qrp;
- JDBConn *jcp = NULL;
-
- /************************************************************************/
- /* Do an evaluation of the result size. */
- /************************************************************************/
- if (!info) {
- jcp = new(g)JDBConn(g, NULL);
- n = jcp->GetMaxValue(SQL_MAX_DSN_LENGTH);
- length[0] = (n) ? (n + 1) : 256;
-
- if (!maxres)
- maxres = 512; // Estimated max number of data sources
-
- } else {
- length[0] = 256;
- maxres = 0;
- } // endif info
-
- if (trace)
- htrc("JDBCDataSources: max=%d len=%d\n", maxres, length[0]);
-
- /************************************************************************/
- /* Allocate the structures used to refer to the result set. */
- /************************************************************************/
- qrp = PlgAllocResult(g, ncol, maxres, IDS_DSRC,
- buftyp, fldtyp, length, false, true);
-
- for (i = 0, crp = qrp->Colresp; crp; i++, crp = crp->Next)
- if (b[i])
- crp->Kdata->SetNullable(true);
-
- /************************************************************************/
- /* Now get the results into blocks. */
- /************************************************************************/
- if (!info && qrp && jcp->GetDataSources(qrp))
- qrp = NULL;
-
- /************************************************************************/
- /* Return the result pointer for use by GetData routines. */
- /************************************************************************/
- return qrp;
-} // end of JDBCDataSources
-
-/**************************************************************************/
-/* PrimaryKeys: constructs the result blocks containing all the */
-/* JDBC catalog information concerning primary keys. */
-/**************************************************************************/
-PQRYRES JDBCPrimaryKeys(PGLOBAL g, JDBConn *op, char *dsn, char *table)
-{
- static int buftyp[] ={ TYPE_STRING, TYPE_STRING, TYPE_STRING,
- TYPE_STRING, TYPE_SHORT, TYPE_STRING };
- static unsigned int length[] ={ 0, 0, 0, 0, 6, 128 };
- int n, ncol = 5;
- int maxres;
- PQRYRES qrp;
- JCATPARM *cap;
- JDBConn *jcp = op;
-
- if (!op) {
- /**********************************************************************/
- /* Open the connection with the JDBC data source. */
- /**********************************************************************/
- jcp = new(g)JDBConn(g, NULL);
-
- if (jcp->Open(dsn, 2) < 1) // 2 is openReadOnly
- return NULL;
-
- } // endif op
-
- /************************************************************************/
- /* Do an evaluation of the result size. */
- /************************************************************************/
- n = jcp->GetMaxValue(SQL_MAX_COLUMNS_IN_TABLE);
- maxres = (n) ? (int)n : 250;
- n = jcp->GetMaxValue(SQL_MAX_CATALOG_NAME_LEN);
- length[0] = (n) ? (n + 1) : 128;
- n = jcp->GetMaxValue(SQL_MAX_SCHEMA_NAME_LEN);
- length[1] = (n) ? (n + 1) : 128;
- n = jcp->GetMaxValue(SQL_MAX_TABLE_NAME_LEN);
- length[2] = (n) ? (n + 1) : 128;
- n = jcp->GetMaxValue(SQL_MAX_COLUMN_NAME_LEN);
- length[3] = (n) ? (n + 1) : 128;
-
- if (trace)
- htrc("JDBCPrimaryKeys: max=%d len=%d,%d,%d\n",
- maxres, length[0], length[1], length[2]);
-
- /************************************************************************/
- /* Allocate the structure used to refer to the result set. */
- /************************************************************************/
- qrp = PlgAllocResult(g, ncol, maxres, IDS_PKEY,
- buftyp, NULL, length, false, true);
-
- if (trace)
- htrc("Getting pkey results ncol=%d\n", qrp->Nbcol);
-
- cap = AllocCatInfo(g, CAT_KEY, NULL, table, qrp);
-
- /************************************************************************/
- /* Now get the results into blocks. */
- /************************************************************************/
- if ((n = jcp->GetCatInfo(cap)) >= 0) {
- qrp->Nblin = n;
- // ResetNullValues(cap);
-
- if (trace)
- htrc("PrimaryKeys: NBCOL=%d NBLIN=%d\n", qrp->Nbcol, qrp->Nblin);
-
- } else
- qrp = NULL;
-
- /************************************************************************/
- /* Close any local connection. */
- /************************************************************************/
- if (!op)
- jcp->Close();
-
- /************************************************************************/
- /* Return the result pointer for use by GetData routines. */
- /************************************************************************/
- return qrp;
-} // end of JDBCPrimaryKeys
-#endif // 0
-
/***********************************************************************/
/* JDBConn construction/destruction. */
/***********************************************************************/
@@ -651,7 +512,7 @@ JDBConn::JDBConn(PGLOBAL g, TDBJDBC *tdbp)
xqid = xuid = xid = grs = readid = fetchid = typid = errid = nullptr;
prepid = xpid = pcid = nullptr;
chrfldid = intfldid = dblfldid = fltfldid = bigfldid = nullptr;
- datfldid = timfldid = tspfldid = nullptr;
+ objfldid = datfldid = timfldid = tspfldid = nullptr;
//m_LoginTimeout = DEFAULT_LOGIN_TIMEOUT;
//m_QueryTimeout = DEFAULT_QUERY_TIMEOUT;
//m_UpdateOptions = 0;
@@ -739,60 +600,6 @@ bool JDBConn::gmID(PGLOBAL g, jmethodID& mid, const char *name, const char *sig
} // end of gmID
-#if 0
-/***********************************************************************/
-/* Utility routine. */
-/***********************************************************************/
-PSZ JDBConn::GetStringInfo(ushort infotype)
-{
- //ASSERT(m_hdbc != SQL_NULL_HDBC);
- char *p, buffer[MAX_STRING_INFO];
- SWORD result;
- RETCODE rc;
-
- rc = SQLGetInfo(m_hdbc, infotype, buffer, sizeof(buffer), &result);
-
- if (!Check(rc)) {
- ThrowDJX(rc, "SQLGetInfo"); // Temporary
- // *buffer = '\0';
- } // endif rc
-
- p = PlugDup(m_G, buffer);
- return p;
-} // end of GetStringInfo
-
-/***********************************************************************/
-/* Utility routines. */
-/***********************************************************************/
-void JDBConn::OnSetOptions(HSTMT hstmt)
-{
- RETCODE rc;
- ASSERT(m_hdbc != SQL_NULL_HDBC);
-
- if ((signed)m_QueryTimeout != -1) {
- // Attempt to set query timeout. Ignore failure
- rc = SQLSetStmtOption(hstmt, SQL_QUERY_TIMEOUT, m_QueryTimeout);
-
- if (!Check(rc))
- // don't attempt it again
- m_QueryTimeout = (DWORD)-1;
-
- } // endif m_QueryTimeout
-
- if (m_RowsetSize > 0) {
- // Attempt to set rowset size.
- // In case of failure reset it to 0 to use Fetch.
- rc = SQLSetStmtOption(hstmt, SQL_ROWSET_SIZE, m_RowsetSize);
-
- if (!Check(rc))
- // don't attempt it again
- m_RowsetSize = 0;
-
- } // endif m_RowsetSize
-
-} // end of OnSetOptions
-#endif // 0
-
/***********************************************************************/
/* Utility routine. */
/***********************************************************************/
@@ -1007,7 +814,7 @@ int JDBConn::Open(PJPARM sop)
#define N 1
#endif
- // Java source will be compiled as ajar file installed in the plugin dir
+ // Java source will be compiled as a jar file installed in the plugin dir
jpop->Append(sep);
jpop->Append(GetPluginDir());
jpop->Append("JdbcInterface.jar");
@@ -1204,6 +1011,21 @@ int JDBConn::Open(PJPARM sop)
return RC_FX;
} // endif Msg
+ jmethodID qcid = nullptr;
+
+ if (!gmID(g, qcid, "GetQuoteString", "()Ljava/lang/String;")) {
+ jstring s = (jstring)env->CallObjectMethod(job, qcid);
+
+ if (s != nullptr) {
+ char *qch = (char*)env->GetStringUTFChars(s, (jboolean)false);
+ m_IDQuoteChar[0] = *qch;
+ } else {
+ s = (jstring)env->CallObjectMethod(job, errid);
+ Msg = (char*)env->GetStringUTFChars(s, (jboolean)false);
+ } // endif s
+
+ } // endif qcid
+
if (gmID(g, typid, "ColumnType", "(ILjava/lang/String;)I"))
return RC_FX;
else
@@ -1345,9 +1167,10 @@ void JDBConn::Close()
/***********************************************************************/
void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
{
- PGLOBAL& g = m_G;
- jint ctyp;
- jstring cn, jn = nullptr;
+ PGLOBAL& g = m_G;
+ jint ctyp;
+ jstring cn, jn = nullptr;
+ jobject jb = nullptr;
if (rank == 0)
if (!name || (jn = env->NewStringUTF(name)) == nullptr) {
@@ -1363,21 +1186,32 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
longjmp(g->jumper[g->jump_level], TYPE_AM_JDBC);
} // endif Check
+ if (val->GetNullable())
+ if (!gmID(g, objfldid, "ObjectField", "(ILjava/lang/String;)Ljava/lang/Object;")) {
+ jb = env->CallObjectMethod(job, objfldid, (jint)rank, jn);
+
+ if (jb == nullptr) {
+ val->Reset();
+ val->SetNull(true);
+ goto chk;
+ } // endif jb
+
+ } // endif objfldid
+
switch (ctyp) {
case 12: // VARCHAR
case -1: // LONGVARCHAR
case 1: // CHAR
- if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) {
+ if (jb)
+ cn = (jstring)jb;
+ else if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;"))
cn = (jstring)env->CallObjectMethod(job, chrfldid, (jint)rank, jn);
+ else
+ cn = nullptr;
- if (cn) {
- const char *field = env->GetStringUTFChars(cn, (jboolean)false);
- val->SetValue_psz((PSZ)field);
- } else {
- val->Reset();
- val->SetNull(true);
- } // endif cn
-
+ if (cn) {
+ const char *field = env->GetStringUTFChars(cn, (jboolean)false);
+ val->SetValue_psz((PSZ)field);
} else
val->Reset();
@@ -1449,6 +1283,7 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
val->Reset();
} // endswitch Type
+ chk:
if (Check()) {
if (rank == 0)
env->DeleteLocalRef(jn);
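For nullable columns the code now probes the driver with the boxed ObjectField accessor first: a null jobject maps directly to SQL NULL, and only non-null results fall through to the typed accessors (reusing the object as a jstring for character types). The probe from the function above, trimmed to its essentials:

    // Trimmed sketch of the nullable probe (C++/JNI side).
    jobject jb = nullptr;
    if (val->GetNullable() &&
        !gmID(g, objfldid, "ObjectField",
              "(ILjava/lang/String;)Ljava/lang/Object;")) {
      jb = env->CallObjectMethod(job, objfldid, (jint)rank, jn);
      if (jb == nullptr) {             // SQL NULL reported by the driver
        val->Reset();
        val->SetNull(true);
        goto chk;                      // skip the typed switch entirely
      }
    }
    // non-null: e.g. cn = (jstring)jb for VARCHAR/LONGVARCHAR/CHAR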
diff --git a/storage/connect/jdbconn.h b/storage/connect/jdbconn.h
index 095b1565bd2..0a1c52d4576 100644
--- a/storage/connect/jdbconn.h
+++ b/storage/connect/jdbconn.h
@@ -165,6 +165,7 @@ protected:
jmethodID xpid; // The ExecutePrep method ID
jmethodID pcid; // The ClosePrepStmt method ID
jmethodID errid; // The GetErrmsg method ID
+ jmethodID objfldid; // The ObjectField method ID
jmethodID chrfldid; // The StringField method ID
jmethodID intfldid; // The IntField method ID
jmethodID dblfldid; // The DoubleField method ID
diff --git a/storage/connect/json.cpp b/storage/connect/json.cpp
index 3558c5762bb..c45630129f1 100644
--- a/storage/connect/json.cpp
+++ b/storage/connect/json.cpp
@@ -595,7 +595,7 @@ PSZ Serialize(PGLOBAL g, PJSON jsp, char *fn, int pretty)
fputs(EL, fs);
fclose(fs);
str = (err) ? NULL : strcpy(g->Message, "Ok");
- } else if (!err) {
+ } else if (!err) {
str = ((JOUTSTR*)jp)->Strp;
jp->WriteChr('\0');
PlugSubAlloc(g, NULL, ((JOUTSTR*)jp)->N);
diff --git a/storage/connect/reldef.cpp b/storage/connect/reldef.cpp
index 2c8ada52e6f..ac2327212e0 100644
--- a/storage/connect/reldef.cpp
+++ b/storage/connect/reldef.cpp
@@ -294,7 +294,7 @@ int TABDEF::GetColCatInfo(PGLOBAL g)
nlg+= nof;
case TAB_DIR:
case TAB_XML:
- poff= loff + 1;
+ poff= loff + (pcf->Flags & U_VIRTUAL ? 0 : 1);
break;
case TAB_INI:
case TAB_MAC:
@@ -440,7 +440,11 @@ int TABDEF::GetColCatInfo(PGLOBAL g)
} // endswitch tc
// lrecl must be at least recln to avoid buffer overflow
- recln= MY_MAX(recln, Hc->GetIntegerOption("Lrecl"));
+ if (trace)
+ htrc("Lrecl: Calculated=%d defined=%d\n",
+ recln, Hc->GetIntegerOption("Lrecl"));
+
+ recln = MY_MAX(recln, Hc->GetIntegerOption("Lrecl"));
Hc->SetIntegerOption("Lrecl", recln);
((PDOSDEF)this)->SetLrecl(recln);
} // endif Lrecl
diff --git a/storage/connect/tabjdbc.cpp b/storage/connect/tabjdbc.cpp
index 86fd831b262..e398523892f 100644
--- a/storage/connect/tabjdbc.cpp
+++ b/storage/connect/tabjdbc.cpp
@@ -686,6 +686,9 @@ bool TDBJDBC::MakeInsert(PGLOBAL g)
else
Prepared = true;
+ if (trace)
+ htrc("Insert=%s\n", Query->GetStr());
+
return false;
} // end of MakeInsert
@@ -733,17 +736,18 @@ bool TDBJDBC::MakeCommand(PGLOBAL g)
// If so, it must be quoted in the original query
strlwr(strcat(strcat(strcpy(name, " "), Name), " "));
- if (!strstr(" update delete low_priority ignore quick from ", name))
- strlwr(strcpy(name, Name)); // Not a keyword
- else
+ if (strstr(" update delete low_priority ignore quick from ", name)) {
strlwr(strcat(strcat(strcpy(name, qc), Name), qc));
+ k += 2;
+ } else
+ strlwr(strcpy(name, Name)); // Not a keyword
if ((p = strstr(qrystr, name))) {
for (i = 0; i < p - qrystr; i++)
stmt[i] = (Qrystr[i] == '`') ? *qc : Qrystr[i];
stmt[i] = 0;
- k = i + (int)strlen(Name);
+ k += i + (int)strlen(Name);
if (qtd && *(p-1) == ' ')
strcat(strcat(strcat(stmt, qc), TableName), qc);
@@ -765,6 +769,9 @@ bool TDBJDBC::MakeCommand(PGLOBAL g)
return NULL;
} // endif p
+ if (trace)
+ htrc("Command=%s\n", stmt);
+
Query = new(g)STRING(g, 0, stmt);
return (!Query->GetSize());
} // end of MakeCommand
@@ -1214,6 +1221,10 @@ int TDBJDBC::WriteDB(PGLOBAL g)
} // endif oom
Query->RepLast(')');
+
+ if (trace > 1)
+ htrc("Inserting: %s\n", Query->GetStr());
+
rc = Jcp->ExecuteUpdate(Query->GetStr());
Query->Truncate(len); // Restore query
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index b073398f8ec..a4aa43651f8 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -673,7 +673,10 @@ void
dict_stats_copy(
/*============*/
dict_table_t* dst, /*!< in/out: destination table */
- const dict_table_t* src) /*!< in: source table */
+ const dict_table_t* src, /*!< in: source table */
+ bool reset_ignored_indexes) /*!< in: if true, set ignored indexes
+ to have the same statistics as if
+ the table was empty */
{
dst->stats_last_recalc = src->stats_last_recalc;
dst->stat_n_rows = src->stat_n_rows;
@@ -692,7 +695,16 @@ dict_stats_copy(
&& (src_idx = dict_table_get_next_index(src_idx)))) {
if (dict_stats_should_ignore_index(dst_idx)) {
- continue;
+ if (reset_ignored_indexes) {
+ /* Reset index statistics for all ignored indexes,
+ unless they are FT indexes (these have no statistics) */
+ if (dst_idx->type & DICT_FTS) {
+ continue;
+ }
+ dict_stats_empty_index(dst_idx);
+ } else {
+ continue;
+ }
}
ut_ad(!dict_index_is_univ(dst_idx));
@@ -782,7 +794,7 @@ dict_stats_snapshot_create(
t = dict_stats_table_clone_create(table);
- dict_stats_copy(t, table);
+ dict_stats_copy(t, table, false);
t->stat_persistent = table->stat_persistent;
t->stats_auto_recalc = table->stats_auto_recalc;
@@ -3240,13 +3252,10 @@ dict_stats_update(
dict_table_stats_lock(table, RW_X_LATCH);
- /* Initialize all stats to dummy values before
- copying because dict_stats_table_clone_create() does
- skip corrupted indexes so our dummy object 't' may
- have less indexes than the real object 'table'. */
- dict_stats_empty_table(table);
-
- dict_stats_copy(table, t);
+ /* Pass reset_ignored_indexes=true as parameter
+ to dict_stats_copy. This will cause statistics
+ for corrupted indexes to be set to empty values */
+ dict_stats_copy(table, t, true);
dict_stats_assert_initialized(table);
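This call-site change is what makes the earlier dict_stats_empty_table(table) step unnecessary: instead of blanking the whole table and re-copying, dict_stats_copy(..., true) walks the destination indexes and empties exactly the ignored (e.g. corrupted) non-FTS ones in place. The copy loop's policy, condensed from the hunk above:

    // Condensed policy inside dict_stats_copy() after this change.
    if (dict_stats_should_ignore_index(dst_idx)) {
      if (!reset_ignored_indexes)
        continue;                       // snapshot path: skip entirely
      if (dst_idx->type & DICT_FTS)
        continue;                       // FTS indexes carry no statistics
      dict_stats_empty_index(dst_idx);  // corrupted: stats as if empty
    }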
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 99a25fead84..6059c28eabc 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
/** variable to record innodb_fts_internal_tbl_name for information
schema table INNODB_FTS_INSERTED etc. */
UNIV_INTERN char* fts_internal_tbl_name = NULL;
+UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
/** InnoDB default stopword list:
There are different versions of stopwords, the stop words listed
@@ -6570,6 +6571,36 @@ fts_check_corrupt_index(
return(0);
}
+/* Get the parent table name if this is an FTS aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len)
+{
+ fts_aux_table_t aux_table;
+ char* parent_table_name = NULL;
+
+ if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
+ dict_table_t* parent_table;
+
+ parent_table = dict_table_open_on_id(
+ aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+ if (parent_table != NULL) {
+ parent_table_name = mem_strdupl(
+ parent_table->name,
+ strlen(parent_table->name));
+
+ dict_table_close(parent_table, TRUE, FALSE);
+ }
+ }
+
+ return(parent_table_name);
+}
+
/** Check the validity of the parent table.
@param[in] aux_table auxiliary table
@return true if it is a valid table or false if it is not */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 3b438d140c7..21bb795f5d9 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -16217,7 +16217,12 @@ innodb_internal_table_update(
my_free(old);
}
- fts_internal_tbl_name = *(char**) var_ptr;
+ fts_internal_tbl_name2 = *(char**) var_ptr;
+ if (fts_internal_tbl_name2 == NULL) {
+ fts_internal_tbl_name = const_cast<char*>("default");
+ } else {
+ fts_internal_tbl_name = fts_internal_tbl_name2;
+ }
}
/****************************************************************//**
@@ -18319,7 +18324,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
"Whether to disable OS system file cache for sort I/O",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
PLUGIN_VAR_NOCMDARG,
"FTS internal auxiliary table to be checked",
innodb_internal_table_validate,
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 13bc7574542..e164d7e279c 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -213,7 +213,10 @@ innobase_need_rebuild(
const Alter_inplace_info* ha_alter_info,
const TABLE* altered_table)
{
- if (ha_alter_info->handler_flags
+ Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+ ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+ if (alter_inplace_flags
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !(ha_alter_info->create_info->used_fields
& (HA_CREATE_USED_ROW_FORMAT
@@ -3937,7 +3940,7 @@ err_exit:
}
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
- || (ha_alter_info->handler_flags
+ || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table))) {
@@ -4111,7 +4114,7 @@ ok_exit:
DBUG_RETURN(false);
}
- if (ha_alter_info->handler_flags
+ if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table)) {
goto ok_exit;
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index c5310e1369f..50340e05860 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -3981,6 +3981,8 @@ i_s_fts_config_fill(
DBUG_RETURN(0);
}
+ DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
fields = table->field;
/* Prevent DDL to drop fts aux tables. */
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 87b5787d416..3e2f359bbeb 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -375,6 +375,7 @@ extern bool fts_need_sync;
/** Variable specifying the table that has Fulltext index to display its
content through information schema table */
extern char* fts_internal_tbl_name;
+extern char* fts_internal_tbl_name2;
#define fts_que_graph_free(graph) \
do { \
@@ -823,6 +824,15 @@ void
fts_drop_orphaned_tables(void);
/*==========================*/
+/* Get the parent table name if this is an FTS aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len);
+
/******************************************************************//**
Since we do a horizontal split on the index table, we need to drop
all the split tables.
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 7c98e2877d5..e6ddf45faba 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 32
+#define INNODB_VERSION_BUGFIX 33
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 59382455cea..77cca37ddd1 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -613,7 +613,7 @@ row_log_table_delete(
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
- mrec_size = 4 + old_pk_size;
+ mrec_size = 6 + old_pk_size;
/* Log enough prefix of the BLOB unless both the
old and new table are in COMPACT or REDUNDANT format,
@@ -643,8 +643,8 @@ row_log_table_delete(
*b++ = static_cast<byte>(old_pk_extra_size);
/* Log the size of external prefix we saved */
- mach_write_to_2(b, ext_size);
- b += 2;
+ mach_write_to_4(b, ext_size);
+ b += 4;
rec_convert_dtuple_to_temp(
b + old_pk_extra_size, new_index,
@@ -2268,14 +2268,14 @@ row_log_table_apply_op(
break;
case ROW_T_DELETE:
- /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
- if (mrec + 4 >= mrec_end) {
+ /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+ if (mrec + 6 >= mrec_end) {
return(NULL);
}
extra_size = *mrec++;
- ext_size = mach_read_from_2(mrec);
- mrec += 2;
+ ext_size = mach_read_from_4(mrec);
+ mrec += 4;
ut_ad(mrec < mrec_end);
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
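Widening ext_size from 2 to 4 bytes changes the record layout and the bounds check in lockstep: a ROW_T_DELETE record is now 1 (extra_size) + 4 (ext_size) + at least 1 byte of payload, hence mrec_size = 6 + old_pk_size on the write side and the mrec + 6 >= mrec_end guard on the read side. Writer and reader must agree on the field width:

    // The write/read pair for the widened field (sketch).
    mach_write_to_4(b, ext_size);        // was: mach_write_to_2 / b += 2
    b += 4;
    /* ... old PK image and BLOB prefix follow ... */
    ext_size = mach_read_from_4(mrec);   // was: mach_read_from_2 / mrec += 2
    mrec += 4;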
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 122c03b7cc9..07b841de300 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -2715,6 +2715,10 @@ loop:
return(n_tables + n_tables_dropped);
}
+ DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+ os_thread_sleep(5000000);
+ );
+
table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
DICT_ERR_IGNORE_NONE);
@@ -2725,6 +2729,16 @@ loop:
goto already_dropped;
}
+ if (!table->to_be_dropped) {
+ /* There is a scenario: the old table is dropped
+ just after it is added to the drop list; then a new
+ table with the same name is created, and we would
+ wrongly try to drop the new table in the background. */
+ dict_table_close(table, FALSE, FALSE);
+
+ goto already_dropped;
+ }
+
ut_a(!table->can_be_evicted);
dict_table_close(table, FALSE, FALSE);
@@ -3992,6 +4006,13 @@ row_drop_table_for_mysql(
}
}
+
+ DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+ row_add_table_to_background_drop_list(table->name);
+ err = DB_SUCCESS;
+ goto funct_exit;
+ );
+
/* TODO: could we replace the counter n_foreign_key_checks_running
with lock checks on the table? Acquire here an exclusive lock on the
table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
@@ -4608,6 +4629,19 @@ loop:
row_mysql_lock_data_dictionary(trx);
while ((table_name = dict_get_first_table_name_in_db(name))) {
+ /* Drop the parent table if this is an FTS aux table, to
+ avoid accessing dropped FTS aux tables in the information
+ schema while the parent table still exists.
+ Note: dropping the parent table will drop its FTS aux tables. */
+ char* parent_table_name;
+ parent_table_name = fts_get_parent_table_name(
+ table_name, strlen(table_name));
+
+ if (parent_table_name != NULL) {
+ mem_free(table_name);
+ table_name = parent_table_name;
+ }
+
ut_a(memcmp(table_name, name, namelen) == 0);
table = dict_table_open_on_name(
diff --git a/storage/oqgraph/graphcore.cc b/storage/oqgraph/graphcore.cc
index 4346b94805c..7c8ca53c096 100644
--- a/storage/oqgraph/graphcore.cc
+++ b/storage/oqgraph/graphcore.cc
@@ -485,7 +485,7 @@ namespace open_query
optional<Vertex>
oqgraph_share::find_vertex(VertexID id) const
{
- return ::boost::find_vertex(id, g);
+ return oqgraph3::find_vertex(id, g);
}
#if 0
diff --git a/storage/oqgraph/oqgraph_shim.h b/storage/oqgraph/oqgraph_shim.h
index af240b88ebd..004d7f0f7c5 100644
--- a/storage/oqgraph/oqgraph_shim.h
+++ b/storage/oqgraph/oqgraph_shim.h
@@ -274,6 +274,33 @@ namespace boost
};
#endif
+ template<>
+ struct property_map<oqgraph3::graph, edge_weight_t>
+ {
+ typedef void type;
+ typedef oqgraph3::edge_weight_property_map const_type;
+ };
+
+ template<>
+ struct property_map<oqgraph3::graph, vertex_index_t>
+ {
+ typedef void type;
+ typedef oqgraph3::vertex_index_property_map const_type;
+ };
+
+ template<>
+ struct property_map<oqgraph3::graph, edge_index_t>
+ {
+ typedef void type;
+ typedef oqgraph3::edge_index_property_map const_type;
+ };
+
+}
+
+namespace oqgraph3
+{
+ using namespace boost;
+
inline graph_traits<oqgraph3::graph>::vertex_descriptor
source(
const graph_traits<oqgraph3::graph>::edge_descriptor& e,
@@ -401,27 +428,6 @@ namespace boost
return count;
}
- template<>
- struct property_map<oqgraph3::graph, edge_weight_t>
- {
- typedef void type;
- typedef oqgraph3::edge_weight_property_map const_type;
- };
-
- template<>
- struct property_map<oqgraph3::graph, vertex_index_t>
- {
- typedef void type;
- typedef oqgraph3::vertex_index_property_map const_type;
- };
-
- template<>
- struct property_map<oqgraph3::graph, edge_index_t>
- {
- typedef void type;
- typedef oqgraph3::edge_index_property_map const_type;
- };
-
inline property_map<
oqgraph3::graph,
edge_weight_t>::const_type::reference
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 444de87ba8b..2445bd80927 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -205,7 +205,7 @@ maria_declare_plugin(perfschema)
0x0001,
pfs_status_vars,
NULL,
- "5.6.32",
+ "5.6.33",
MariaDB_PLUGIN_MATURITY_STABLE
}
maria_declare_plugin_end;
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt
index 8a9058bbb3e..dad90fe96eb 100644
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@@ -1,4 +1,4 @@
-SET(TOKUDB_VERSION 5.6.31-77.0)
+SET(TOKUDB_VERSION 5.6.33-79.0)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
index 4b62703480f..7ede78b3c0d 100644
--- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
+++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
@@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
"int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
"int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
- "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
- "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
+ "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
+ "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
@@ -422,6 +422,9 @@ static void print_db_env_struct (void) {
"int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)",
"void (*set_check_thp)(DB_ENV *, bool new_val)",
"bool (*get_check_thp)(DB_ENV *)",
+ "bool (*set_dir_per_db)(DB_ENV *, bool new_val)",
+ "bool (*get_dir_per_db)(DB_ENV *)",
+ "const char *(*get_data_dir)(DB_ENV *env)",
NULL};
sort_and_dump_fields("db_env", true, extra);
diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
index 4c5004cd6a5..883f35041e2 100644
--- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
+++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
@@ -97,7 +97,7 @@ if (NOT HAVE_BACKTRACE_WITHOUT_EXECINFO)
endif ()
endif ()
-if(HAVE_CLOCK_REALTIME)
+if(HAVE_CLOCK_REALTIME AND (NOT APPLE))
list(APPEND EXTRA_SYSTEM_LIBS rt)
else()
list(APPEND EXTRA_SYSTEM_LIBS System)
@@ -109,6 +109,8 @@ check_function_exists(pthread_rwlockattr_setkind_np HAVE_PTHREAD_RWLOCKATTR_SETK
## check for the right way to yield using pthreads
check_function_exists(pthread_yield HAVE_PTHREAD_YIELD)
check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
+## check if we have pthread_threadid_np() (i.e. osx)
+check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP)
## check if we have pthread_getthreadid_np() (i.e. freebsd)
check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP)
check_function_exists(sched_getcpu HAVE_SCHED_GETCPU)
diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
index 6082178da75..769bdffa5d9 100644
--- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
+++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
@@ -98,11 +98,10 @@ set_cflags_if_supported(
-Wno-error=address-of-array-temporary
-Wno-error=tautological-constant-out-of-range-compare
-Wno-error=maybe-uninitialized
- -Wno-ignored-attributes
-Wno-error=extern-c-compat
- -Wno-pointer-bool-conversion
-fno-rtti
-fno-exceptions
+ -Wno-error=nonnull-compare
)
## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
index 11091073ac2..6696c26ecc0 100644
--- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
@@ -55,8 +55,8 @@ set(FT_SOURCES
msg_buffer
node
pivotkeys
+ serialize/rbtree_mhs
serialize/block_allocator
- serialize/block_allocator_strategy
serialize/block_table
serialize/compress
serialize/ft_node-serialize
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
index dc6aec9226d..05fb771de08 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
@@ -138,6 +138,8 @@ struct cachefile {
// nor attempt to open any cachefile with the same fname (dname)
// until this cachefile has been fully closed and unlinked.
bool unlink_on_close;
+ // If set then fclose will not be logged in recovery log.
+ bool skip_log_recover_on_close;
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
CACHETABLE cachetable;
struct fileid fileid;
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
index 5bba977de1a..6d753805fa9 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
@@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) {
return cf->fname_in_env;
}
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) {
+ cf->fname_in_env = new_fname_in_env;
+}
+
int
toku_cachefile_get_fd (CACHEFILE cf) {
return cf->fd;
@@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) {
return cf->unlink_on_close;
}
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) {
+ cf->skip_log_recover_on_close = true;
+}
+
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) {
+ cf->skip_log_recover_on_close = false;
+}
+
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) {
+ return cf->skip_log_recover_on_close;
+}
+
uint64_t toku_cachefile_size(CACHEFILE cf) {
int64_t file_size;
int fd = toku_cachefile_get_fd(cf);
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
index 148326562ab..3b3cb0a2d46 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
@@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE);
// Return the filename
char * toku_cachefile_fname_in_env (CACHEFILE cf);
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env);
+
// Make it so when the cachefile closes, the underlying file is unlinked
void toku_cachefile_unlink_on_close(CACHEFILE cf);
// is this cachefile marked as unlink on close?
bool toku_cachefile_is_unlink_on_close(CACHEFILE cf);
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf);
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf);
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf);
+
// Return the logger associated with the cachefile
struct tokulogger *toku_cachefile_logger(CACHEFILE cf);
diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
index fb456ea6a18..e6452f60cfc 100644
--- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
@@ -496,7 +496,7 @@ handle_split_of_child(
// We never set the rightmost blocknum to be the root.
// Instead, we wait for the root to split and let promotion initialize the rightmost
- // blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
+ // blocknum to be the first non-root leaf node on the right extreme to receive an insert.
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
if (childa->blocknum.b == rightmost_blocknum.b) {
@@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// It is possible after reading in the entire child,
// that we now know that the child is not reactive
// if so, we can unpin parent right now
- // we wont be splitting/merging child
+ // we won't be splitting/merging child
// and we have already replaced the bnc
// for the root with a fresh one
enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index 8f61bc67339..30a8710d7aa 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -149,22 +149,23 @@ basement nodes, bulk fetch, and partial fetch:
#include "ft/cachetable/checkpoint.h"
#include "ft/cursor.h"
-#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-flusher.h"
#include "ft/ft-internal.h"
-#include "ft/msg.h"
+#include "ft/ft.h"
#include "ft/leafentry.h"
#include "ft/logger/log-internal.h"
+#include "ft/msg.h"
#include "ft/node.h"
#include "ft/serialize/block_table.h"
-#include "ft/serialize/sub_block.h"
#include "ft/serialize/ft-serialize.h"
#include "ft/serialize/ft_layout_version.h"
#include "ft/serialize/ft_node-serialize.h"
+#include "ft/serialize/sub_block.h"
#include "ft/txn/txn_manager.h"
-#include "ft/ule.h"
#include "ft/txn/xids.h"
+#include "ft/ule.h"
+#include "src/ydb-internal.h"
#include <toku_race_tools.h>
@@ -179,6 +180,7 @@ basement nodes, bulk fetch, and partial fetch:
#include <stdint.h>
+#include <memory>
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
*/
@@ -598,15 +600,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) {
}
}
-void toku_ftnode_clone_callback(
- void* value_data,
- void** cloned_value_data,
- long* clone_size,
- PAIR_ATTR* new_attr,
- bool for_checkpoint,
- void* write_extraargs
- )
-{
+void toku_ftnode_clone_callback(void *value_data,
+ void **cloned_value_data,
+ long *clone_size,
+ PAIR_ATTR *new_attr,
+ bool for_checkpoint,
+ void *write_extraargs) {
FTNODE node = static_cast<FTNODE>(value_data);
toku_ftnode_assert_fully_in_memory(node);
FT ft = static_cast<FT>(write_extraargs);
@@ -618,13 +617,16 @@ void toku_ftnode_clone_callback(
toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize);
}
- cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
- cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
+ cloned_node->oldest_referenced_xid_known =
+ node->oldest_referenced_xid_known;
+ cloned_node->max_msn_applied_to_node_on_disk =
+ node->max_msn_applied_to_node_on_disk;
cloned_node->flags = node->flags;
cloned_node->blocknum = node->blocknum;
cloned_node->layout_version = node->layout_version;
cloned_node->layout_version_original = node->layout_version_original;
- cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk;
+ cloned_node->layout_version_read_from_disk =
+ node->layout_version_read_from_disk;
cloned_node->build_id = node->build_id;
cloned_node->height = node->height;
cloned_node->dirty = node->dirty;
@@ -649,38 +651,39 @@ void toku_ftnode_clone_callback(
// set new pair attr if necessary
if (node->height == 0) {
*new_attr = make_ftnode_pair_attr(node);
- }
- else {
+ for (int i = 0; i < node->n_children; i++) {
+ BLB(node, i)->logical_rows_delta = 0;
+ BLB(cloned_node, i)->logical_rows_delta = 0;
+ }
+ } else {
new_attr->is_valid = false;
}
*clone_size = ftnode_memory_size(cloned_node);
*cloned_value_data = cloned_node;
}
-void toku_ftnode_flush_callback(
- CACHEFILE UU(cachefile),
- int fd,
- BLOCKNUM blocknum,
- void *ftnode_v,
- void** disk_data,
- void *extraargs,
- PAIR_ATTR size __attribute__((unused)),
- PAIR_ATTR* new_size,
- bool write_me,
- bool keep_me,
- bool for_checkpoint,
- bool is_clone
- )
-{
- FT ft = (FT) extraargs;
- FTNODE ftnode = (FTNODE) ftnode_v;
- FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
+void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
+ int fd,
+ BLOCKNUM blocknum,
+ void *ftnode_v,
+ void **disk_data,
+ void *extraargs,
+ PAIR_ATTR size __attribute__((unused)),
+ PAIR_ATTR *new_size,
+ bool write_me,
+ bool keep_me,
+ bool for_checkpoint,
+ bool is_clone) {
+ FT ft = (FT)extraargs;
+ FTNODE ftnode = (FTNODE)ftnode_v;
+ FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
assert(ftnode->blocknum.b == blocknum.b);
int height = ftnode->height;
if (write_me) {
toku_ftnode_assert_fully_in_memory(ftnode);
if (height > 0 && !is_clone) {
- // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback()
+ // cloned nodes already had their stale messages moved, see
+ // toku_ftnode_clone_callback()
toku_move_ftnode_messages_to_stale(ft, ftnode);
} else if (height == 0) {
toku_ftnode_leaf_run_gc(ft, ftnode);
@@ -688,7 +691,8 @@ void toku_ftnode_flush_callback(
toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
}
}
- int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
+ int r = toku_serialize_ftnode_to(
+ fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
assert_zero(r);
ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
}
@@ -703,20 +707,22 @@ void toku_ftnode_flush_callback(
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
}
toku_free(*disk_data);
- }
- else {
+ } else {
if (ftnode->height == 0) {
for (int i = 0; i < ftnode->n_children; i++) {
- if (BP_STATE(ftnode,i) == PT_AVAIL) {
+ if (BP_STATE(ftnode, i) == PT_AVAIL) {
BASEMENTNODE bn = BLB(ftnode, i);
- toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+ toku_ft_decrease_stats(&ft->in_memory_stats,
+ bn->stat64_delta);
+ if (!ftnode->dirty)
+ toku_ft_adjust_logical_row_count(
+ ft, -bn->logical_rows_delta);
}
}
}
}
toku_ftnode_free(&ftnode);
- }
- else {
+ } else {
*new_size = make_ftnode_pair_attr(ftnode);
}
}
@@ -845,10 +851,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr
}
// callback for partially evicting a node
-int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
- void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) {
- FTNODE node = (FTNODE) ftnode_pv;
- FT ft = (FT) write_extraargs;
+int toku_ftnode_pe_callback(void *ftnode_pv,
+ PAIR_ATTR old_attr,
+ void *write_extraargs,
+ void (*finalize)(PAIR_ATTR new_attr, void *extra),
+ void *finalize_extra) {
+ FTNODE node = (FTNODE)ftnode_pv;
+ FT ft = (FT)write_extraargs;
int num_partial_evictions = 0;
// Hold things we intend to destroy here.
@@ -866,7 +875,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
}
// Don't partially evict nodes whose partitions can't be read back
// from disk individually
- if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
+ if (node->layout_version_read_from_disk <
+ FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
goto exit;
}
//
@@ -874,77 +884,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
//
if (node->height > 0) {
for (int i = 0; i < node->n_children; i++) {
- if (BP_STATE(node,i) == PT_AVAIL) {
- if (BP_SHOULD_EVICT(node,i)) {
+ if (BP_STATE(node, i) == PT_AVAIL) {
+ if (BP_SHOULD_EVICT(node, i)) {
NONLEAF_CHILDINFO bnc = BNC(node, i);
if (ft_compress_buffers_before_eviction &&
- // We may not serialize and compress a partition in memory if its
- // in memory layout version is different than what's on disk (and
- // therefore requires upgrade).
+ // We may not serialize and compress a partition in
+                    // memory if its in-memory layout version is different
+ // than what's on disk (and therefore requires upgrade).
//
- // Auto-upgrade code assumes that if a node's layout version read
- // from disk is not current, it MUST require upgrade. Breaking
- // this rule would cause upgrade code to upgrade this partition
- // again after we serialize it as the current version, which is bad.
- node->layout_version == node->layout_version_read_from_disk) {
+ // Auto-upgrade code assumes that if a node's layout
+ // version read from disk is not current, it MUST
+ // require upgrade.
+ // Breaking this rule would cause upgrade code to
+ // upgrade this partition again after we serialize it as
+ // the current version, which is bad.
+ node->layout_version ==
+ node->layout_version_read_from_disk) {
toku_ft_bnc_move_messages_to_stale(ft, bnc);
compress_internal_node_partition(
node,
i,
// Always compress with quicklz
- TOKU_QUICKLZ_METHOD
- );
+ TOKU_QUICKLZ_METHOD);
} else {
// We're not compressing buffers before eviction. Simply
- // detach the buffer and set the child's state to on-disk.
+ // detach the buffer and set the child's state to
+ // on-disk.
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
}
buffers_to_destroy[num_buffers_to_destroy++] = bnc;
num_partial_evictions++;
+ } else {
+ BP_SWEEP_CLOCK(node, i);
}
- else {
- BP_SWEEP_CLOCK(node,i);
- }
- }
- else {
+ } else {
continue;
}
}
- }
- //
- // partial eviction strategy for basement nodes:
- // if the bn is compressed, evict it
- // else: check if it requires eviction, if it does, evict it, if not, sweep the clock count
- //
- else {
+ } else {
+ //
+ // partial eviction strategy for basement nodes:
+ // if the bn is compressed, evict it
+ // else: check if it requires eviction, if it does, evict it, if not,
+ // sweep the clock count
+ //
for (int i = 0; i < node->n_children; i++) {
// Get rid of compressed stuff no matter what.
- if (BP_STATE(node,i) == PT_COMPRESSED) {
+ if (BP_STATE(node, i) == PT_COMPRESSED) {
SUB_BLOCK sb = BSB(node, i);
pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr;
pointers_to_free[num_pointers_to_free++] = sb;
set_BNULL(node, i);
- BP_STATE(node,i) = PT_ON_DISK;
+ BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
- }
- else if (BP_STATE(node,i) == PT_AVAIL) {
- if (BP_SHOULD_EVICT(node,i)) {
+ } else if (BP_STATE(node, i) == PT_AVAIL) {
+ if (BP_SHOULD_EVICT(node, i)) {
BASEMENTNODE bn = BLB(node, i);
basements_to_destroy[num_basements_to_destroy++] = bn;
- toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+ toku_ft_decrease_stats(&ft->in_memory_stats,
+ bn->stat64_delta);
+ toku_ft_adjust_logical_row_count(ft,
+ -bn->logical_rows_delta);
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
+ } else {
+ BP_SWEEP_CLOCK(node, i);
}
- else {
- BP_SWEEP_CLOCK(node,i);
- }
- }
- else if (BP_STATE(node,i) == PT_ON_DISK) {
+ } else if (BP_STATE(node, i) == PT_ON_DISK) {
continue;
- }
- else {
+ } else {
abort();
}
}
@@ -2378,12 +2388,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) {
toku_ft_root_put_msg(ft_h->ft, msg, &gc_info);
}
-void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
- TOKUTXN txn, bool oplsn_valid, LSN oplsn,
- bool do_logging) {
+void toku_ft_maybe_update(FT_HANDLE ft_h,
+ const DBT *key,
+ const DBT *update_function_extra,
+ TOKUTXN txn,
+ bool oplsn_valid,
+ LSN oplsn,
+ bool do_logging) {
TXNID_PAIR xid = toku_txn_get_txnid(txn);
if (txn) {
- BYTESTRING keybs = { key->size, (char *) key->data };
+ BYTESTRING keybs = {key->size, (char *)key->data};
toku_logger_save_rollback_cmdupdate(
txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
toku_txn_maybe_note_ft(txn, ft_h->ft);
@@ -2392,22 +2406,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
TOKULOGGER logger;
logger = toku_txn_logger(txn);
if (do_logging && logger) {
- BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
- BYTESTRING extrabs = {.len=update_function_extra->size,
- .data = (char *) update_function_extra->data};
- toku_log_enq_update(logger, NULL, 0, txn,
- toku_cachefile_filenum(ft_h->ft->cf),
- xid, keybs, extrabs);
+ BYTESTRING keybs = {.len = key->size, .data = (char *)key->data};
+ BYTESTRING extrabs = {.len = update_function_extra->size,
+ .data = (char *)update_function_extra->data};
+ toku_log_enq_update(logger,
+ NULL,
+ 0,
+ txn,
+ toku_cachefile_filenum(ft_h->ft->cf),
+ xid,
+ keybs,
+ extrabs);
}
LSN treelsn;
- if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
+ if (oplsn_valid &&
+ oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
- XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
- ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
+ XIDS message_xids =
+ txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
+ ft_msg msg(
+ key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
ft_send_update_msg(ft_h, msg, txn);
}
+ // updates get converted to insert messages, which should do a -1 on the
+ // logical row count when the messages are permanently applied
+ toku_ft_adjust_logical_row_count(ft_h->ft, 1);
}
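// A worked sketch of the row-count bookkeeping above, assuming the
// convention stated in the comment (the update counts +1 here, and the
// insert message it becomes contributes -1 when finally applied to a
// leaf that already holds the key):
//
//   update of an existing row:  +1 (here) + (-1) (on apply) = net 0
//   update of a missing row:    +1 (here) +   0             = net +1
//
// so in_memory_logical_rows stays an estimate that converges as messages
// are pushed down the tree.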
void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
@@ -2570,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode
static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH;
+inline bool toku_file_is_root(const char *path, const char *last_slash) {
+ return last_slash == path;
+}
+
+static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir(
+ const char *path) {
+ std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+
+ bool has_trailing_slash = false;
+
+ /* Find the offset of the last slash */
+ const char *last_slash = strrchr(path, OS_PATH_SEPARATOR);
+
+ if (!last_slash) {
+ /* No slash in the path, return NULL */
+ return result;
+ }
+
+ /* Ok, there is a slash. Is there anything after it? */
+ if (static_cast<size_t>(last_slash - path + 1) == strlen(path)) {
+ has_trailing_slash = true;
+ }
+
+    /* Reduce repetitive slashes. */
+ while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+
+ /* Check for the root of a drive. */
+ if (toku_file_is_root(path, last_slash)) {
+ return result;
+ }
+
+ /* If a trailing slash prevented the first strrchr() from trimming
+ the last component of the path, trim that component now. */
+ if (has_trailing_slash) {
+ /* Back up to the previous slash. */
+ last_slash--;
+ while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+
+        /* Reduce repetitive slashes. */
+ while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+ }
+
+ /* Check for the root of a drive. */
+ if (toku_file_is_root(path, last_slash)) {
+ return result;
+ }
+
+ result.reset(toku_strndup(path, last_slash - path));
+ return result;
+}
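// A sketch of what the helper above is expected to return, assuming
// OS_PATH_SEPARATOR == '/' (as defined in ft-ops.h below); the
// unique_ptr owns a toku_strndup'd copy, or holds nullptr when there is
// no usable parent:
//
//   toku_file_get_parent_dir("a/b/c")  -> "a/b"
//   toku_file_get_parent_dir("a/b/c/") -> "a/b"   (trailing slash trimmed)
//   toku_file_get_parent_dir("a//b")   -> "a"     (repeated slashes reduced)
//   toku_file_get_parent_dir("/a")     -> nullptr (parent is the root)
//   toku_file_get_parent_dir("plain")  -> nullptr (no slash at all)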
+
+static bool toku_create_subdirs_if_needed(const char *path) {
+ static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
+ S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH;
+
+ toku_struct_stat stat;
+ bool subdir_exists = true;
+ auto subdir = toku_file_get_parent_dir(path);
+
+ if (!subdir.get())
+ return true;
+
+ if (toku_stat(subdir.get(), &stat) == -1) {
+ if (ENOENT == get_error_errno())
+ subdir_exists = false;
+ else
+ return false;
+ }
+
+ if (subdir_exists) {
+ if (!S_ISDIR(stat.st_mode))
+ return false;
+ return true;
+ }
+
+ if (!toku_create_subdirs_if_needed(subdir.get()))
+ return false;
+
+ if (toku_os_mkdir(subdir.get(), dir_mode))
+ return false;
+
+ return true;
+}
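// A sketch of the recursion above for a hypothetical iname
// "d1/d2/t1.tokudb", assuming none of the directories exist yet:
//
//   toku_create_subdirs_if_needed("d1/d2/t1.tokudb")  parent "d1/d2" missing
//     toku_create_subdirs_if_needed("d1/d2")          parent "d1" missing
//       toku_create_subdirs_if_needed("d1")           no separator -> true
//       toku_os_mkdir("d1", dir_mode)
//     toku_os_mkdir("d1/d2", dir_mode)
//   -> true, and ft_create_file() below can then create the file itself.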
+
// open a file for use by the ft
// Requires: File does not exist.
static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) {
int r;
int fd;
int er;
+ if (!toku_create_subdirs_if_needed(fname))
+ return get_error_errno();
fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode);
assert(fd==-1);
if ((er = get_maybe_error_errno()) != ENOENT) {
@@ -4404,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) {
toku_cachefile_unlink_on_close(cf);
}
+int toku_ft_rename_iname(DB_TXN *txn,
+ const char *data_dir,
+ const char *old_iname,
+ const char *new_iname,
+ CACHETABLE ct) {
+ int r = 0;
+
+ std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(nullptr,
+ &toku_free);
+ std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(nullptr,
+ &toku_free);
+
+ new_iname_full.reset(toku_construct_full_name(2, data_dir, new_iname));
+ old_iname_full.reset(toku_construct_full_name(2, data_dir, old_iname));
+
+ if (txn) {
+ BYTESTRING bs_old_name = {static_cast<uint32_t>(strlen(old_iname) + 1),
+ const_cast<char *>(old_iname)};
+ BYTESTRING bs_new_name = {static_cast<uint32_t>(strlen(new_iname) + 1),
+ const_cast<char *>(new_iname)};
+ FILENUM filenum = FILENUM_NONE;
+ {
+ CACHEFILE cf;
+ r = toku_cachefile_of_iname_in_env(ct, old_iname, &cf);
+ if (r != ENOENT) {
+ char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+ toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname));
+ toku_free(old_fname_in_cf);
+ filenum = toku_cachefile_filenum(cf);
+ }
+ }
+ toku_logger_save_rollback_frename(
+ db_txn_struct_i(txn)->tokutxn, &bs_old_name, &bs_new_name);
+ toku_log_frename(db_txn_struct_i(txn)->tokutxn->logger,
+ (LSN *)0,
+ 0,
+ toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn),
+ bs_old_name,
+ filenum,
+ bs_new_name);
+ }
+
+ r = toku_os_rename(old_iname_full.get(), new_iname_full.get());
+ if (r != 0)
+ return r;
+ r = toku_fsync_directory(new_iname_full.get());
+ return r;
+}
+
int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) {
int fd = toku_cachefile_get_fd(ft_handle->ft->cf);
toku_ft_lock(ft_handle->ft);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h
index 313a74628ea..70cf045d43c 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.h
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.h
@@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/msg.h"
#include "util/dbt.h"
+#define OS_PATH_SEPARATOR '/'
+
typedef struct ft_handle *FT_HANDLE;
int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
index adac96f4882..e31d80772d5 100644
--- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
@@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
return rre->_cancelled =
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
}
-int toku_ft_recount_rows(
- FT_HANDLE ft,
- int (*progress_callback)(
- uint64_t count,
- uint64_t deleted,
- void* progress_extra),
- void* progress_extra) {
-
+int toku_ft_recount_rows(FT_HANDLE ft,
+ int (*progress_callback)(uint64_t count,
+ uint64_t deleted,
+ void* progress_extra),
+ void* progress_extra) {
int ret = 0;
- recount_rows_extra_t rre = {
- progress_callback,
- progress_extra,
- 0,
- false
- };
+ recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
ft_cursor c;
ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
- if (ret) return ret;
+ if (ret)
+ return ret;
- toku_ft_cursor_set_check_interrupt_cb(
- &c,
- recount_rows_interrupt,
- &rre);
+ toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
while (FT_LIKELY(ret == 0)) {
@@ -108,6 +98,7 @@ int toku_ft_recount_rows(
if (rre._cancelled == false) {
// update ft count
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
+ ft->ft->h->dirty = 1;
ret = 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 93d21233bf7..7c94b4c59d3 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
assert(fname_in_env);
BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env};
- toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
+ if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) {
+ toku_log_fclose(
+ logger,
+ &lsn,
+ ft->h->dirty,
+ bs,
+ toku_cachefile_filenum(cachefile)); // flush the log on
+ // close (if new header
+ // is being written),
+ // otherwise it might
+ // not make it out.
+ toku_cachefile_do_log_recover_on_close(cachefile);
+ }
}
}
if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader)
@@ -903,6 +915,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
// must be returned in toku_ft_stat64.
if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
+ if (ft->in_memory_logical_rows == (uint64_t)-1) {
+ toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
+ }
}
}
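// A worked example of the new guard, assuming (uint64_t)-1 doubles as
// the "row count unknown" sentinel (the outer condition already skips
// that value): if the estimate is 0 and a delta of -1 arrives - deletes
// can transiently outnumber inserts while messages are in flight - the
// unsigned add wraps to the sentinel, and the extra increment wraps it
// back to a sane 0:
//
//   0 + (uint64_t)-1        == 0xffffffffffffffff  // looks like "unknown"
//   0xffffffffffffffff + 1  == 0                   // clamped estimate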
diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h
index d600e093bdc..7a3c4fa783c 100644
--- a/storage/tokudb/PerconaFT/ft/ft.h
+++ b/storage/tokudb/PerconaFT/ft/ft.h
@@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS;
void toku_ft_unlink(FT_HANDLE handle);
void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn);
+int toku_ft_rename_iname(DB_TXN *txn,
+ const char *data_dir,
+ const char *old_iname,
+ const char *new_iname,
+ CACHETABLE ct);
+
void toku_ft_init_reflock(FT ft);
void toku_ft_destroy_reflock(FT ft);
void toku_ft_grab_reflock(FT ft);
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
index dd070373e26..1aa2c203831 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
+++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
@@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
-// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.)
+// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
int toku_ft_loader_finish_extractor(FTLOADER bl);
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc
index 20f9363da1e..528c86a8f79 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc
@@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
uint64_t
toku_ft_loader_get_rowset_budget_for_testing (void)
-// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613).
+// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613).
{
return 16ULL*size_factor*1024ULL;
}
diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
index 6f3baa81c86..49b61138803 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
@@ -90,6 +90,10 @@ const struct logtype rollbacks[] = {
{"fcreate", 'F', FA{{"FILENUM", "filenum", 0},
{"BYTESTRING", "iname", 0},
NULLFIELD}, LOG_BEGIN_ACTION_NA},
+ //rename file
+ {"frename", 'n', FA{{"BYTESTRING", "old_iname", 0},
+ {"BYTESTRING", "new_iname", 0},
+ NULLFIELD}, LOG_BEGIN_ACTION_NA},
// cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data.
{"cmdinsert", 'i', FA{
{"FILENUM", "filenum", 0},
@@ -195,6 +199,11 @@ const struct logtype logtypes[] = {
{"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUM", "filenum", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
+ {"frename", 'n', FA{{"TXNID_PAIR", "xid", 0},
+ {"BYTESTRING", "old_iname", 0},
+ {"FILENUM", "old_filenum", 0},
+ {"BYTESTRING", "new_iname", 0},
+ NULLFIELD}, IGNORE_LOG_BEGIN},
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc
index 38f29773bd6..a9c30c0e37a 100644
--- a/storage/tokudb/PerconaFT/ft/logger/recover.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <memory>
#include "ft/cachetable/cachetable.h"
#include "ft/cachetable/checkpoint.h"
#include "ft/ft.h"
@@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER
return 0;
}
+static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) {
+ assert(renv);
+ assert(renv->env);
+
+ toku_struct_stat stat;
+ const char *data_dir = renv->env->get_data_dir(renv->env);
+ bool old_exist = true;
+ bool new_exist = true;
+
+ assert(data_dir);
+
+ struct file_map_tuple *tuple;
+
+ std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+ toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free);
+ std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+ toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free);
+
+ if (toku_stat(old_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ old_exist = false;
+ else
+ return 1;
+ }
+
+ if (toku_stat(new_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ new_exist = false;
+ else
+ return 1;
+ }
+
+    // Both the old and the new file can exist if:
+    // - rename() did not complete
+    // - fcreate was replayed during recovery
+    // The 'stale cachefiles' container cachefile_list::m_stale_fileid holds
+    // cachefiles that are closed but not yet evicted; its key is the
+    // fs-dependent file id, a (device id, inode number) pair. Since the new
+    // file is not supposed to have been created yet during recovery, the
+    // container can hold only the old file's cachefile.
+    // To preserve the old file's id and keep its cachefile in the
+    // container, the new file is removed and the old file is renamed.
+ if (old_exist && new_exist &&
+ (toku_os_unlink(new_iname_full.get()) == -1 ||
+ toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1))
+ return 1;
+
+ if (old_exist && !new_exist &&
+ (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1))
+ return 1;
+
+ if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) {
+ if (tuple->iname)
+ toku_free(tuple->iname);
+ tuple->iname = toku_xstrdup(l->new_iname.data);
+ }
+
+ TOKUTXN txn = NULL;
+ toku_txnid2txn(renv->logger, l->xid, &txn);
+
+ if (txn)
+ toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname);
+
+ return 0;
+}
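// The stat() outcomes above reduce to this redo table (a summary of the
// branches, not new behavior):
//
//   old exists, new exists  -> unlink(new), rename(old -> new)
//   old exists, new missing -> rename(old -> new)
//   old missing             -> nothing to redo (the rename completed)
//
// with both directories fsync'd whenever a rename is performed, and any
// stat/unlink/rename failure reported as a hard recovery error (1).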
+
+static int toku_recover_backward_frename(struct logtype_frename *UU(l),
+ RECOVER_ENV UU(renv)) {
+ // nothing
+ return 0;
+}
+
static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) {
int r;
TOKUTXN txn = NULL;
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 58ba675eb7c..12e5fda226e 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -373,52 +373,48 @@ find_bounds_within_message_tree(
}
}
-/**
- * For each message in the ancestor's buffer (determined by childnum) that
- * is key-wise between lower_bound_exclusive and upper_bound_inclusive,
- * apply the message to the basement node. We treat the bounds as minus
- * or plus infinity respectively if they are NULL. Do not mark the node
- * as dirty (preserve previous state of 'dirty' bit).
- */
+// For each message in the ancestor's buffer (determined by childnum) that
+// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
+// apply the message to the basement node. We treat the bounds as minus
+// or plus infinity respectively if they are NULL. Do not mark the node
+// as dirty (preserve previous state of 'dirty' bit).
static void bnc_apply_messages_to_basement_node(
- FT_HANDLE t, // used for comparison function
- BASEMENTNODE bn, // where to apply messages
+ FT_HANDLE t, // used for comparison function
+ BASEMENTNODE bn, // where to apply messages
FTNODE ancestor, // the ancestor node where we can find messages to apply
- int childnum, // which child buffer of ancestor contains messages we want
- const pivot_bounds &bounds, // contains pivot key bounds of this basement node
- txn_gc_info* gc_info,
- bool* msgs_applied) {
-
+ int childnum, // which child buffer of ancestor contains messages we want
+ const pivot_bounds &
+ bounds, // contains pivot key bounds of this basement node
+ txn_gc_info *gc_info,
+ bool *msgs_applied) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
// Determine the offsets in the message trees between which we need to
// apply messages from this buffer
- STAT64INFO_S stats_delta = {0,0};
+ STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;
uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
- find_bounds_within_message_tree(
- t->ft->cmp,
- bnc->stale_message_tree,
- &bnc->msg_buffer,
- bounds,
- &stale_lbi,
- &stale_ube);
+ find_bounds_within_message_tree(t->ft->cmp,
+ bnc->stale_message_tree,
+ &bnc->msg_buffer,
+ bounds,
+ &stale_lbi,
+ &stale_ube);
} else {
stale_lbi = 0;
stale_ube = 0;
}
uint32_t fresh_lbi, fresh_ube;
- find_bounds_within_message_tree(
- t->ft->cmp,
- bnc->fresh_message_tree,
- &bnc->msg_buffer,
- bounds,
- &fresh_lbi,
- &fresh_ube);
+ find_bounds_within_message_tree(t->ft->cmp,
+ bnc->fresh_message_tree,
+ &bnc->msg_buffer,
+ bounds,
+ &fresh_lbi,
+ &fresh_ube);
// We now know where all the messages we must apply are, so one of the
// following 4 cases will do the application, depending on which of
@@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node(
// We have messages in multiple trees, so we grab all
// the relevant messages' offsets and sort them by MSN, then apply
// them in MSN order.
- const int buffer_size = ((stale_ube - stale_lbi) +
- (fresh_ube - fresh_lbi) +
- bnc->broadcast_list.size());
+ const int buffer_size =
+ ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
+ bnc->broadcast_list.size());
toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
- struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
+ struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
+ .i = 0};
// Populate offsets array with offsets to stale messages
- r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
+ r = bnc->stale_message_tree
+ .iterate_on_range<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(
+ stale_lbi, stale_ube, &sfo_extra);
assert_zero(r);
// Then store fresh offsets, and mark them to be moved to stale later.
- r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
+ r = bnc->fresh_message_tree
+ .iterate_and_mark_range<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(
+ fresh_lbi, fresh_ube, &sfo_extra);
assert_zero(r);
// Store offsets of all broadcast messages.
- r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
+ r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
+ store_msg_buffer_offset>(&sfo_extra);
assert_zero(r);
invariant(sfo_extra.i == buffer_size);
// Sort by MSN.
- toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
+ toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
+ mergesort_r(offsets, buffer_size, bnc->msg_buffer);
// Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true;
- do_bn_apply_msg(
- t,
- bn,
- &bnc->msg_buffer,
- offsets[i],
- gc_info,
- &workdone_this_ancestor,
- &stats_delta,
- &logical_rows_delta);
+ do_bn_apply_msg(t,
+ bn,
+ &bnc->msg_buffer,
+ offsets[i],
+ gc_info,
+ &workdone_this_ancestor,
+ &stats_delta,
+ &logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
- // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
+ // No stale messages to apply, we just apply fresh messages, and mark
+ // them to be moved to stale later.
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
@@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta
- };
- if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
- r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
+ .logical_rows_delta = &logical_rows_delta};
+ if (fresh_ube - fresh_lbi > 0)
+ *msgs_applied = true;
+ r = bnc->fresh_message_tree
+ .iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
+ iterate_do_bn_apply_msg>(
+ fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r);
} else {
invariant(fresh_lbi == fresh_ube);
// No fresh messages to apply, we just apply stale messages.
- if (stale_ube - stale_lbi > 0) *msgs_applied = true;
+ if (stale_ube - stale_lbi > 0)
+ *msgs_applied = true;
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
@@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta
- };
+ .logical_rows_delta = &logical_rows_delta};
- r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
+ r = bnc->stale_message_tree
+ .iterate_on_range<struct iterate_do_bn_apply_msg_extra,
+ iterate_do_bn_apply_msg>(
+ stale_lbi, stale_ube, &iter_extra);
assert_zero(r);
}
//
// update stats
//
if (workdone_this_ancestor > 0) {
- (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
+ (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
+ workdone_this_ancestor);
}
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
+ bn->logical_rows_delta += logical_rows_delta;
}
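// A sketch of what the new per-basement memo enables, assuming the
// eviction paths earlier in this patch: applying ancestor messages bumps
// the tree-wide logical row count immediately, but the result only
// becomes durable once the node is written. Recording the same delta in
// bn->logical_rows_delta lets eviction of a not-yet-written basement
// subtract it back (see the -bn->logical_rows_delta adjustments in the
// flush and partial-eviction callbacks), so rows are not counted twice
// when the basement is re-read and the messages re-applied.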
static void
diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h
index ad0298e81c5..52eefec0936 100644
--- a/storage/tokudb/PerconaFT/ft/node.h
+++ b/storage/tokudb/PerconaFT/ft/node.h
@@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
+ int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
index 1355f3739ee..19811373d16 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
@@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_stdlib.h"
#include "ft/serialize/block_allocator.h"
-#include "ft/serialize/block_allocator_strategy.h"
+#include "ft/serialize/rbtree_mhs.h"
#if TOKU_DEBUG_PARANOID
-#define VALIDATE() validate()
+#define VALIDATE() Validate()
#else
#define VALIDATE()
#endif
-static FILE *ba_trace_file = nullptr;
-
-void block_allocator::maybe_initialize_trace(void) {
- const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH");
- if (ba_trace_path != nullptr) {
- ba_trace_file = toku_os_fopen(ba_trace_path, "w");
- if (ba_trace_file == nullptr) {
- fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), "
- "but it could not be opened for writing (errno %d)\n",
- ba_trace_path, get_maybe_error_errno());
- } else {
- fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path);
- }
- }
-}
-
-void block_allocator::maybe_close_trace() {
- if (ba_trace_file != nullptr) {
- int r = toku_os_fclose(ba_trace_file);
- if (r != 0) {
- fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n",
- r, get_maybe_error_errno());
- } else {
- fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n");
- }
- }
-}
-
-void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) {
- // the alignment must be at least 512 and aligned with 512 to work with direct I/O
- assert(alignment >= 512 && (alignment % 512) == 0);
+void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
+ uint64_t alignment) {
+ // the alignment must be at least 512 and aligned with 512 to work with
+ // direct I/O
+ invariant(alignment >= 512 && (alignment % 512) == 0);
_reserve_at_beginning = reserve_at_beginning;
_alignment = alignment;
_n_blocks = 0;
- _blocks_array_size = 1;
- XMALLOC_N(_blocks_array_size, _blocks_array);
_n_bytes_in_use = reserve_at_beginning;
- _strategy = BA_STRATEGY_FIRST_FIT;
-
- memset(&_trace_lock, 0, sizeof(toku_mutex_t));
- toku_mutex_init(&_trace_lock, nullptr);
+ _tree = new MhsRbTree::Tree(alignment);
+}
+void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
+ CreateInternal(reserve_at_beginning, alignment);
+ _tree->Insert({reserve_at_beginning, MAX_BYTE});
VALIDATE();
}
-void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) {
- _create_internal(reserve_at_beginning, alignment);
- _trace_create();
+void BlockAllocator::Destroy() {
+ delete _tree;
}
-void block_allocator::destroy() {
- toku_free(_blocks_array);
- _trace_destroy();
- toku_mutex_destroy(&_trace_lock);
-}
+void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
+ uint64_t alignment,
+ struct BlockPair *translation_pairs,
+ uint64_t n_blocks) {
+ CreateInternal(reserve_at_beginning, alignment);
+ _n_blocks = n_blocks;
-void block_allocator::set_strategy(enum allocation_strategy strategy) {
- _strategy = strategy;
-}
+ struct BlockPair *XMALLOC_N(n_blocks, pairs);
+ memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
+ std::sort(pairs, pairs + n_blocks);
-void block_allocator::grow_blocks_array_by(uint64_t n_to_add) {
- if (_n_blocks + n_to_add > _blocks_array_size) {
- uint64_t new_size = _n_blocks + n_to_add;
- uint64_t at_least = _blocks_array_size * 2;
- if (at_least > new_size) {
- new_size = at_least;
- }
- _blocks_array_size = new_size;
- XREALLOC_N(_blocks_array_size, _blocks_array);
+ if (pairs[0]._offset > reserve_at_beginning) {
+ _tree->Insert(
+ {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
}
-}
-
-void block_allocator::grow_blocks_array() {
- grow_blocks_array_by(1);
-}
-
-void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
- struct blockpair *pairs, uint64_t n_blocks) {
- _create_internal(reserve_at_beginning, alignment);
-
- _n_blocks = n_blocks;
- grow_blocks_array_by(_n_blocks);
- memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
- std::sort(_blocks_array, _blocks_array + _n_blocks);
for (uint64_t i = 0; i < _n_blocks; i++) {
- // Allocator does not support size 0 blocks. See block_allocator_free_block.
- invariant(_blocks_array[i].size > 0);
- invariant(_blocks_array[i].offset >= _reserve_at_beginning);
- invariant(_blocks_array[i].offset % _alignment == 0);
-
- _n_bytes_in_use += _blocks_array[i].size;
+ // Allocator does not support size 0 blocks. See
+ // block_allocator_free_block.
+ invariant(pairs[i]._size > 0);
+ invariant(pairs[i]._offset >= _reserve_at_beginning);
+ invariant(pairs[i]._offset % _alignment == 0);
+
+ _n_bytes_in_use += pairs[i]._size;
+
+ MhsRbTree::OUUInt64 free_size(MAX_BYTE);
+ MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
+ if (i < n_blocks - 1) {
+ MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
+ invariant(next_offset >= free_offset);
+ free_size = next_offset - free_offset;
+ if (free_size == 0)
+ continue;
+ }
+ _tree->Insert({free_offset, free_size});
}
-
+ toku_free(pairs);
VALIDATE();
-
- _trace_create_from_blockpairs();
}
// Effect: align a value by rounding up.
-static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
}
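// Align() in concrete numbers, assuming the usual 512-byte direct-I/O
// alignment:
//
//   Align(0, 512)   == ((0 + 511) / 512) * 512   == 0
//   Align(1, 512)   == ((1 + 511) / 512) * 512   == 512
//   Align(512, 512) == ((512 + 511) / 512) * 512 == 512
//   Align(513, 512) == ((513 + 511) / 512) * 512 == 1024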
-struct block_allocator::blockpair *
-block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
- switch (_strategy) {
- case BA_STRATEGY_FIRST_FIT:
- return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
- case BA_STRATEGY_BEST_FIT:
- return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
- case BA_STRATEGY_HEAT_ZONE:
- return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
- case BA_STRATEGY_PADDED_FIT:
- return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment);
- default:
- abort();
- }
-}
-
-// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
-void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
- struct blockpair *bp;
-
+// Effect: Allocate a block. The resulting block must be aligned on the
+// ba->alignment (which to make direct_io happy must be a positive multiple of
+// 512).
+void BlockAllocator::AllocBlock(uint64_t size,
+ uint64_t *offset) {
// Allocator does not support size 0 blocks. See block_allocator_free_block.
invariant(size > 0);
- grow_blocks_array();
_n_bytes_in_use += size;
+ *offset = _tree->Remove(size);
- uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment);
-
- if (_n_blocks == 0) {
- // First and only block
- assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use
- _blocks_array[0].offset = align(_reserve_at_beginning, _alignment);
- _blocks_array[0].size = size;
- *offset = _blocks_array[0].offset;
- goto done;
- } else if (end_of_reserve + size <= _blocks_array[0].offset ) {
- // Check to see if the space immediately after the reserve is big enough to hold the new block.
- bp = &_blocks_array[0];
- memmove(bp + 1, bp, _n_blocks * sizeof(*bp));
- bp[0].offset = end_of_reserve;
- bp[0].size = size;
- *offset = end_of_reserve;
- goto done;
- }
-
- bp = choose_block_to_alloc_after(size, heat);
- if (bp != nullptr) {
- // our allocation strategy chose the space after `bp' to fit the new block
- uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
- uint64_t blocknum = bp - _blocks_array;
- invariant(&_blocks_array[blocknum] == bp);
- invariant(blocknum < _n_blocks);
- memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp));
- bp[1].offset = answer_offset;
- bp[1].size = size;
- *offset = answer_offset;
- } else {
- // It didn't fit anywhere, so fit it on the end.
- assert(_n_blocks < _blocks_array_size);
- bp = &_blocks_array[_n_blocks];
- uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment);
- bp->offset = answer_offset;
- bp->size = size;
- *offset = answer_offset;
- }
-
-done:
_n_blocks++;
VALIDATE();
-
- _trace_alloc(size, heat, *offset);
-}
-
-// Find the index in the blocks array that has a particular offset. Requires that the block exist.
-// Use binary search so it runs fast.
-int64_t block_allocator::find_block(uint64_t offset) {
- VALIDATE();
- if (_n_blocks == 1) {
- assert(_blocks_array[0].offset == offset);
- return 0;
- }
-
- uint64_t lo = 0;
- uint64_t hi = _n_blocks;
- while (1) {
- assert(lo < hi); // otherwise no such block exists.
- uint64_t mid = (lo + hi) / 2;
- uint64_t thisoff = _blocks_array[mid].offset;
- if (thisoff < offset) {
- lo = mid + 1;
- } else if (thisoff > offset) {
- hi = mid;
- } else {
- return mid;
- }
- }
}
-// To support 0-sized blocks, we need to include size as an input to this function.
+// To support 0-sized blocks, we need to include size as an input to this
+// function.
// All 0-sized blocks at the same offset can be considered identical, but
// a 0-sized block can share offset with a non-zero sized block.
-// The non-zero sized block is not exchangable with a zero sized block (or vice versa),
-// so inserting 0-sized blocks can cause corruption here.
-void block_allocator::free_block(uint64_t offset) {
+// The non-zero sized block is not exchangeable with a zero sized block (or vice
+// versa), so inserting 0-sized blocks can cause corruption here.
+void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
VALIDATE();
- int64_t bn = find_block(offset);
- assert(bn >= 0); // we require that there is a block with that offset.
- _n_bytes_in_use -= _blocks_array[bn].size;
- memmove(&_blocks_array[bn], &_blocks_array[bn + 1],
- (_n_blocks - bn - 1) * sizeof(struct blockpair));
+ _n_bytes_in_use -= size;
+ _tree->Insert({offset, size});
_n_blocks--;
VALIDATE();
-
- _trace_free(offset);
-}
-
-uint64_t block_allocator::block_size(uint64_t offset) {
- int64_t bn = find_block(offset);
- assert(bn >=0); // we require that there is a block with that offset.
- return _blocks_array[bn].size;
}
-uint64_t block_allocator::allocated_limit() const {
- if (_n_blocks == 0) {
- return _reserve_at_beginning;
- } else {
- struct blockpair *last = &_blocks_array[_n_blocks - 1];
- return last->offset + last->size;
- }
+uint64_t BlockAllocator::AllocatedLimit() const {
+ MhsRbTree::Node *max_node = _tree->MaxNode();
+ return rbn_offset(max_node).ToInt();
}
-// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
+// Effect: Consider the blocks in sorted order. The reserved block at the
+// beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block that big, return nonzero if b is too big.
-int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) {
- if (b ==0 ) {
+int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
+ uint64_t *offset,
+ uint64_t *size) {
+ MhsRbTree::Node *x, *y;
+ if (b == 0) {
*offset = 0;
*size = _reserve_at_beginning;
- return 0;
+ return 0;
} else if (b > _n_blocks) {
return -1;
} else {
- *offset =_blocks_array[b - 1].offset;
- *size =_blocks_array[b - 1].size;
+ x = _tree->MinNode();
+ for (uint64_t i = 1; i <= b; i++) {
+ y = x;
+ x = _tree->Successor(x);
+ }
+ *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
+ *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
return 0;
}
}
+struct VisUnusedExtra {
+ TOKU_DB_FRAGMENTATION _report;
+ uint64_t _align;
+};
+
+static void VisUnusedCollector(void *extra,
+ MhsRbTree::Node *node,
+ uint64_t UU(depth)) {
+ struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
+ TOKU_DB_FRAGMENTATION report = v_e->_report;
+ uint64_t alignm = v_e->_align;
+
+ MhsRbTree::OUUInt64 offset = rbn_offset(node);
+ MhsRbTree::OUUInt64 size = rbn_size(node);
+ MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
+ uint64_t free_space = (offset + size - answer_offset).ToInt();
+ if (free_space > 0) {
+ report->unused_bytes += free_space;
+ report->unused_blocks++;
+ if (free_space > report->largest_unused_block) {
+ report->largest_unused_block = free_space;
+ }
+ }
+}
// Requires: report->file_size_bytes is filled in
// Requires: report->data_bytes is filled in
// Requires: report->checkpoint_bytes_additional is filled in
-void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) {
- assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional);
+void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
+ invariant(_n_bytes_in_use ==
+ report->data_bytes + report->checkpoint_bytes_additional);
report->unused_bytes = 0;
report->unused_blocks = 0;
report->largest_unused_block = 0;
- if (_n_blocks > 0) {
- //Deal with space before block 0 and after reserve:
- {
- struct blockpair *bp = &_blocks_array[0];
- assert(bp->offset >= align(_reserve_at_beginning, _alignment));
- uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment);
- if (free_space > 0) {
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
-
- //Deal with space between blocks:
- for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) {
- // Consider the space after blocknum
- struct blockpair *bp = &_blocks_array[blocknum];
- uint64_t this_offset = bp[0].offset;
- uint64_t this_size = bp[0].size;
- uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
- uint64_t next_offset = bp[1].offset;
- uint64_t free_space = next_offset - end_of_this_block;
- if (free_space > 0) {
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
-
- //Deal with space after last block
- {
- struct blockpair *bp = &_blocks_array[_n_blocks-1];
- uint64_t this_offset = bp[0].offset;
- uint64_t this_size = bp[0].size;
- uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
- if (end_of_this_block < report->file_size_bytes) {
- uint64_t free_space = report->file_size_bytes - end_of_this_block;
- assert(free_space > 0);
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
- } else {
- // No blocks. Just the reserve.
- uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment);
- if (end_of_this_block < report->file_size_bytes) {
- uint64_t free_space = report->file_size_bytes - end_of_this_block;
- assert(free_space > 0);
- report->unused_bytes += free_space;
- report->unused_blocks++;
- if (free_space > report->largest_unused_block) {
- report->largest_unused_block = free_space;
- }
- }
- }
+ struct VisUnusedExtra extra = {report, _alignment};
+ _tree->InOrderVisitor(VisUnusedCollector, &extra);
}
-void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) {
- report->data_bytes = _n_bytes_in_use;
- report->data_blocks = _n_blocks;
+void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
+ report->data_bytes = _n_bytes_in_use;
+ report->data_blocks = _n_blocks;
report->file_size_bytes = 0;
report->checkpoint_bytes_additional = 0;
- get_unused_statistics(report);
+ UnusedStatistics(report);
}
-void block_allocator::validate() const {
- uint64_t n_bytes_in_use = _reserve_at_beginning;
- for (uint64_t i = 0; i < _n_blocks; i++) {
- n_bytes_in_use += _blocks_array[i].size;
- if (i > 0) {
- assert(_blocks_array[i].offset > _blocks_array[i - 1].offset);
- assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size );
- }
- }
- assert(n_bytes_in_use == _n_bytes_in_use);
-}
-
-// Tracing
-
-void block_allocator::_trace_create(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n",
- this, _reserve_at_beginning, _alignment);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_create_from_blockpairs(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ",
- this, _reserve_at_beginning, _alignment);
- for (uint64_t i = 0; i < _n_blocks; i++) {
- fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ",
- _blocks_array[i].offset, _blocks_array[i].size);
- }
- fprintf(ba_trace_file, "\n");
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_destroy(void) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_destroy %p\n", this);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
-}
-
-void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- this, size, heat, offset);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
+struct ValidateExtra {
+ uint64_t _bytes;
+ MhsRbTree::Node *_pre_node;
+};
+static void VisUsedBlocksInOrder(void *extra,
+ MhsRbTree::Node *cur_node,
+ uint64_t UU(depth)) {
+ struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
+ MhsRbTree::Node *pre_node = v_e->_pre_node;
+ // verify no overlaps
+ if (pre_node) {
+ invariant(rbn_size(pre_node) > 0);
+ invariant(rbn_offset(cur_node) >
+ rbn_offset(pre_node) + rbn_size(pre_node));
+ MhsRbTree::OUUInt64 used_space =
+ rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
+ v_e->_bytes += used_space.ToInt();
+ } else {
+ v_e->_bytes += rbn_offset(cur_node).ToInt();
}
+ v_e->_pre_node = cur_node;
}
-void block_allocator::_trace_free(uint64_t offset) {
- if (ba_trace_file != nullptr) {
- toku_mutex_lock(&_trace_lock);
- fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset);
- toku_mutex_unlock(&_trace_lock);
-
- fflush(ba_trace_file);
- }
+void BlockAllocator::Validate() const {
+ _tree->ValidateBalance();
+ _tree->ValidateMhs();
+ struct ValidateExtra extra = {0, nullptr};
+ _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
+ invariant(extra._bytes == _n_bytes_in_use);
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
index 9b2c1553e7f..648ea9a9ef2 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
@@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_pthread.h"
#include "portability/toku_stdint.h"
#include "portability/toku_stdlib.h"
+#include "ft/serialize/rbtree_mhs.h"
// Block allocator.
//
@@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// The allocation of block numbers is handled elsewhere.
//
// When creating a block allocator we also specify a certain-sized
-// block at the beginning that is preallocated (and cannot be allocated or freed)
+// block at the beginning that is preallocated (and cannot be allocated or
+// freed)
//
// We can allocate blocks of a particular size at a particular location.
-// We can allocate blocks of a particular size at a location chosen by the allocator.
// We can free blocks.
// We can determine the size of a block.
-
-class block_allocator {
-public:
+#define MAX_BYTE 0xffffffffffffffff
+class BlockAllocator {
+ public:
static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;
// How much must be reserved at the beginning for the block?
- // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
+ // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1
+ // pointer for each root.
// So 4096 should be enough.
static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;
-
- static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
+
+ static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT ==
+ 0,
"block allocator header must have proper alignment");
- static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
+ static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE =
+ BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
- enum allocation_strategy {
- BA_STRATEGY_FIRST_FIT = 1,
- BA_STRATEGY_BEST_FIT,
- BA_STRATEGY_PADDED_FIT,
- BA_STRATEGY_HEAT_ZONE
- };
-
- struct blockpair {
- uint64_t offset;
- uint64_t size;
- blockpair(uint64_t o, uint64_t s) :
- offset(o), size(s) {
- }
- int operator<(const struct blockpair &rhs) const {
- return offset < rhs.offset;
- }
- int operator<(const uint64_t &o) const {
- return offset < o;
+ struct BlockPair {
+ uint64_t _offset;
+ uint64_t _size;
+ BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+ int operator<(const struct BlockPair &rhs) const {
+ return _offset < rhs._offset;
}
+ int operator<(const uint64_t &o) const { return _offset < o; }
};
- // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
- // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
+ // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+ // bytes are not put into a block.
// All blocks be start on a multiple of ALIGNMENT.
// Aborts if we run out of memory.
// Parameters
- // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
+ // reserve_at_beginning (IN) Size of reserved block at beginning.
+ // This size does not have to be aligned.
// alignment (IN) Block alignment.
- void create(uint64_t reserve_at_beginning, uint64_t alignment);
+ void Create(uint64_t reserve_at_beginning, uint64_t alignment);
- // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
- // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
- // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
+ // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+ // bytes are not put into a block.
+ // The allocator is initialized to contain `n_blocks' of BlockPairs,
+ // taken from `pairs'
// All blocks be start on a multiple of ALIGNMENT.
// Aborts if we run out of memory.
// Parameters
// pairs, unowned array of pairs to copy
// n_blocks, Size of pairs array
- // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
+ // reserve_at_beginning (IN) Size of reserved block at beginning.
+ // This size does not have to be aligned.
// alignment (IN) Block alignment.
- void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
- struct blockpair *pairs, uint64_t n_blocks);
+ void CreateFromBlockPairs(uint64_t reserve_at_beginning,
+ uint64_t alignment,
+ struct BlockPair *pairs,
+ uint64_t n_blocks);
// Effect: Destroy this block allocator
- void destroy();
-
- // Effect: Set the allocation strategy that the allocator should use
- // Requires: No other threads are operating on this block allocator
- void set_strategy(enum allocation_strategy strategy);
+ void Destroy();
- // Effect: Allocate a block of the specified size at an address chosen by the allocator.
+ // Effect: Allocate a block of the specified size at an address chosen by
+ // the allocator.
// Aborts if anything goes wrong.
// The block address will be a multiple of the alignment.
// Parameters:
- // size (IN): The size of the block. (The size does not have to be aligned.)
+ // size (IN): The size of the block. (The size does not have to be
+ // aligned.)
// offset (OUT): The location of the block.
- // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
- // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary
- void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
+ void AllocBlock(uint64_t size, uint64_t *offset);
// Effect: Free the block at offset.
// Requires: There must be a block currently allocated at that offset.
// Parameters:
// offset (IN): The offset of the block.
- void free_block(uint64_t offset);
+ void FreeBlock(uint64_t offset, uint64_t size);
- // Effect: Return the size of the block that starts at offset.
- // Requires: There must be a block currently allocated at that offset.
- // Parameters:
- // offset (IN): The offset of the block.
- uint64_t block_size(uint64_t offset);
-
- // Effect: Check to see if the block allocator is OK. This may take a long time.
+ // Effect: Check to see if the block allocator is OK. This may take a long
+ // time.
// Usage Hints: Probably only use this for unit tests.
// TODO: Private?
- void validate() const;
+ void Validate() const;
// Effect: Return the unallocated block address of "infinite" size.
- // That is, return the smallest address that is above all the allocated blocks.
- uint64_t allocated_limit() const;
+ // That is, return the smallest address that is above all the allocated
+ // blocks.
+ uint64_t AllocatedLimit() const;
- // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
+ // Effect: Consider the blocks in sorted order. The reserved block at the
+ // beginning is number 0. The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block with that number, return nonzero if b is too big.
// Rationale: This is probably useful only for tests.
- int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
+ int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size);
// Effect: Fill in report to indicate how the file is used.
- // Requires:
+ // Requires:
// report->file_size_bytes is filled in
// report->data_bytes is filled in
// report->checkpoint_bytes_additional is filled in
- void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
+ void UnusedStatistics(TOKU_DB_FRAGMENTATION report);
// Effect: Fill in report->data_bytes with the number of bytes in use
- // Fill in report->data_blocks with the number of blockpairs in use
+ // Fill in report->data_blocks with the number of BlockPairs in use
// Fill in unused statistics using this->UnusedStatistics()
// Requires:
// report->file_size is ignored on return
// report->checkpoint_bytes_additional is ignored on return
- void get_statistics(TOKU_DB_FRAGMENTATION report);
-
- // Block allocator tracing.
- // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
- // should be written to.
- // - Trace may be replayed by ba_trace_replay tool in tools/ directory
- // eg: "cat mytracefile | ba_trace_replay"
- static void maybe_initialize_trace();
- static void maybe_close_trace();
-
-private:
- void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
- void grow_blocks_array_by(uint64_t n_to_add);
- void grow_blocks_array();
- int64_t find_block(uint64_t offset);
- struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
-
- // Tracing
- toku_mutex_t _trace_lock;
- void _trace_create(void);
- void _trace_create_from_blockpairs(void);
- void _trace_destroy(void);
- void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
- void _trace_free(uint64_t offset);
+ void Statistics(TOKU_DB_FRAGMENTATION report);
+
+ virtual ~BlockAllocator(){};
+
+ private:
+ void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment);
// How much to reserve at the beginning
uint64_t _reserve_at_beginning;
@@ -203,12 +181,8 @@ private:
uint64_t _alignment;
// How many blocks
uint64_t _n_blocks;
- // How big is the blocks_array. Must be >= n_blocks.
- uint64_t _blocks_array_size;
- // These blocks are sorted by address.
- struct blockpair *_blocks_array;
- // Including the reserve_at_beginning
uint64_t _n_bytes_in_use;
- // The allocation strategy are we using
- enum allocation_strategy _strategy;
+
+ // These blocks are sorted by address.
+ MhsRbTree::Tree *_tree;
};
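
For orientation, the renamed interface can be driven roughly as below. This sketch compiles only against block_allocator.h; demo() and the 8192/4096/1000/5000 values are illustrative, not part of the patch. Note that FreeBlock() now takes the block size from the caller, which is what let the old block_size() lookup be dropped above.

    // Sketch: drives the BlockAllocator API shown in this header.
    // Assumes block_allocator.h; everything else here is hypothetical.
    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    void demo(BlockAllocator *ba) {
        // Keep 8192 bytes reserved at the front; align blocks to 4096.
        ba->Create(8192, 4096);

        uint64_t a, b;
        ba->AllocBlock(1000, &a);  // allocator picks an aligned offset
        ba->AllocBlock(5000, &b);

        // FreeBlock takes the size as well as the offset: the caller (the
        // block translation table) already tracks per-block sizes.
        ba->FreeBlock(a, 1000);

        printf("limit: %" PRIu64 "\n", ba->AllocatedLimit());
        ba->Destroy();
    }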
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
deleted file mode 100644
index 62bb8fc4a87..00000000000
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include <algorithm>
-
-#include <string.h>
-
-#include "portability/toku_assert.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
- return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
-}
-
-static uint64_t _roundup_to_power_of_two(uint64_t value) {
- uint64_t r = 4096;
- while (r < value) {
- r *= 2;
- invariant(r > 0);
- }
- return r;
-}
-
-// First fit block allocation
-static struct block_allocator::blockpair *
-_first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t max_padding) {
- if (n_blocks == 1) {
- // won't enter loop, can't underflow the direction < 0 case
- return nullptr;
- }
-
- struct block_allocator::blockpair *bp = &blocks_array[0];
- for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
- n_spaces_to_check--, bp++) {
- // Consider the space after bp
- uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
- uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
- if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
- invariant(bp - blocks_array < (int64_t) n_blocks);
- return bp;
- }
- }
- return nullptr;
-}
-
-static struct block_allocator::blockpair *
-_first_fit_bw(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
- if (n_blocks == 1) {
- // won't enter loop, can't underflow the direction < 0 case
- return nullptr;
- }
-
- struct block_allocator::blockpair *bp = &blocks_array[-1];
- for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
- n_spaces_to_check--, bp--) {
- // Consider the space after bp
- uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
- uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
- if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
- invariant(blocks_array - bp < (int64_t) n_blocks);
- return bp;
- }
- }
- return nullptr;
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- return _first_fit(blocks_array, n_blocks, size, alignment, 0);
-}
-
-// Best fit block allocation
-struct block_allocator::blockpair *
-block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- struct block_allocator::blockpair *best_bp = nullptr;
- uint64_t best_hole_size = 0;
- for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
- // Consider the space after blocknum
- struct block_allocator::blockpair *bp = &blocks_array[blocknum];
- uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
- uint64_t possible_end_offset = possible_offset + size;
- if (possible_end_offset <= bp[1].offset) {
- // It fits here. Is it the best fit?
- uint64_t hole_size = bp[1].offset - possible_end_offset;
- if (best_bp == nullptr || hole_size < best_hole_size) {
- best_hole_size = hole_size;
- best_bp = bp;
- }
- }
- }
- return best_bp;
-}
-
-static uint64_t padded_fit_alignment = 4096;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-// portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_padded_fit_alignment_from_env(void) {
- // TODO: Should be in portability as 'toku_os_getenv()?'
- const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
- if (s != nullptr && strlen(s) > 0) {
- const int64_t alignment = strtoll(s, nullptr, 10);
- if (alignment <= 0) {
- fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
- "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
- s, padded_fit_alignment);
- } else {
- padded_fit_alignment = _roundup_to_power_of_two(alignment);
- fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
- padded_fit_alignment);
- }
- }
-}
-
-// First fit into a block that is oversized by up to max_padding.
-// The hope is that if we purposefully waste a bit of space at allocation
-// time we'll be more likely to reuse this block later.
-struct block_allocator::blockpair *
-block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment) {
- return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
-}
-
-static double hot_zone_threshold = 0.85;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-// portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_hot_zone_threshold_from_env(void) {
- // TODO: Should be in portability as 'toku_os_getenv()?'
- const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
- if (s != nullptr && strlen(s) > 0) {
- const double hot_zone = strtod(s, nullptr);
- if (hot_zone < 1 || hot_zone > 99) {
- fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
- "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
- hot_zone_threshold = 85 / 100;
- } else {
- fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
- hot_zone_threshold = hot_zone / 100;
- }
- }
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t heat) {
- if (heat > 0) {
- struct block_allocator::blockpair *bp, *boundary_bp;
-
- // Hot allocation. Find the beginning of the hot zone.
- boundary_bp = &blocks_array[n_blocks - 1];
- uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
- uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
-
- boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
- uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
- uint64_t blocks_outside_zone = boundary_bp - blocks_array;
- invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
-
- if (blocks_in_zone > 0) {
- // Find the first fit in the hot zone, going forward.
- bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
- if (bp != nullptr) {
- return bp;
- }
- }
- if (blocks_outside_zone > 0) {
- // Find the first fit in the cold zone, going backwards.
- bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
- if (bp != nullptr) {
- return bp;
- }
- }
- } else {
- // Cold allocations are simply first-fit from the beginning.
- return _first_fit(blocks_array, n_blocks, size, alignment, 0);
- }
- return nullptr;
-}
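
The file deleted above held the pluggable placement strategies (first fit, best fit, padded fit, heat zone) that scanned the sorted blockpair array; the new allocator answers the same "where does this block go" question from its MhsRbTree instead. For reference, here is a self-contained sketch of the first-fit scan that _first_fit performed; the std::vector representation and the names are illustrative, not the PerconaFT types:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Block { uint64_t offset, size; };  // kept sorted by offset

    static uint64_t align_up(uint64_t v, uint64_t a) {
        return ((v + a - 1) / a) * a;
    }

    // First fit: return the aligned start of the first hole between two
    // consecutive blocks that can take `size` bytes, or UINT64_MAX if none.
    static uint64_t first_fit(const std::vector<Block> &blocks,
                              uint64_t size, uint64_t alignment) {
        for (size_t i = 0; i + 1 < blocks.size(); i++) {
            uint64_t start = align_up(blocks[i].offset + blocks[i].size,
                                      alignment);
            if (start + size <= blocks[i + 1].offset)
                return start;  // fits in the hole before blocks[i + 1]
        }
        return UINT64_MAX;     // no hole: caller grows the file instead
    }

    int main() {
        std::vector<Block> blocks = {{0, 4096}, {8192, 4096}, {65536, 512}};
        // The hole at offset 4096 is the first fit for a 4000-byte block.
        printf("%llu\n", (unsigned long long)first_fit(blocks, 4000, 512));
        return 0;
    }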
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
index 7101ba9f58c..d2532134d96 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
@@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/ft-internal.h"
// TODO: reorganize this dependency (FT-303)
-#include "ft/ft-ops.h" // for toku_maybe_truncate_file
+#include "ft/ft-ops.h" // for toku_maybe_truncate_file
#include "ft/serialize/block_table.h"
#include "ft/serialize/rbuf.h"
#include "ft/serialize/wbuf.h"
#include "ft/serialize/block_allocator.h"
-
#include "util/nb_mutex.h"
#include "util/scoped_malloc.h"
// indicates the end of a freelist
-static const BLOCKNUM freelist_null = { -1 };
+static const BLOCKNUM freelist_null = {-1};
// value of block_translation_pair.size if blocknum is unused
-static const DISKOFF size_is_free = (DISKOFF) -1;
+static const DISKOFF size_is_free = (DISKOFF)-1;
-// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock
-static const DISKOFF diskoff_unused = (DISKOFF) -2;
+// value of block_translation_pair.u.diskoff if blocknum is used but does not
+// yet have a diskblock
+static const DISKOFF diskoff_unused = (DISKOFF)-2;
-void block_table::_mutex_lock() {
- toku_mutex_lock(&_mutex);
-}
+void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); }
-void block_table::_mutex_unlock() {
- toku_mutex_unlock(&_mutex);
-}
+void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); }
// TODO: Move lock to FT
void toku_ft_lock(FT ft) {
@@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) {
bt->_mutex_unlock();
}
-// There are two headers: the reserve must fit them both and be suitably aligned.
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0,
+// There are two headers: the reserve must fit them both and be suitably
+// aligned.
+static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT ==
+ 0,
"Block allocator's header reserve must be suitibly aligned");
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
- block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- "Block allocator's total header reserve must exactly fit two headers");
+static_assert(
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ "Block allocator's total header reserve must exactly fit two headers");
// does NOT initialize the block allocator: the caller is responsible
void block_table::_create_internal() {
@@ -99,25 +98,30 @@ void block_table::_create_internal() {
memset(&_inprogress, 0, sizeof(struct translation));
memset(&_checkpointed, 0, sizeof(struct translation));
memset(&_mutex, 0, sizeof(_mutex));
+ _bt_block_allocator = new BlockAllocator();
toku_mutex_init(&_mutex, nullptr);
nb_mutex_init(&_safe_file_size_lock);
}
-// Fill in the checkpointed translation from buffer, and copy checkpointed to current.
-// The one read from disk is the last known checkpointed one, so we are keeping it in
-// place and then setting current (which is never stored on disk) for current use.
-// The translation_buffer has translation only, we create the rest of the block_table.
-int block_table::create_from_buffer(int fd,
- DISKOFF location_on_disk, //Location of translation_buffer
- DISKOFF size_on_disk,
- unsigned char *translation_buffer) {
+// Fill in the checkpointed translation from buffer, and copy checkpointed to
+// current.
+// The one read from disk is the last known checkpointed one, so we are
+// keeping it in place and then setting current (which is never stored on
+// disk) for current use.
+// The translation_buffer has translation only, we create the rest of the
+// block_table.
+int block_table::create_from_buffer(
+ int fd,
+ DISKOFF location_on_disk, // Location of translation_buffer
+ DISKOFF size_on_disk,
+ unsigned char *translation_buffer) {
// Does not initialize the block allocator
_create_internal();
// Deserialize the translation and copy it to current
- int r = _translation_deserialize_from_buffer(&_checkpointed,
- location_on_disk, size_on_disk,
- translation_buffer);
+ int r = _translation_deserialize_from_buffer(
+ &_checkpointed, location_on_disk, size_on_disk, translation_buffer);
if (r != 0) {
return r;
}
@@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd,
invariant(file_size >= 0);
_safe_file_size = file_size;
- // Gather the non-empty translations and use them to create the block allocator
+ // Gather the non-empty translations and use them to create the block
+ // allocator
toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b *
- sizeof(struct block_allocator::blockpair));
- struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
+ sizeof(struct BlockAllocator::BlockPair));
+ struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
uint64_t n_pairs = 0;
for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) {
struct block_translation_pair pair = _checkpointed.block_translation[i];
if (pair.size > 0) {
invariant(pair.u.diskoff != diskoff_unused);
- pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size);
+ pairs[n_pairs++] =
+ BlockAllocator::BlockPair(pair.u.diskoff, pair.size);
}
}
- _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT,
- pairs, n_pairs);
+ _bt_block_allocator->CreateFromBlockPairs(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT,
+ pairs,
+ n_pairs);
return 0;
}
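
create_from_buffer() walks the checkpointed translation once, keeps only entries with a positive size, and seeds the allocator with them via CreateFromBlockPairs(). The filtering step in isolation, with a plain struct standing in for block_translation_pair:

    #include <cstdint>
    #include <vector>

    struct Pair { int64_t diskoff, size; };  // stand-in for block_translation_pair

    // Keep only blocknums that occupy disk space: size > 0 means allocated;
    // size == 0 (no diskblock assigned yet) and size == -1 (free) are skipped.
    static std::vector<Pair> gather_used(const std::vector<Pair> &translation) {
        std::vector<Pair> used;
        for (const Pair &p : translation)
            if (p.size > 0)
                used.push_back(p);
        return used;
    }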
@@ -155,8 +163,10 @@ void block_table::create() {
_create_internal();
_checkpointed.type = TRANSLATION_CHECKPOINTED;
- _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS);
- _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b;
+ _checkpointed.smallest_never_used_blocknum =
+ make_blocknum(RESERVED_BLOCKNUMS);
+ _checkpointed.length_of_array =
+ _checkpointed.smallest_never_used_blocknum.b;
_checkpointed.blocknum_freelist_head = freelist_null;
XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation);
for (int64_t i = 0; i < _checkpointed.length_of_array; i++) {
@@ -164,12 +174,13 @@ void block_table::create() {
_checkpointed.block_translation[i].u.diskoff = diskoff_unused;
}
- // we just created a default checkpointed, now copy it to current.
+ // we just created a default checkpointed, now copy it to current.
_copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);
// Create an empty block allocator.
- _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
- block_allocator::BLOCK_ALLOCATOR_ALIGNMENT);
+ _bt_block_allocator->Create(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+ BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);
}
// TODO: Refactor with FT-303
@@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) {
void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) {
toku_mutex_assert_locked(&_mutex);
- uint64_t new_size_needed = _bt_block_allocator.allocated_limit();
- //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful.
- if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) {
+ uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit();
+ // Save a call to toku_os_get_file_size (kernel call) if unlikely to be
+ // useful.
+ if (new_size_needed < size_needed_before &&
+ new_size_needed < _safe_file_size) {
nb_mutex_lock(&_safe_file_size_lock, &_mutex);
// Must hold _safe_file_size_lock to change _safe_file_size.
if (new_size_needed < _safe_file_size) {
int64_t safe_file_size_before = _safe_file_size;
- // Not safe to use the 'to-be-truncated' portion until truncate is done.
+ // Not safe to use the 'to-be-truncated' portion until truncate is
+ // done.
_safe_file_size = new_size_needed;
_mutex_unlock();
uint64_t size_after;
- toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after);
+ toku_maybe_truncate_file(
+ fd, new_size_needed, safe_file_size_before, &size_after);
_mutex_lock();
_safe_file_size = size_after;
@@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) {
_mutex_unlock();
}
-void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) {
- // We intend to malloc a fresh block, so the incoming translation should be empty
+void block_table::_copy_translation(struct translation *dst,
+ struct translation *src,
+ enum translation_type newtype) {
+ // We intend to malloc a fresh block, so the incoming translation should be
+ // empty
invariant_null(dst->block_translation);
invariant(src->length_of_array >= src->smallest_never_used_blocknum.b);
invariant(newtype == TRANSLATION_DEBUG ||
- (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) ||
- (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT));
+ (src->type == TRANSLATION_CURRENT &&
+ newtype == TRANSLATION_INPROGRESS) ||
+ (src->type == TRANSLATION_CHECKPOINTED &&
+ newtype == TRANSLATION_CURRENT));
dst->type = newtype;
dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum;
- dst->blocknum_freelist_head = src->blocknum_freelist_head;
+ dst->blocknum_freelist_head = src->blocknum_freelist_head;
- // destination btt is of fixed size. Allocate + memcpy the exact length necessary.
+ // destination btt is of fixed size. Allocate + memcpy the exact length
+ // necessary.
dst->length_of_array = dst->smallest_never_used_blocknum.b;
XMALLOC_N(dst->length_of_array, dst->block_translation);
- memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation));
+ memcpy(dst->block_translation,
+ src->block_translation,
+ dst->length_of_array * sizeof(*dst->block_translation));
// New version of btt is not yet stored on disk.
dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0;
- dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused;
+ dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff =
+ diskoff_unused;
}
int64_t block_table::get_blocks_in_use_unlocked() {
@@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() {
struct translation *t = &_current;
int64_t num_blocks = 0;
{
- //Reserved blocknums do not get upgraded; They are part of the header.
- for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+ // Reserved blocknums do not get upgraded; They are part of the header.
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+ b.b++) {
if (t->block_translation[b.b].size != size_is_free) {
num_blocks++;
}
@@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() {
}
void block_table::_maybe_optimize_translation(struct translation *t) {
- //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just
- //on a free list. Doing so requires us to regenerate the free list.
- //This is O(n) work, so do it only if you're already doing that.
+ // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums
+ // instead of just on a free list). Doing so requires us to regenerate the
+ // free list. This is O(n) work, so do it only if you're already doing that.
BLOCKNUM b;
paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
- //Calculate how large the free suffix is.
+ // Calculate how large the free suffix is.
int64_t freed;
{
- for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) {
- if (t->block_translation[b.b-1].size != size_is_free) {
+ for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS;
+ b.b--) {
+ if (t->block_translation[b.b - 1].size != size_is_free) {
break;
}
}
freed = t->smallest_never_used_blocknum.b - b.b;
}
- if (freed>0) {
+ if (freed > 0) {
t->smallest_never_used_blocknum.b = b.b;
- if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) {
- //We're using more memory than necessary to represent this now. Reduce.
+ if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) {
+ // We're using more memory than necessary to represent this now.
+ // Reduce.
uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
XREALLOC_N(new_length, t->block_translation);
t->length_of_array = new_length;
- //No need to zero anything out.
+ // No need to zero anything out.
}
- //Regenerate free list.
+ // Regenerate free list.
t->blocknum_freelist_head.b = freelist_null.b;
- for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+ b.b++) {
if (t->block_translation[b.b].size == size_is_free) {
- t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
- t->blocknum_freelist_head = b;
+ t->block_translation[b.b].u.next_free_blocknum =
+ t->blocknum_freelist_head;
+ t->blocknum_freelist_head = b;
}
}
}
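
_maybe_optimize_translation() above does three things in one O(n) pass: trims the suffix of free blocknums, shrinks the array when less than a quarter of it is live, and regenerates the free list. The same steps over plain arrays, with hypothetical names and a resize standing in for XREALLOC_N:

    #include <cstdint>
    #include <vector>

    static const int64_t SIZE_IS_FREE = -1;   // mirrors size_is_free
    static const int64_t FREELIST_NULL = -1;  // mirrors freelist_null

    struct Entry { int64_t size; int64_t next_free; };

    static void optimize(std::vector<Entry> &t, int64_t reserved,
                         int64_t &never_used, int64_t &freelist_head) {
        // 1. How long is the run of free entries at the tail?
        int64_t b = never_used;
        while (b > reserved && t[b - 1].size == SIZE_IS_FREE)
            b--;
        if (never_used - b == 0)
            return;                        // nothing freed, keep as-is
        never_used = b;
        // 2. Shrink if under a quarter of the array is live.
        if ((int64_t)t.size() / 4 > never_used)
            t.resize(never_used * 2);      // stand-in for XREALLOC_N
        // 3. Regenerate the free list over the surviving prefix.
        freelist_head = FREELIST_NULL;
        for (b = reserved; b < never_used; b++) {
            if (t[b].size == SIZE_IS_FREE) {
                t[b].next_free = freelist_head;
                freelist_head = b;
            }
        }
    }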
@@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() {
}
void block_table::note_skipped_checkpoint() {
- //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header
+ // Purpose: alert block translation that the checkpoint was skipped, e.g.
+ // for a non-dirty header
_mutex_lock();
paranoid_invariant_notnull(_inprogress.block_translation);
_checkpoint_skipped = true;
_mutex_unlock();
}
-// Purpose: free any disk space used by previous checkpoint that isn't in use by either
+// Purpose: free any disk space used by previous checkpoint that isn't in use by
+// either
// - current state
// - in-progress checkpoint
// capture inprogress as new checkpointed.
@@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() {
void block_table::note_end_checkpoint(int fd) {
// Free unused blocks
_mutex_lock();
- uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit();
+ uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit();
paranoid_invariant_notnull(_inprogress.block_translation);
if (_checkpoint_skipped) {
toku_free(_inprogress.block_translation);
@@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) {
goto end;
}
- //Make certain inprogress was allocated space on disk
- assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
- assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0);
+ // Make certain inprogress was allocated space on disk
+ invariant(
+ _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
+ invariant(
+ _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff >
+ 0);
{
struct translation *t = &_checkpointed;
for (int64_t i = 0; i < t->length_of_array; i++) {
struct block_translation_pair *pair = &t->block_translation[i];
- if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) {
- assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair));
- _bt_block_allocator.free_block(pair->u.diskoff);
+ if (pair->size > 0 &&
+ !_translation_prevents_freeing(
+ &_inprogress, make_blocknum(i), pair)) {
+ invariant(!_translation_prevents_freeing(
+ &_current, make_blocknum(i), pair));
+ _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size);
}
}
toku_free(_checkpointed.block_translation);
@@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) {
return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b;
}
-void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_blocknum(struct translation *UU(t),
+ BLOCKNUM UU(b)) {
invariant(_is_valid_blocknum(t, b));
}
-bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) {
+bool block_table::_is_valid_freeable_blocknum(struct translation *t,
+ BLOCKNUM b) {
invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b;
}
// should be freeable
-void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t),
+ BLOCKNUM UU(b)) {
invariant(_is_valid_freeable_blocknum(t, b));
}
// Also used only in ft-serialize-test.
-void block_table::block_free(uint64_t offset) {
+void block_table::block_free(uint64_t offset, uint64_t size) {
_mutex_lock();
- _bt_block_allocator.free_block(offset);
+ _bt_block_allocator->FreeBlock(offset, size);
_mutex_unlock();
}
int64_t block_table::_calculate_size_on_disk(struct translation *t) {
- return 8 + // smallest_never_used_blocknum
- 8 + // blocknum_freelist_head
- t->smallest_never_used_blocknum.b * 16 + // Array
- 4; // 4 for checksum
+ return 8 + // smallest_never_used_blocknum
+ 8 + // blocknum_freelist_head
+ t->smallest_never_used_blocknum.b * 16 + // Array
+ 4; // 4 for checksum
}
-// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table.
-bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) {
- return t->block_translation &&
- b.b < t->smallest_never_used_blocknum.b &&
+// We cannot free the disk space allocated to this blocknum if it is still in
+// use by the given translation table.
+bool block_table::_translation_prevents_freeing(
+ struct translation *t,
+ BLOCKNUM b,
+ struct block_translation_pair *old_pair) {
+ return t->block_translation && b.b < t->smallest_never_used_blocknum.b &&
old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
}
-void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) {
+void block_table::_realloc_on_disk_internal(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ bool for_checkpoint) {
toku_mutex_assert_locked(&_mutex);
ft_set_dirty(ft, for_checkpoint);
struct translation *t = &_current;
struct block_translation_pair old_pair = t->block_translation[b.b];
- //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
- bool cannot_free = (bool)
- ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
- _translation_prevents_freeing(&_checkpointed, b, &old_pair));
- if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) {
- _bt_block_allocator.free_block(old_pair.u.diskoff);
+ // Free the old block if it is not still in use by the checkpoint in
+ // progress or the previous checkpoint
+ bool cannot_free =
+ (!for_checkpoint &&
+ _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
+ _translation_prevents_freeing(&_checkpointed, b, &old_pair);
+ if (!cannot_free && old_pair.u.diskoff != diskoff_unused) {
+ _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
}
uint64_t allocator_offset = diskoff_unused;
@@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o
if (size > 0) {
// Allocate a new block if the size is greater than 0,
// if the size is just 0, offset will be set to diskoff_unused
- _bt_block_allocator.alloc_block(size, heat, &allocator_offset);
+ _bt_block_allocator->AllocBlock(size, &allocator_offset);
}
t->block_translation[b.b].u.diskoff = allocator_offset;
*offset = allocator_offset;
- //Update inprogress btt if appropriate (if called because Pending bit is set).
+ // Update inprogress btt if appropriate (if called because Pending bit is
+ // set).
if (for_checkpoint) {
paranoid_invariant(b.b < _inprogress.length_of_array);
_inprogress.block_translation[b.b] = t->block_translation[b.b];
}
}
-void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) {
+void block_table::_ensure_safe_write_unlocked(int fd,
+ DISKOFF block_size,
+ DISKOFF block_offset) {
// Requires: holding _mutex
uint64_t size_needed = block_size + block_offset;
if (size_needed > _safe_file_size) {
@@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
_mutex_unlock();
int64_t size_after;
- toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after);
+ toku_maybe_preallocate_in_file(
+ fd, size_needed, _safe_file_size, &size_after);
_mutex_lock();
_safe_file_size = size_after;
@@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
}
}
-void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) {
+void block_table::realloc_on_disk(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ int fd,
+ bool for_checkpoint) {
_mutex_lock();
struct translation *t = &_current;
_verify_valid_freeable_blocknum(t, b);
- _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat);
+ _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint);
_ensure_safe_write_unlocked(fd, size, *offset);
_mutex_unlock();
@@ -458,70 +517,97 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) {
return pair->size == 0 && pair->u.diskoff == diskoff_unused;
}
-// Effect: figure out where to put the inprogress btt on disk, allocate space for it there.
-// The space must be 512-byte aligned (both the starting address and the size).
-// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt.
+// Effect: figure out where to put the inprogress btt on disk, allocate space
+// for it there.
+// The space must be 512-byte aligned (both the starting address and the
+// size).
+// As a result, the allocated space may be a little bit bigger (up to the next
+// 512-byte boundary) than the actual btt.
void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
toku_mutex_assert_locked(&_mutex);
struct translation *t = &_inprogress;
paranoid_invariant_notnull(t->block_translation);
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
- //Each inprogress is allocated only once
+ // Each inprogress is allocated only once
paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b]));
- //Allocate a new block
+ // Allocate a new block
int64_t size = _calculate_size_on_disk(t);
uint64_t offset;
- _bt_block_allocator.alloc_block(size, 0, &offset);
+ _bt_block_allocator->AllocBlock(size, &offset);
t->block_translation[b.b].u.diskoff = offset;
- t->block_translation[b.b].size = size;
+ t->block_translation[b.b].size = size;
}
// Effect: Serializes the blocktable to a wbuf (which starts uninitialized)
-// A clean shutdown runs checkpoint start so that current and inprogress are copies.
-// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd)
-// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed.
-// It *is* guaranteed that we can read up to the next 512-byte boundary, however
-void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
- int64_t *address, int64_t *size) {
+// A clean shutdown runs checkpoint start so that current and inprogress are
+// copies.
+// The resulting wbuf buffer is guaranteed to be 512-byte aligned and the
+// total length is a multiple of 512 (so we pad with zeros at the end if
+// needed)
+// The address is guaranteed to be 512-byte aligned, but the size is not
+// guaranteed.
+// It *is* guaranteed that we can read up to the next 512-byte boundary,
+// however
+void block_table::serialize_translation_to_wbuf(int fd,
+ struct wbuf *w,
+ int64_t *address,
+ int64_t *size) {
_mutex_lock();
struct translation *t = &_inprogress;
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
- _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy.
+ _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block
+ // must be 512-byte
+ // aligned to make
+ // O_DIRECT happy.
uint64_t size_translation = _calculate_size_on_disk(t);
- uint64_t size_aligned = roundup_to_multiple(512, size_translation);
- assert((int64_t)size_translation==t->block_translation[b.b].size);
+ uint64_t size_aligned = roundup_to_multiple(512, size_translation);
+ invariant((int64_t)size_translation == t->block_translation[b.b].size);
{
- //Init wbuf
+ // Init wbuf
if (0)
- printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff);
+ printf(
+ "%s:%d writing translation table of size_translation %" PRIu64
+ " at %" PRId64 "\n",
+ __FILE__,
+ __LINE__,
+ size_translation,
+ t->block_translation[b.b].u.diskoff);
char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
- for (uint64_t i=size_translation; i<size_aligned; i++) buf[i]=0; // fill in the end of the buffer with zeros.
+ for (uint64_t i = size_translation; i < size_aligned; i++)
+ buf[i] = 0; // fill in the end of the buffer with zeros.
wbuf_init(w, buf, size_aligned);
}
- wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
- wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
+ wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
+ wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
int64_t i;
- for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+ for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
if (0)
- printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ printf("%s:%d %" PRId64 ",%" PRId64 "\n",
+ __FILE__,
+ __LINE__,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
wbuf_DISKOFF(w, t->block_translation[i].size);
}
uint32_t checksum = toku_x1764_finish(&w->checksum);
wbuf_int(w, checksum);
*address = t->block_translation[b.b].u.diskoff;
- *size = size_translation;
- assert((*address)%512 == 0);
+ *size = size_translation;
+ invariant((*address) % 512 == 0);
_ensure_safe_write_unlocked(fd, size_aligned, *address);
_mutex_unlock();
}
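
Everything serialized here is placed and sized on 512-byte boundaries to keep the O_DIRECT path happy. Assuming roundup_to_multiple() computes the usual divide-and-multiply round-up, a minimal self-check of the padding arithmetic:

    #include <cassert>
    #include <cstdint>

    // Round n up to the next multiple of align; the division form works
    // for any positive align, no power-of-two requirement needed.
    static uint64_t roundup_to_multiple(uint64_t align, uint64_t n) {
        return ((n + align - 1) / align) * align;
    }

    int main() {
        assert(roundup_to_multiple(512, 1) == 512);
        assert(roundup_to_multiple(512, 512) == 512);
        assert(roundup_to_multiple(512, 513) == 1024);
        return 0;
    }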
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is to get disk address of a block, given its blocknum
+// (blockid?)
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size) {
struct translation *t = &_current;
_verify_valid_blocknum(t, b);
if (offset) {
@@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF
}
}
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is to get disk address of a block, given its blocknum
+// (blockid?)
+void block_table::translate_blocknum_to_offset_size(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size) {
_mutex_lock();
_translate_blocknum_to_offset_size_unlocked(b, offset, size);
_mutex_unlock();
@@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset,
// given that one more never-used blocknum will soon be used.
void block_table::_maybe_expand_translation(struct translation *t) {
if (t->length_of_array <= t->smallest_never_used_blocknum.b) {
- //expansion is necessary
+ // expansion is necessary
uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
XREALLOC_N(new_length, t->block_translation);
uint64_t i;
for (i = t->length_of_array; i < new_length; i++) {
t->block_translation[i].u.next_free_blocknum = freelist_null;
- t->block_translation[i].size = size_is_free;
+ t->block_translation[i].size = size_is_free;
}
t->length_of_array = new_length;
}
@@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
if (t->blocknum_freelist_head.b == freelist_null.b) {
// no previously used blocknums are available
// use a never used blocknum
- _maybe_expand_translation(t); //Ensure a never used blocknums is available
+ _maybe_expand_translation(
+ t); // Ensure a never-used blocknum is available
result = t->smallest_never_used_blocknum;
t->smallest_never_used_blocknum.b++;
} else { // reuse a previously used blocknum
@@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum;
t->blocknum_freelist_head = next;
}
- //Verify the blocknum is free
+ // Verify the blocknum is free
paranoid_invariant(t->block_translation[result.b].size == size_is_free);
- //blocknum is not free anymore
+ // blocknum is not free anymore
t->block_translation[result.b].u.diskoff = diskoff_unused;
- t->block_translation[result.b].size = 0;
+ t->block_translation[result.b].size = 0;
_verify_valid_freeable_blocknum(t, result);
*res = result;
ft_set_dirty(ft, false);
@@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) {
_mutex_unlock();
}
-void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) {
+void block_table::_free_blocknum_in_translation(struct translation *t,
+ BLOCKNUM b) {
_verify_valid_freeable_blocknum(t, b);
paranoid_invariant(t->block_translation[b.b].size != size_is_free);
- t->block_translation[b.b].size = size_is_free;
+ t->block_translation[b.b].size = size_is_free;
t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
- t->blocknum_freelist_head = b;
+ t->blocknum_freelist_head = b;
}
// Effect: Free a blocknum.
// If the blocknum holds the only reference to a block on disk, free that block
-void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) {
+void block_table::_free_blocknum_unlocked(BLOCKNUM *bp,
+ FT ft,
+ bool for_checkpoint) {
toku_mutex_assert_locked(&_mutex);
BLOCKNUM b = *bp;
- bp->b = 0; //Remove caller's reference.
+ bp->b = 0; // Remove caller's reference.
struct block_translation_pair old_pair = _current.block_translation[b.b];
_free_blocknum_in_translation(&_current, b);
if (for_checkpoint) {
- paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
+ paranoid_invariant(ft->checkpoint_header->type ==
+ FT_CHECKPOINT_INPROGRESS);
_free_blocknum_in_translation(&_inprogress, b);
}
- //If the size is 0, no disk block has ever been assigned to this blocknum.
+ // If the size is 0, no disk block has ever been assigned to this blocknum.
if (old_pair.size > 0) {
- //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
- bool cannot_free = (bool)
- (_translation_prevents_freeing(&_inprogress, b, &old_pair) ||
- _translation_prevents_freeing(&_checkpointed, b, &old_pair));
+ // Free the old block if it is not still in use by the checkpoint in
+ // progress or the previous checkpoint
+ bool cannot_free =
+ _translation_prevents_freeing(&_inprogress, b, &old_pair) ||
+ _translation_prevents_freeing(&_checkpointed, b, &old_pair);
if (!cannot_free) {
- _bt_block_allocator.free_block(old_pair.u.diskoff);
+ _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
}
- }
- else {
- paranoid_invariant(old_pair.size==0);
+ } else {
+ paranoid_invariant(old_pair.size == 0);
paranoid_invariant(old_pair.u.diskoff == diskoff_unused);
}
ft_set_dirty(ft, for_checkpoint);
@@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() {
void block_table::free_unused_blocknums(BLOCKNUM root) {
_mutex_lock();
int64_t smallest = _current.smallest_never_used_blocknum.b;
- for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) {
+ for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) {
if (i == root.b) {
continue;
}
BLOCKNUM b = make_blocknum(i);
if (_current.block_translation[b.b].size == 0) {
- invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused);
+ invariant(_current.block_translation[b.b].u.diskoff ==
+ diskoff_unused);
_free_blocknum_in_translation(&_current, b);
}
}
@@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) {
goto cleanup;
}
}
- cleanup:
+cleanup:
_mutex_unlock();
return ok;
}
// Verify there are no data blocks except root.
-// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now.
+// TODO(leif): This actually takes a lock, but I don't want to fix all the
+// callers right now.
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
paranoid_invariant(_no_data_blocks_except_root(root));
}
@@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
if (t->block_translation) {
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
- fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b);
- fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b);
- fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
- fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff);
+ fprintf(f,
+ " smallest_never_used_blocknum[%" PRId64 "]",
+ t->smallest_never_used_blocknum.b);
+ fprintf(f,
+ " blocknum_free_list_head[%" PRId64 "]",
+ t->blocknum_freelist_head.b);
+ fprintf(
+ f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
+ fprintf(f,
+ " location_on_disk[%" PRId64 "]\n",
+ t->block_translation[b.b].u.diskoff);
int64_t i;
- for (i=0; i<t->length_of_array; i++) {
- fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ for (i = 0; i < t->length_of_array; i++) {
+ fprintf(f,
+ " %" PRId64 ": %" PRId64 " %" PRId64 "\n",
+ i,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
}
fprintf(f, "\n");
} else {
@@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
void block_table::dump_translation_table_pretty(FILE *f) {
_mutex_lock();
struct translation *t = &_checkpointed;
- assert(t->block_translation != nullptr);
+ invariant(t->block_translation != nullptr);
for (int64_t i = 0; i < t->length_of_array; ++i) {
- fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+ fprintf(f,
+ "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n",
+ i,
+ t->block_translation[i].u.diskoff,
+ t->block_translation[i].size);
}
_mutex_unlock();
}
@@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) {
struct translation *t = &_current;
if (b.b < t->length_of_array) {
struct block_translation_pair *bx = &t->block_translation[b.b];
- printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size);
+ printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n",
+ b.b,
+ bx->u.diskoff,
+ bx->size);
}
_mutex_unlock();
}
@@ -763,26 +877,31 @@ void block_table::destroy(void) {
toku_free(_inprogress.block_translation);
toku_free(_checkpointed.block_translation);
- _bt_block_allocator.destroy();
+ _bt_block_allocator->Destroy();
+ delete _bt_block_allocator;
toku_mutex_destroy(&_mutex);
nb_mutex_destroy(&_safe_file_size_lock);
}
-int block_table::_translation_deserialize_from_buffer(struct translation *t,
- DISKOFF location_on_disk,
- uint64_t size_on_disk,
- // out: buffer with serialized translation
- unsigned char *translation_buffer) {
+int block_table::_translation_deserialize_from_buffer(
+ struct translation *t,
+ DISKOFF location_on_disk,
+ uint64_t size_on_disk,
+ // out: buffer with serialized translation
+ unsigned char *translation_buffer) {
int r = 0;
- assert(location_on_disk != 0);
+ invariant(location_on_disk != 0);
t->type = TRANSLATION_CHECKPOINTED;
// check the checksum
uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4);
uint64_t offset = size_on_disk - 4;
- uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset));
+ uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset));
if (x1764 != stored_x1764) {
- fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764);
+ fprintf(stderr,
+ "Translation table checksum failure: calc=0x%08x read=0x%08x\n",
+ x1764,
+ stored_x1764);
r = TOKUDB_BAD_CHECKSUM;
goto exit;
}
@@ -790,42 +909,47 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t,
struct rbuf rb;
rb.buf = translation_buffer;
rb.ndone = 0;
- rb.size = size_on_disk-4;//4==checksum
+ rb.size = size_on_disk - 4; // 4==checksum
- t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
+ t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
t->length_of_array = t->smallest_never_used_blocknum.b;
invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
- t->blocknum_freelist_head = rbuf_blocknum(&rb);
+ t->blocknum_freelist_head = rbuf_blocknum(&rb);
XMALLOC_N(t->length_of_array, t->block_translation);
for (int64_t i = 0; i < t->length_of_array; i++) {
t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
t->block_translation[i].size = rbuf_DISKOFF(&rb);
}
- invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk);
- invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk);
- invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk);
+ invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk);
+ invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size ==
+ (int64_t)size_on_disk);
+ invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff ==
+ location_on_disk);
exit:
return r;
}
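
Deserialization refuses to parse any field until the trailing 4-byte checksum matches a digest of the rest of the buffer. The accept/reject shape of that check, with a throwaway FNV-1a digest standing in for toku_x1764_memory and a plain memcpy for the byte-order-aware read:

    #include <cstdint>
    #include <cstring>

    // Illustrative digest only; PerconaFT uses x1764 here, not FNV-1a.
    static uint32_t digest32(const unsigned char *p, size_t n) {
        uint32_t h = 2166136261u;
        for (size_t i = 0; i < n; i++)
            h = (h ^ p[i]) * 16777619u;
        return h;
    }

    // 0 if the last 4 bytes match the digest of the preceding bytes,
    // nonzero otherwise (the caller maps this to TOKUDB_BAD_CHECKSUM).
    static int verify_trailing_checksum(const unsigned char *buf, size_t size) {
        if (size < 4)
            return -1;
        uint32_t stored;
        memcpy(&stored, buf + size - 4, sizeof(stored));
        return digest32(buf, size - 4) == stored ? 0 : -1;
    }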
int block_table::iterate(enum translation_type type,
- BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) {
+ BLOCKTABLE_CALLBACK f,
+ void *extra,
+ bool data_only,
+ bool used_only) {
struct translation *src;
-
+
int r = 0;
switch (type) {
- case TRANSLATION_CURRENT:
- src = &_current;
- break;
- case TRANSLATION_INPROGRESS:
- src = &_inprogress;
- break;
- case TRANSLATION_CHECKPOINTED:
- src = &_checkpointed;
- break;
- default:
- r = EINVAL;
+ case TRANSLATION_CURRENT:
+ src = &_current;
+ break;
+ case TRANSLATION_INPROGRESS:
+ src = &_inprogress;
+ break;
+ case TRANSLATION_CHECKPOINTED:
+ src = &_checkpointed;
+ break;
+ default:
+ r = EINVAL;
}
struct translation fakecurrent;
@@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type,
src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
_mutex_unlock();
int64_t i;
- for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+ for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
struct block_translation_pair pair = t->block_translation[i];
- if (data_only && i< RESERVED_BLOCKNUMS) continue;
- if (used_only && pair.size <= 0) continue;
+ if (data_only && i < RESERVED_BLOCKNUMS)
+ continue;
+ if (used_only && pair.size <= 0)
+ continue;
r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
- if (r!=0) break;
+ if (r != 0)
+ break;
}
toku_free(t->block_translation);
}
@@ -856,8 +983,11 @@ typedef struct {
int64_t total_space;
} frag_extra;
-static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) {
- frag_extra *info = (frag_extra *) extra;
+static int frag_helper(BLOCKNUM UU(b),
+ int64_t size,
+ int64_t address,
+ void *extra) {
+ frag_extra *info = (frag_extra *)extra;
if (size + address > info->total_space)
info->total_space = size + address;
@@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr
return 0;
}
-void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) {
- frag_extra info = { 0, 0 };
+void block_table::internal_fragmentation(int64_t *total_sizep,
+ int64_t *used_sizep) {
+ frag_extra info = {0, 0};
int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
- assert_zero(r);
+ invariant_zero(r);
- if (total_sizep) *total_sizep = info.total_space;
- if (used_sizep) *used_sizep = info.used_space;
+ if (total_sizep)
+ *total_sizep = info.total_space;
+ if (used_sizep)
+ *used_sizep = info.used_space;
}
-void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) {
+void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size,
+ DISKOFF *offset,
+ FT ft) {
toku_mutex_assert_locked(&_mutex);
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
- _realloc_on_disk_internal(b, size, offset, ft, false, 0);
+ _realloc_on_disk_internal(b, size, offset, ft, false);
}
-void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) {
+void block_table::realloc_descriptor_on_disk(DISKOFF size,
+ DISKOFF *offset,
+ FT ft,
+ int fd) {
_mutex_lock();
_realloc_descriptor_on_disk_unlocked(size, offset, ft);
_ensure_safe_write_unlocked(fd, size, *offset);
@@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) {
void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
// Requires: blocktable lock is held.
// Requires: report->file_size_bytes is already filled in.
-
+
// Count the headers.
- report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
report->data_blocks = 1;
- report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ report->checkpoint_bytes_additional =
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
report->checkpoint_blocks_additional = 1;
struct translation *current = &_current;
@@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
struct translation *checkpointed = &_checkpointed;
for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
- struct block_translation_pair *pair = &checkpointed->block_translation[i];
- if (pair->size > 0 && !(i < current->length_of_array &&
- current->block_translation[i].size > 0 &&
- current->block_translation[i].u.diskoff == pair->u.diskoff)) {
- report->checkpoint_bytes_additional += pair->size;
- report->checkpoint_blocks_additional++;
+ struct block_translation_pair *pair =
+ &checkpointed->block_translation[i];
+ if (pair->size > 0 &&
+ !(i < current->length_of_array &&
+ current->block_translation[i].size > 0 &&
+ current->block_translation[i].u.diskoff == pair->u.diskoff)) {
+ report->checkpoint_bytes_additional += pair->size;
+ report->checkpoint_blocks_additional++;
}
}
struct translation *inprogress = &_inprogress;
for (int64_t i = 0; i < inprogress->length_of_array; i++) {
struct block_translation_pair *pair = &inprogress->block_translation[i];
- if (pair->size > 0 && !(i < current->length_of_array &&
- current->block_translation[i].size > 0 &&
- current->block_translation[i].u.diskoff == pair->u.diskoff) &&
- !(i < checkpointed->length_of_array &&
- checkpointed->block_translation[i].size > 0 &&
- checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
+ if (pair->size > 0 &&
+ !(i < current->length_of_array &&
+ current->block_translation[i].size > 0 &&
+ current->block_translation[i].u.diskoff == pair->u.diskoff) &&
+ !(i < checkpointed->length_of_array &&
+ checkpointed->block_translation[i].size > 0 &&
+ checkpointed->block_translation[i].u.diskoff ==
+ pair->u.diskoff)) {
report->checkpoint_bytes_additional += pair->size;
report->checkpoint_blocks_additional++;
}
}
- _bt_block_allocator.get_unused_statistics(report);
+ _bt_block_allocator->UnusedStatistics(report);
}
void block_table::get_info64(struct ftinfo64 *s) {
@@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) {
_mutex_unlock();
}
-int block_table::iterate_translation_tables(uint64_t checkpoint_count,
- int (*iter)(uint64_t checkpoint_count,
- int64_t total_num_rows,
- int64_t blocknum,
- int64_t diskoff,
- int64_t size,
- void *extra),
- void *iter_extra) {
+int block_table::iterate_translation_tables(
+ uint64_t checkpoint_count,
+ int (*iter)(uint64_t checkpoint_count,
+ int64_t total_num_rows,
+ int64_t blocknum,
+ int64_t diskoff,
+ int64_t size,
+ void *extra),
+ void *iter_extra) {
int error = 0;
_mutex_lock();
- int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array;
+ int64_t total_num_rows =
+ _current.length_of_array + _checkpointed.length_of_array;
for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
struct block_translation_pair *block = &_current.block_translation[i];
- error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+ error = iter(checkpoint_count,
+ total_num_rows,
+ i,
+ block->u.diskoff,
+ block->size,
+ iter_extra);
}
for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
- struct block_translation_pair *block = &_checkpointed.block_translation[i];
- error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+ struct block_translation_pair *block =
+ &_checkpointed.block_translation[i];
+ error = iter(checkpoint_count - 1,
+ total_num_rows,
+ i,
+ block->u.diskoff,
+ block->size,
+ iter_extra);
}
_mutex_unlock();
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
index 8d391674540..dd732d4f372 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
@@ -62,13 +62,16 @@ enum {
RESERVED_BLOCKNUMS
};
-typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);
+typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b,
+ int64_t size,
+ int64_t address,
+ void *extra);
static inline BLOCKNUM make_blocknum(int64_t b) {
- BLOCKNUM result = { .b = b };
+ BLOCKNUM result = {.b = b};
return result;
}
-static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
+static const BLOCKNUM ROLLBACK_NONE = {.b = 0};
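+// Usage sketch (illustrative only, not part of this header): callers wrap
+// raw block numbers in the typed struct instead of passing bare integers:
+//     BLOCKNUM b = make_blocknum(42);
+//     invariant(b.b == 42);  // ROLLBACK_NONE.b == 0 marks "no rollback log"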
/**
* There are three copies of the translation table (btt) in the block table:
@@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
*
* inprogress Is only filled by copying from current,
* and is the only version ever serialized to disk.
- * (It is serialized to disk on checkpoint and clean shutdown.)
+ * (It is serialized to disk on checkpoint and clean
+ * shutdown.)
* At end of checkpoint it replaces 'checkpointed'.
* During a checkpoint, any 'pending' dirty writes will update
* inprogress.
*
* current Is initialized by copying from checkpointed,
- * is the only version ever modified while the database is in use,
+ * is the only version ever modified while the database is in
+ * use,
* and is the only version ever copied to inprogress.
* It is never stored on disk.
*/
class block_table {
-public:
+ public:
enum translation_type {
TRANSLATION_NONE = 0,
TRANSLATION_CURRENT,
@@ -102,7 +107,10 @@ public:
void create();
- int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
+ int create_from_buffer(int fd,
+ DISKOFF location_on_disk,
+ DISKOFF size_on_disk,
+ unsigned char *translation_buffer);
void destroy();
@@ -114,11 +122,21 @@ public:
// Blocknums
void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
- void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
+ void realloc_on_disk(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ int fd,
+ bool for_checkpoint);
void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
- void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+ void translate_blocknum_to_offset_size(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size);
void free_unused_blocknums(BLOCKNUM root);
- void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd);
+ void realloc_descriptor_on_disk(DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ int fd);
void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);
    // External verification
@@ -127,15 +145,22 @@ public:
void verify_no_free_blocknums();
// Serialization
- void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size);
+ void serialize_translation_to_wbuf(int fd,
+ struct wbuf *w,
+ int64_t *address,
+ int64_t *size);
// DEBUG ONLY (ftdump included), tests included
void blocknum_dump_translation(BLOCKNUM b);
void dump_translation_table_pretty(FILE *f);
void dump_translation_table(FILE *f);
- void block_free(uint64_t offset);
+ void block_free(uint64_t offset, uint64_t size);
- int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only);
+ int iterate(enum translation_type type,
+ BLOCKTABLE_CALLBACK f,
+ void *extra,
+ bool data_only,
+ bool used_only);
void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);
// Requires: blocktable lock is held.
@@ -146,13 +171,16 @@ public:
void get_info64(struct ftinfo64 *);
- int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *);
+ int iterate_translation_tables(
+ uint64_t,
+ int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *),
+ void *);
-private:
+ private:
struct block_translation_pair {
// If in the freelist, use next_free_blocknum, otherwise diskoff.
union {
- DISKOFF diskoff;
+ DISKOFF diskoff;
BLOCKNUM next_free_blocknum;
} u;
@@ -173,7 +201,8 @@ private:
struct translation {
enum translation_type type;
- // Number of elements in array (block_translation). always >= smallest_never_used_blocknum
+ // Number of elements in array (block_translation). always >=
+ // smallest_never_used_blocknum
int64_t length_of_array;
BLOCKNUM smallest_never_used_blocknum;
@@ -181,20 +210,28 @@ private:
BLOCKNUM blocknum_freelist_head;
struct block_translation_pair *block_translation;
- // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
- // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
+ // size_on_disk is stored in
+ // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
+ // location_on is stored in
+ // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
};
void _create_internal();
- int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize
- DISKOFF location_on_disk, // location of translation_buffer
- uint64_t size_on_disk,
- unsigned char * translation_buffer); // buffer with serialized translation
-
- void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype);
+ int _translation_deserialize_from_buffer(
+ struct translation *t, // destination into which to deserialize
+ DISKOFF location_on_disk, // location of translation_buffer
+ uint64_t size_on_disk,
+ unsigned char *
+ translation_buffer); // buffer with serialized translation
+
+ void _copy_translation(struct translation *dst,
+ struct translation *src,
+ enum translation_type newtype);
void _maybe_optimize_translation(struct translation *t);
void _maybe_expand_translation(struct translation *t);
- bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair);
+ bool _translation_prevents_freeing(struct translation *t,
+ BLOCKNUM b,
+ struct block_translation_pair *old_pair);
void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
int64_t _calculate_size_on_disk(struct translation *t);
bool _pair_is_unallocated(struct block_translation_pair *pair);
@@ -203,14 +240,26 @@ private:
// Blocknum management
void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
- void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
- void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
- void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
- void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+ void _free_blocknum_unlocked(BLOCKNUM *bp,
+ struct ft *ft,
+ bool for_checkpoint);
+ void _realloc_descriptor_on_disk_unlocked(DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft);
+ void _realloc_on_disk_internal(BLOCKNUM b,
+ DISKOFF size,
+ DISKOFF *offset,
+ struct ft *ft,
+ bool for_checkpoint);
+ void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+ DISKOFF *offset,
+ DISKOFF *size);
// File management
void _maybe_truncate_file(int fd, uint64_t size_needed_before);
- void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset);
+ void _ensure_safe_write_unlocked(int fd,
+ DISKOFF block_size,
+ DISKOFF block_offset);
// Verification
bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
@@ -220,29 +269,33 @@ private:
bool _no_data_blocks_except_root(BLOCKNUM root);
bool _blocknum_allocated(BLOCKNUM b);
- // Locking
+ // Locking
//
// TODO: Move the lock to the FT
void _mutex_lock();
void _mutex_unlock();
- // The current translation is the one used by client threads.
+ // The current translation is the one used by client threads.
// It is not represented on disk.
struct translation _current;
- // The translation used by the checkpoint currently in progress.
- // If the checkpoint thread allocates a block, it must also update the current translation.
+ // The translation used by the checkpoint currently in progress.
+ // If the checkpoint thread allocates a block, it must also update the
+ // current translation.
struct translation _inprogress;
- // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes,
+ // The translation for the data that shall remain inviolate on disk until
+ // the next checkpoint finishes,
// after which any blocks used only in this translation can be freed.
struct translation _checkpointed;
- // The in-memory data structure for block allocation.
+ // The in-memory data structure for block allocation.
// There is no on-disk data structure for block allocation.
- // Note: This is *allocation* not *translation* - the block allocator is unaware of which
- // blocks are used for which translation, but simply allocates and deallocates blocks.
- block_allocator _bt_block_allocator;
+ // Note: This is *allocation* not *translation* - the block allocator is
+    // unaware of which blocks are used for which translation, but simply
+    // allocates and deallocates blocks.
+ BlockAllocator *_bt_block_allocator;
toku_mutex_t _mutex;
struct nb_mutex _safe_file_size_lock;
bool _checkpoint_skipped;
@@ -257,16 +310,16 @@ private:
#include "ft/serialize/wbuf.h"
-static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
wbuf_ulonglong(w, b.b);
}
-static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
wbuf_nocrc_ulonglong(w, b.b);
}
static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) {
- wbuf_ulonglong(wb, (uint64_t) off);
+ wbuf_ulonglong(wb, (uint64_t)off);
}
#include "ft/serialize/rbuf.h"
@@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) {
return result;
}
-static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) {
+static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb,
+ memarena *UU(ma),
+ BLOCKNUM *blocknum) {
*blocknum = rbuf_blocknum(rb);
}
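
These wbuf/rbuf helpers are intentionally symmetric. A round-trip sketch using the APIs that appear elsewhere in this patch (wbuf_init, rbuf_init); illustrative only:

    char buf[8];
    struct wbuf w;
    wbuf_init(&w, buf, sizeof(buf));
    wbuf_BLOCKNUM(&w, make_blocknum(7));   // stored as a ulonglong

    struct rbuf r;
    rbuf_init(&r, (unsigned char *)buf, sizeof(buf));
    BLOCKNUM b = rbuf_blocknum(&r);        // reads the same 8 bytes back
    invariant(b.b == 7);
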
diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
index 1719b6b7cb5..c2f815c6cf2 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
@@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen,
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
- char windowBits = source[1];
+ int8_t windowBits = source[1];
int r = inflateInit2(&strm, windowBits);
lazy_assert(r == Z_OK);
strm.next_out = dest;
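
The char-to-int8_t change above is a portability fix, on the hedged reading that TokuFT stores negative windowBits values (zlib's convention for raw deflate) in the compressed block header. Plain char is unsigned on some ABIs (notably ARM and PowerPC), where the old code corrupted the sign; a minimal illustration:

    #include <cstdint>

    unsigned char stored = 0xF1;              // byte as read from source[1]
    char   c  = static_cast<char>(stored);    // 241 where char is unsigned
    int8_t i8 = static_cast<int8_t>(stored);  // -15 on every platform
    // inflateInit2(&strm, i8) then selects raw-deflate mode as intended.
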
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
index 49d4368a3ab..8fcb5293412 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
@@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
// translation table itself won't fit in main memory.
ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read,
translation_address_on_disk);
- assert(readsz >= translation_size_on_disk);
- assert(readsz <= (ssize_t)size_to_read);
+ invariant(readsz >= translation_size_on_disk);
+ invariant(readsz <= (ssize_t)size_to_read);
}
// Create table and read in data.
r = ft->blocktable.create_from_buffer(fd,
@@ -411,73 +411,90 @@ exit:
return r;
}
-static size_t
-serialize_ft_min_size (uint32_t version) {
+static size_t serialize_ft_min_size(uint32_t version) {
size_t size = 0;
- switch(version) {
- case FT_LAYOUT_VERSION_29:
- size += sizeof(uint64_t); // logrows in ft
- case FT_LAYOUT_VERSION_28:
- size += sizeof(uint32_t); // fanout in ft
- case FT_LAYOUT_VERSION_27:
- case FT_LAYOUT_VERSION_26:
- case FT_LAYOUT_VERSION_25:
- case FT_LAYOUT_VERSION_24:
- case FT_LAYOUT_VERSION_23:
- case FT_LAYOUT_VERSION_22:
- case FT_LAYOUT_VERSION_21:
- size += sizeof(MSN); // max_msn_in_ft
- case FT_LAYOUT_VERSION_20:
- case FT_LAYOUT_VERSION_19:
- size += 1; // compression method
- size += sizeof(MSN); // highest_unused_msn_for_upgrade
- case FT_LAYOUT_VERSION_18:
- size += sizeof(uint64_t); // time_of_last_optimize_begin
- size += sizeof(uint64_t); // time_of_last_optimize_end
- size += sizeof(uint32_t); // count_of_optimize_in_progress
- size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
- size -= 8; // removed num_blocks_to_upgrade_14
- size -= 8; // removed num_blocks_to_upgrade_13
- case FT_LAYOUT_VERSION_17:
- size += 16;
- invariant(sizeof(STAT64INFO_S) == 16);
- case FT_LAYOUT_VERSION_16:
- case FT_LAYOUT_VERSION_15:
- size += 4; // basement node size
- size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14
- size += 8; // time of last verification
- case FT_LAYOUT_VERSION_14:
- size += 8; //TXNID that created
- case FT_LAYOUT_VERSION_13:
- size += ( 4 // build_id
- +4 // build_id_original
- +8 // time_of_creation
- +8 // time_of_last_modification
- );
+ switch (version) {
+ case FT_LAYOUT_VERSION_29:
+ size += sizeof(uint64_t); // logrows in ft
+ case FT_LAYOUT_VERSION_28:
+ size += sizeof(uint32_t); // fanout in ft
+ case FT_LAYOUT_VERSION_27:
+ case FT_LAYOUT_VERSION_26:
+ case FT_LAYOUT_VERSION_25:
+ case FT_LAYOUT_VERSION_24:
+ case FT_LAYOUT_VERSION_23:
+ case FT_LAYOUT_VERSION_22:
+ case FT_LAYOUT_VERSION_21:
+ size += sizeof(MSN); // max_msn_in_ft
+ case FT_LAYOUT_VERSION_20:
+ case FT_LAYOUT_VERSION_19:
+ size += 1; // compression method
+ size += sizeof(MSN); // highest_unused_msn_for_upgrade
+ case FT_LAYOUT_VERSION_18:
+ size += sizeof(uint64_t); // time_of_last_optimize_begin
+ size += sizeof(uint64_t); // time_of_last_optimize_end
+ size += sizeof(uint32_t); // count_of_optimize_in_progress
+ size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
+ size -= 8; // removed num_blocks_to_upgrade_14
+ size -= 8; // removed num_blocks_to_upgrade_13
+ case FT_LAYOUT_VERSION_17:
+ size += 16;
+ invariant(sizeof(STAT64INFO_S) == 16);
+ case FT_LAYOUT_VERSION_16:
+ case FT_LAYOUT_VERSION_15:
+ size += 4; // basement node size
+ size += 8; // num_blocks_to_upgrade_14 (previously
+ // num_blocks_to_upgrade, now one int each for upgrade
+                        // from 13, 14)
+ size += 8; // time of last verification
+ case FT_LAYOUT_VERSION_14:
+ size += 8; // TXNID that created
+ case FT_LAYOUT_VERSION_13:
+ size += (4 // build_id
+ +
+ 4 // build_id_original
+ +
+ 8 // time_of_creation
+ +
+ 8 // time_of_last_modification
+ );
// fall through
- case FT_LAYOUT_VERSION_12:
- size += (+8 // "tokudata"
- +4 // version
- +4 // original_version
- +4 // size
- +8 // byte order verification
- +8 // checkpoint_count
- +8 // checkpoint_lsn
- +4 // tree's nodesize
- +8 // translation_size_on_disk
- +8 // translation_address_on_disk
- +4 // checksum
- +8 // Number of blocks in old version.
- +8 // diskoff
- +4 // flags
- );
- break;
- default:
- abort();
- }
-
- lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ case FT_LAYOUT_VERSION_12:
+ size += (+8 // "tokudata"
+ +
+ 4 // version
+ +
+ 4 // original_version
+ +
+ 4 // size
+ +
+ 8 // byte order verification
+ +
+ 8 // checkpoint_count
+ +
+ 8 // checkpoint_lsn
+ +
+ 4 // tree's nodesize
+ +
+ 8 // translation_size_on_disk
+ +
+ 8 // translation_address_on_disk
+ +
+ 4 // checksum
+ +
+ 8 // Number of blocks in old version.
+ +
+ 8 // diskoff
+ +
+ 4 // flags
+ );
+ break;
+ default:
+ abort();
+ }
+
+ lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
return size;
}
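
The switch intentionally falls through from newest to oldest layout, so each case contributes only the bytes its version added on top of all older versions. The same cumulative pattern in miniature (hypothetical versions and field sizes):

    #include <cstddef>
    #include <cstdlib>

    size_t min_size_example(int version) {
        size_t size = 0;
        switch (version) {
            case 3:
                size += 8;   // v3 added a uint64 field
                // fall through
            case 2:
                size += 4;   // v2 added a uint32 field
                // fall through
            case 1:
                size += 16;  // base layout
                break;
            default:
                abort();
        }
        return size;   // v3 -> 28, v2 -> 20, v1 -> 16
    }
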
@@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
struct rbuf *rb,
uint64_t *checkpoint_count,
LSN *checkpoint_lsn,
- uint32_t * version_p)
+ uint32_t *version_p)
// Effect: Read and parse the header of a fractal tree
//
// Simply reading the raw bytes of the header into an rbuf is insensitive
@@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
// file AND the header is useless
{
int r = 0;
- const int64_t prefix_size = 8 + // magic ("tokudata")
- 4 + // version
- 4 + // build_id
- 4; // size
+ const int64_t prefix_size = 8 + // magic ("tokudata")
+ 4 + // version
+ 4 + // build_id
+ 4; // size
const int64_t read_size = roundup_to_multiple(512, prefix_size);
unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix);
rb->buf = NULL;
int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header);
if (n != read_size) {
- if (n==0) {
+ if (n == 0) {
r = TOKUDB_DICTIONARY_NO_HEADER;
- } else if (n<0) {
+ } else if (n < 0) {
r = get_error_errno();
} else {
r = EINVAL;
@@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
rbuf_init(rb, prefix, prefix_size);
- //Check magic number
+ // Check magic number
const void *magic;
rbuf_literal_bytes(rb, &magic, 8);
- if (memcmp(magic,"tokudata",8)!=0) {
- if ((*(uint64_t*)magic) == 0) {
+ if (memcmp(magic, "tokudata", 8) != 0) {
+ if ((*(uint64_t *)magic) == 0) {
r = TOKUDB_DICTIONARY_NO_HEADER;
} else {
- r = EINVAL; //Not a tokudb file! Do not use.
+ r = EINVAL; // Not a tokudb file! Do not use.
}
goto exit;
}
- //Version MUST be in network order regardless of disk order.
+ // Version MUST be in network order regardless of disk order.
uint32_t version;
version = rbuf_network_int(rb);
*version_p = version;
if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
- r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use
+ r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use
goto exit;
} else if (version > FT_LAYOUT_VERSION) {
- r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use
+ r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use
goto exit;
}
- //build_id MUST be in network order regardless of disk order.
+ // build_id MUST be in network order regardless of disk order.
uint32_t build_id __attribute__((__unused__));
build_id = rbuf_network_int(rb);
int64_t min_header_size;
min_header_size = serialize_ft_min_size(version);
- //Size MUST be in network order regardless of disk order.
+ // Size MUST be in network order regardless of disk order.
uint32_t size;
size = rbuf_network_int(rb);
- //If too big, it is corrupt. We would probably notice during checksum
- //but may have to do a multi-gigabyte malloc+read to find out.
- //If its too small reading rbuf would crash, so verify.
- if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) {
+ // If too big, it is corrupt. We would probably notice during checksum
+ // but may have to do a multi-gigabyte malloc+read to find out.
+ // If its too small reading rbuf would crash, so verify.
+ if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE ||
+ size < min_header_size) {
r = TOKUDB_DICTIONARY_NO_HEADER;
goto exit;
}
- lazy_assert(rb->ndone==prefix_size);
+ lazy_assert(rb->ndone == prefix_size);
rb->size = size;
{
toku_free(rb->buf);
uint32_t size_to_read = roundup_to_multiple(512, size);
XMALLOC_N_ALIGNED(512, size_to_read, rb->buf);
- assert(offset_of_header%512==0);
+ invariant(offset_of_header % 512 == 0);
n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header);
if (n != size_to_read) {
if (n < 0) {
r = get_error_errno();
} else {
- r = EINVAL; //Header might be useless (wrong size) or could be a disk read error.
+ r = EINVAL; // Header might be useless (wrong size) or could be
+ // a disk read error.
}
goto exit;
}
}
- //It's version 14 or later. Magic looks OK.
- //We have an rbuf that represents the header.
- //Size is within acceptable bounds.
+ // It's version 14 or later. Magic looks OK.
+ // We have an rbuf that represents the header.
+ // Size is within acceptable bounds.
- //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed)
+ // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function
+ // changed)
uint32_t calculated_x1764;
- calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4);
+ calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4);
uint32_t stored_x1764;
- stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4));
+ stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4));
if (calculated_x1764 != stored_x1764) {
- r = TOKUDB_BAD_CHECKSUM; //Header useless
- fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764);
+ r = TOKUDB_BAD_CHECKSUM; // Header useless
+ fprintf(stderr,
+ "Header checksum failure: calc=0x%08x read=0x%08x\n",
+ calculated_x1764,
+ stored_x1764);
goto exit;
}
- //Verify byte order
+ // Verify byte order
const void *tmp_byte_order_check;
lazy_assert((sizeof toku_byte_order_host) == 8);
- rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order
+ rbuf_literal_bytes(
+ rb, &tmp_byte_order_check, 8); // Must not translate byte order
int64_t byte_order_stored;
- byte_order_stored = *(int64_t*)tmp_byte_order_check;
+ byte_order_stored = *(int64_t *)tmp_byte_order_check;
if (byte_order_stored != toku_byte_order_host) {
- r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary
+ r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary
goto exit;
}
- //Load checkpoint count
+ // Load checkpoint count
*checkpoint_count = rbuf_ulonglong(rb);
*checkpoint_lsn = rbuf_LSN(rb);
- //Restart at beginning during regular deserialization
+ // Restart at beginning during regular deserialization
rb->ndone = 0;
exit:
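
The checksum check above depends on a layout convention: the last four bytes of the serialized header hold an x1764 checksum of everything before them. A generic sketch of that trailing-checksum test (the checksum callback stands in for toku_x1764_memory; byte-order conversion elided):

    #include <cstdint>
    #include <cstring>

    static bool trailing_checksum_ok(const uint8_t *buf, size_t size,
                                     uint32_t (*checksum)(const void *, size_t)) {
        if (size < 4) return false;               // cannot hold a checksum
        uint32_t calc = checksum(buf, size - 4);  // bytes before the tail
        uint32_t stored;
        memcpy(&stored, buf + size - 4, sizeof stored);
        return calc == stored;
    }
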
@@ -620,11 +644,7 @@ exit:
// Read ft from file into struct. Read both headers and use one.
// We want the latest acceptable header whose checkpoint_lsn is no later
// than max_acceptable_lsn.
-int
-toku_deserialize_ft_from(int fd,
- LSN max_acceptable_lsn,
- FT *ft)
-{
+int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) {
struct rbuf rb_0;
struct rbuf rb_1;
uint64_t checkpoint_count_0 = 0;
@@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd,
int r0, r1, r;
toku_off_t header_0_off = 0;
- r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0);
+ r0 = deserialize_ft_from_fd_into_rbuf(fd,
+ header_0_off,
+ &rb_0,
+ &checkpoint_count_0,
+ &checkpoint_lsn_0,
+ &version_0);
if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) {
h0_acceptable = true;
}
- toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
- r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1);
+ toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ r1 = deserialize_ft_from_fd_into_rbuf(fd,
+ header_1_off,
+ &rb_1,
+ &checkpoint_count_1,
+ &checkpoint_lsn_1,
+ &version_1);
if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) {
h1_acceptable = true;
}
@@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd,
// We were unable to read either header or at least one is too
// new. Certain errors are higher priority than others. Order of
// these if/else if is important.
- if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
+ if (r0 == TOKUDB_DICTIONARY_TOO_NEW ||
+ r1 == TOKUDB_DICTIONARY_TOO_NEW) {
r = TOKUDB_DICTIONARY_TOO_NEW;
- } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) {
+ } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD ||
+ r1 == TOKUDB_DICTIONARY_TOO_OLD) {
r = TOKUDB_DICTIONARY_TOO_OLD;
} else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) {
fprintf(stderr, "Both header checksums failed.\n");
r = TOKUDB_BAD_CHECKSUM;
- } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) {
+ } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER ||
+ r1 == TOKUDB_DICTIONARY_NO_HEADER) {
r = TOKUDB_DICTIONARY_NO_HEADER;
} else {
- r = r0 ? r0 : r1; //Arbitrarily report the error from the
- //first header, unless it's readable
+ r = r0 ? r0 : r1; // Arbitrarily report the error from the
+ // first header, unless it's readable
}
- // it should not be possible for both headers to be later than the max_acceptable_lsn
- invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
- (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
- invariant(r!=0);
+ // it should not be possible for both headers to be later than the
+ // max_acceptable_lsn
+ invariant(
+ !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
+ (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
+ invariant(r != 0);
goto exit;
}
@@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd,
invariant(version_0 >= version_1);
rb = &rb_0;
version = version_0;
- }
- else {
+ } else {
invariant(checkpoint_count_1 == checkpoint_count_0 + 1);
invariant(version_1 >= version_0);
rb = &rb_1;
@@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd,
} else if (h0_acceptable) {
if (r1 == TOKUDB_BAD_CHECKSUM) {
// print something reassuring
- fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n");
+ fprintf(
+ stderr,
+ "Header 2 checksum failed, but header 1 ok. Proceeding.\n");
}
rb = &rb_0;
version = version_0;
} else if (h1_acceptable) {
if (r0 == TOKUDB_BAD_CHECKSUM) {
// print something reassuring
- fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n");
+ fprintf(
+ stderr,
+ "Header 1 checksum failed, but header 2 ok. Proceeding.\n");
}
rb = &rb_1;
version = version_1;
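
Condensing the selection logic: a header is acceptable when it read cleanly and its checkpoint LSN does not exceed max_acceptable_lsn; with two acceptable headers the larger checkpoint_count wins (the code asserts the counts differ by exactly one). A hypothetical distillation:

    #include <cstdint>

    struct Hdr { bool read_ok; uint64_t checkpoint_count; uint64_t lsn; };

    // Returns 0 or 1 for the header to use, or -1 when neither qualifies
    // (the caller then applies the error-priority rules shown above).
    static int pick_header(const Hdr &h0, const Hdr &h1, uint64_t max_lsn) {
        bool ok0 = h0.read_ok && h0.lsn <= max_lsn;
        bool ok1 = h1.read_ok && h1.lsn <= max_lsn;
        if (ok0 && ok1)
            return h0.checkpoint_count > h1.checkpoint_count ? 0 : 1;
        return ok0 ? 0 : (ok1 ? 1 : -1);
    }
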
@@ -718,15 +756,13 @@ exit:
return r;
}
-
-size_t toku_serialize_ft_size (FT_HEADER h) {
+size_t toku_serialize_ft_size(FT_HEADER h) {
size_t size = serialize_ft_min_size(h->layout_version);
- //There is no dynamic data.
- lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ // There is no dynamic data.
+ lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
return size;
}
-
void toku_serialize_ft_to_wbuf (
struct wbuf *wbuf,
FT_HEADER h,
@@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf (
}
void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
- lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
+ lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS);
struct wbuf w_translation;
int64_t size_translation;
int64_t address_translation;
// Must serialize translation first, to get address,size for header.
- bt->serialize_translation_to_wbuf(fd, &w_translation,
- &address_translation,
- &size_translation);
- assert(size_translation == w_translation.ndone);
+ bt->serialize_translation_to_wbuf(
+ fd, &w_translation, &address_translation, &size_translation);
+ invariant(size_translation == w_translation.ndone);
- // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized.
- assert(w_translation.size % 512 == 0);
+ // the number of bytes available in the buffer is 0 mod 512, and those last
+ // bytes are all initialized.
+ invariant(w_translation.size % 512 == 0);
struct wbuf w_main;
- size_t size_main = toku_serialize_ft_size(h);
+ size_t size_main = toku_serialize_ft_size(h);
size_t size_main_aligned = roundup_to_multiple(512, size_main);
- assert(size_main_aligned<block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+ invariant(size_main_aligned <
+ BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf);
- for (size_t i=size_main; i<size_main_aligned; i++) mainbuf[i]=0; // initialize the end of the buffer with zeros
+ for (size_t i = size_main; i < size_main_aligned; i++)
+ mainbuf[i] = 0; // initialize the end of the buffer with zeros
wbuf_init(&w_main, mainbuf, size_main);
- toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation);
+ toku_serialize_ft_to_wbuf(
+ &w_main, h, address_translation, size_translation);
lazy_assert(w_main.ndone == size_main);
// Actually write translation table
- // This write is guaranteed to read good data at the end of the buffer, since the
+ // This write is guaranteed to read good data at the end of the buffer,
+ // since the
// w_translation.buf is padded with zeros to a 512-byte boundary.
- toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), address_translation);
-
- //Everything but the header MUST be on disk before header starts.
- //Otherwise we will think the header is good and some blocks might not
- //yet be on disk.
- //If the header has a cachefile we need to do cachefile fsync (to
- //prevent crash if we redirected to dev null)
- //If there is no cachefile we still need to do an fsync.
+ toku_os_full_pwrite(fd,
+ w_translation.buf,
+ roundup_to_multiple(512, size_translation),
+ address_translation);
+
+ // Everything but the header MUST be on disk before header starts.
+ // Otherwise we will think the header is good and some blocks might not
+ // yet be on disk.
+ // If the header has a cachefile we need to do cachefile fsync (to
+ // prevent crash if we redirected to dev null)
+ // If there is no cachefile we still need to do an fsync.
if (cf) {
toku_cachefile_fsync(cf);
- }
- else {
+ } else {
toku_file_fsync(fd);
}
- //Alternate writing header to two locations:
+ // Alternate writing header to two locations:
// Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE
toku_off_t main_offset;
- main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ main_offset = (h->checkpoint_count & 0x1)
+ ? 0
+ : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset);
toku_free(w_main.buf);
toku_free(w_translation.buf);
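
The final pwrite alternates the header slot by checkpoint parity, so a torn write can only damage the header being replaced, never the last good one. A one-line helper capturing the rule (hypothetical name; offsets as in the code above):

    // Odd checkpoint counts land at offset 0, even ones at the reserve slot.
    static toku_off_t header_offset_for(uint64_t checkpoint_count,
                                        toku_off_t header_reserve) {
        return (checkpoint_count & 0x1) ? 0 : header_reserve;
    }
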
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index c4f4886b6a0..5914f8a1050 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
num_cores = toku_os_get_number_active_processors();
int r = toku_thread_pool_create(&ft_pool, num_cores);
lazy_assert_zero(r);
- block_allocator::maybe_initialize_trace();
toku_serialize_in_parallel = false;
}
void toku_ft_serialize_layer_destroy(void) {
toku_thread_pool_destroy(&ft_pool);
- block_allocator::maybe_close_trace();
}
enum { FILE_CHANGE_INCREMENT = (16 << 20) };
@@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
return 0;
}
-int
-toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) {
-
+int toku_serialize_ftnode_to(int fd,
+ BLOCKNUM blocknum,
+ FTNODE node,
+ FTNODE_DISK_DATA *ndd,
+ bool do_rebalancing,
+ FT ft,
+ bool for_checkpoint) {
size_t n_to_write;
size_t n_uncompressed_bytes;
char *compressed_buf = nullptr;
- // because toku_serialize_ftnode_to is only called for
+    // because toku_serialize_ftnode_to is only called
// in toku_ftnode_flush_callback, we pass false
// for in_parallel. The reasoning is that when we write
- // nodes to disk via toku_ftnode_flush_callback, we
+ // nodes to disk via toku_ftnode_flush_callback, we
// assume that it is being done on a non-critical
- // background thread (probably for checkpointing), and therefore
+ // background thread (probably for checkpointing), and therefore
// should not hog CPU,
//
// Should the above facts change, we may want to revisit
@@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
toku_unsafe_fetch(&toku_serialize_in_parallel),
&n_to_write,
&n_uncompressed_bytes,
- &compressed_buf
- );
+ &compressed_buf);
if (r != 0) {
return r;
}
- // If the node has never been written, then write the whole buffer, including the zeros
- invariant(blocknum.b>=0);
+ // If the node has never been written, then write the whole buffer,
+ // including the zeros
+ invariant(blocknum.b >= 0);
DISKOFF offset;
// Dirties the ft
- ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
- ft, fd, for_checkpoint,
- // Allocations for nodes high in the tree are considered 'hot',
- // as they are likely to move again in the next checkpoint.
- node->height);
+ ft->blocktable.realloc_on_disk(
+ blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
tokutime_t t0 = toku_time_now();
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
tokutime_t t1 = toku_time_now();
tokutime_t io_time = t1 - t0;
- toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
+ toku_ft_status_update_flush_reason(
+ node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
toku_free(compressed_buf);
- node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+ node->dirty = 0; // See #1957. Must set the node to be clean after
+ // serializing it so that it doesn't get written again on
+ // the next checkpoint or eviction.
return 0;
}
@@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
bn->seqinsert = orig_bn->seqinsert;
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
bn->stat64_delta = orig_bn->stat64_delta;
+ bn->logical_rows_delta = orig_bn->logical_rows_delta;
bn->data_buffer.clone(&orig_bn->data_buffer);
return bn;
}
@@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
bn->seqinsert = 0;
bn->stale_ancestor_messages_applied = false;
bn->stat64_delta = ZEROSTATS;
+ bn->logical_rows_delta = 0;
bn->data_buffer.init_zero();
return bn;
}
@@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
/* out */ int *layout_version_p);
// This function upgrades a version 14 or 13 ftnode to the current
-// verison. NOTE: This code assumes the first field of the rbuf has
+// version. NOTE: This code assumes the first field of the rbuf has
// already been read from the buffer (namely the layout_version of the
// ftnode.)
static int
@@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
serialized->blocknum = log->blocknum;
}
-int
-toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
- FT ft, bool for_checkpoint) {
+int toku_serialize_rollback_log_to(int fd,
+ ROLLBACK_LOG_NODE log,
+ SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
+ bool is_serialized,
+ FT ft,
+ bool for_checkpoint) {
size_t n_to_write;
char *compressed_buf;
struct serialized_rollback_log_node serialized_local;
@@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
serialized_log->n_sub_blocks,
serialized_log->sub_block,
ft->h->compression_method,
- &n_to_write, &compressed_buf);
+ &n_to_write,
+ &compressed_buf);
// Dirties the ft
DISKOFF offset;
- ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
- ft, fd, for_checkpoint,
- // We consider rollback log flushing the hottest possible allocation,
- // since rollback logs are short-lived compared to FT nodes.
- INT_MAX);
+ ft->blocktable.realloc_on_disk(
+ blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
toku_free(compressed_buf);
if (!is_serialized) {
toku_static_serialized_rollback_log_destroy(&serialized_local);
- log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+ log->dirty = 0; // See #1957. Must set the node to be clean after
+ // serializing it so that it doesn't get written again
+ // on the next checkpoint or eviction.
}
return 0;
}
@@ -2704,7 +2711,7 @@ exit:
}
static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
- // This function exists solely to accomodate future changes in compression.
+ // This function exists solely to accommodate future changes in compression.
int r = 0;
if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
(FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
new file mode 100644
index 00000000000..922850fb3e0
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
@@ -0,0 +1,833 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "portability/toku_assert.h"
+#include "portability/toku_portability.h"
+#include <algorithm>
+
+namespace MhsRbTree {
+
+ Tree::Tree() : _root(NULL), _align(1) {}
+
+ Tree::Tree(uint64_t align) : _root(NULL), _align(align) {}
+
+ Tree::~Tree() { Destroy(); }
+
+ void Tree::PreOrder(Node *tree) const {
+ if (tree != NULL) {
+ fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+ PreOrder(tree->_left);
+ PreOrder(tree->_right);
+ }
+ }
+
+ void Tree::PreOrder() { PreOrder(_root); }
+
+ void Tree::InOrder(Node *tree) const {
+ if (tree != NULL) {
+ InOrder(tree->_left);
+ fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+ InOrder(tree->_right);
+ }
+ }
+
+    // Yeah, I only care about the in-order visitor. -Jun
+ void Tree::InOrderVisitor(Node *tree,
+ void (*f)(void *, Node *, uint64_t),
+ void *extra,
+ uint64_t depth) {
+ if (tree != NULL) {
+ InOrderVisitor(tree->_left, f, extra, depth + 1);
+ f(extra, tree, depth);
+ InOrderVisitor(tree->_right, f, extra, depth + 1);
+ }
+ }
+
+ void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t),
+ void *extra) {
+ InOrderVisitor(_root, f, extra, 0);
+ }
+
+ void Tree::InOrder() { InOrder(_root); }
+
+ void Tree::PostOrder(Node *tree) const {
+ if (tree != NULL) {
+ PostOrder(tree->_left);
+ PostOrder(tree->_right);
+ fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
+ }
+ }
+
+ void Tree::PostOrder() { PostOrder(_root); }
+
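+    // Plain BST lookup keyed by hole offset: descend left or right until
+    // the offset matches exactly, returning NULL when it is absent.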
+ Node *Tree::SearchByOffset(uint64_t offset) {
+ Node *x = _root;
+ while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) {
+ if (offset < rbn_offset(x).ToInt())
+ x = x->_left;
+ else
+ x = x->_right;
+ }
+
+ return x;
+ }
+
+ // mostly for testing
+ Node *Tree::SearchFirstFitBySize(uint64_t size) {
+ if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size &&
+ rbn_right_mhs(_root) < size) {
+ return nullptr;
+ } else {
+ return SearchFirstFitBySizeHelper(_root, size);
+ }
+ }
+
+ Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) {
+ if (EffectiveSize(x) >= size) {
+ // only possible to go left
+ if (rbn_left_mhs(x) >= size)
+ return SearchFirstFitBySizeHelper(x->_left, size);
+ else
+ return x;
+ }
+ if (rbn_left_mhs(x) >= size)
+ return SearchFirstFitBySizeHelper(x->_left, size);
+
+ if (rbn_right_mhs(x) >= size)
+ return SearchFirstFitBySizeHelper(x->_right, size);
+
+ // this is an invalid state
+ Dump();
+ ValidateBalance();
+ ValidateMhs();
+ invariant(0);
+ return NULL;
+ }
+
+ Node *Tree::MinNode(Node *tree) {
+ if (tree == NULL)
+ return NULL;
+
+ while (tree->_left != NULL)
+ tree = tree->_left;
+ return tree;
+ }
+
+ Node *Tree::MinNode() { return MinNode(_root); }
+
+ Node *Tree::MaxNode(Node *tree) {
+ if (tree == NULL)
+ return NULL;
+
+ while (tree->_right != NULL)
+ tree = tree->_right;
+ return tree;
+ }
+
+ Node *Tree::MaxNode() { return MaxNode(_root); }
+
+ Node *Tree::SuccessorHelper(Node *y, Node *x) {
+ while ((y != NULL) && (x == y->_right)) {
+ x = y;
+ y = y->_parent;
+ }
+ return y;
+ }
+ Node *Tree::Successor(Node *x) {
+ if (x->_right != NULL)
+ return MinNode(x->_right);
+
+ Node *y = x->_parent;
+ return SuccessorHelper(y, x);
+ }
+
+ Node *Tree::PredecessorHelper(Node *y, Node *x) {
+ while ((y != NULL) && (x == y->_left)) {
+ x = y;
+ y = y->_parent;
+ }
+
+ return y;
+ }
+ Node *Tree::Predecessor(Node *x) {
+ if (x->_left != NULL)
+ return MaxNode(x->_left);
+
+ Node *y = x->_parent;
+        return PredecessorHelper(y, x);
+ }
+
+ /*
+ * px px
+ * / /
+ * x y
+ * / \ --(left rotation)--> / \ #
+ * lx y x ry
+ * / \ / \
+ * ly ry lx ly
+ * max_hole_size updates are pretty local
+ */
+
+ void Tree::LeftRotate(Node *&root, Node *x) {
+ Node *y = x->_right;
+
+ x->_right = y->_left;
+ rbn_right_mhs(x) = rbn_left_mhs(y);
+
+ if (y->_left != NULL)
+ y->_left->_parent = x;
+
+ y->_parent = x->_parent;
+
+ if (x->_parent == NULL) {
+ root = y;
+ } else {
+ if (x->_parent->_left == x) {
+ x->_parent->_left = y;
+ } else {
+ x->_parent->_right = y;
+ }
+ }
+ y->_left = x;
+ rbn_left_mhs(y) = mhs_of_subtree(x);
+
+ x->_parent = y;
+ }
+
+ /* py py
+ * / /
+ * y x
+ * / \ --(right rotate)--> / \ #
+ * x ry lx y
+ * / \ / \ #
+ * lx rx rx ry
+ *
+ */
+
+ void Tree::RightRotate(Node *&root, Node *y) {
+ Node *x = y->_left;
+
+ y->_left = x->_right;
+ rbn_left_mhs(y) = rbn_right_mhs(x);
+
+ if (x->_right != NULL)
+ x->_right->_parent = y;
+
+ x->_parent = y->_parent;
+
+ if (y->_parent == NULL) {
+ root = x;
+ } else {
+ if (y == y->_parent->_right)
+ y->_parent->_right = x;
+ else
+ y->_parent->_left = x;
+ }
+
+ x->_right = y;
+ rbn_right_mhs(x) = mhs_of_subtree(y);
+ y->_parent = x;
+ }
+
+ // walking from this node up to update the mhs info
+    // whenever the left/right mhs or the size changes we should recalculate.
+    // prerequisite: the node's children have up-to-date mhs values.
+ void Tree::RecalculateMhs(Node *node) {
+ uint64_t *p_node_mhs = 0;
+ Node *parent = node->_parent;
+
+ if (!parent)
+ return;
+
+ uint64_t max_mhs = mhs_of_subtree(node);
+ if (node == parent->_left) {
+ p_node_mhs = &rbn_left_mhs(parent);
+ } else if (node == parent->_right) {
+ p_node_mhs = &rbn_right_mhs(parent);
+ } else {
+ return;
+ }
+ if (*p_node_mhs != max_mhs) {
+ *p_node_mhs = max_mhs;
+ RecalculateMhs(parent);
+ }
+ }
+
+ void Tree::IsNewNodeMergable(Node *pred,
+ Node *succ,
+ Node::BlockPair pair,
+ bool *left_merge,
+ bool *right_merge) {
+ if (pred) {
+ OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred);
+ if (end_of_pred < pair._offset)
+ *left_merge = false;
+ else {
+ invariant(end_of_pred == pair._offset);
+ *left_merge = true;
+ }
+ }
+ if (succ) {
+ OUUInt64 begin_of_succ = rbn_offset(succ);
+ OUUInt64 end_of_node = pair._offset + pair._size;
+ if (end_of_node < begin_of_succ) {
+ *right_merge = false;
+ } else {
+ invariant(end_of_node == begin_of_succ);
+ *right_merge = true;
+ }
+ }
+ }
+
+ void Tree::AbsorbNewNode(Node *pred,
+ Node *succ,
+ Node::BlockPair pair,
+ bool left_merge,
+ bool right_merge,
+ bool is_right_child) {
+ invariant(left_merge || right_merge);
+ if (left_merge && right_merge) {
+ // merge to the succ
+ if (!is_right_child) {
+ rbn_size(succ) += pair._size;
+ rbn_offset(succ) = pair._offset;
+ // merge to the pred
+ rbn_size(pred) += rbn_size(succ);
+                // to keep the invariant of the tree - no overlapping holes
+ rbn_offset(succ) += rbn_size(succ);
+ rbn_size(succ) = 0;
+ RecalculateMhs(succ);
+ RecalculateMhs(pred);
+ // pred dominates succ. this is going to
+ // update the pred labels separately.
+ // remove succ
+ RawRemove(_root, succ);
+ } else {
+ rbn_size(pred) += pair._size;
+ rbn_offset(succ) = rbn_offset(pred);
+ rbn_size(succ) += rbn_size(pred);
+ rbn_offset(pred) += rbn_size(pred);
+ rbn_size(pred) = 0;
+ RecalculateMhs(pred);
+ RecalculateMhs(succ);
+ // now remove pred
+ RawRemove(_root, pred);
+ }
+ } else if (left_merge) {
+ rbn_size(pred) += pair._size;
+ RecalculateMhs(pred);
+ } else if (right_merge) {
+ rbn_offset(succ) -= pair._size;
+ rbn_size(succ) += pair._size;
+ RecalculateMhs(succ);
+ }
+ }
+    // this is the most tedious part, but not complicated:
+    // 1. find where to insert the pair
+    // 2. if both pred and succ can merge with the pair, merge with them;
+    //    either pred or succ can then be removed
+    // 3. if only left-mergeable or right-mergeable, just merge
+    // 4. non-mergeable case: insert the node and run the fixup
+
+ int Tree::Insert(Node *&root, Node::BlockPair pair) {
+ Node *x = _root;
+ Node *y = NULL;
+ bool left_merge = false;
+ bool right_merge = false;
+ Node *node = NULL;
+
+ while (x != NULL) {
+ y = x;
+ if (pair._offset < rbn_key(x))
+ x = x->_left;
+ else
+ x = x->_right;
+ }
+
+        // we found where to insert; let's find the pred and succ for
+        // possible merges.
+ // node->parent = y;
+ Node *pred, *succ;
+ if (y != NULL) {
+ if (pair._offset < rbn_key(y)) {
+ // as the left child
+ pred = PredecessorHelper(y->_parent, y);
+ succ = y;
+ IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+ if (left_merge || right_merge) {
+ AbsorbNewNode(
+ pred, succ, pair, left_merge, right_merge, false);
+ } else {
+ // construct the node
+ Node::Pair mhsp {0, 0};
+ node =
+ new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ if (!node)
+ return -1;
+ y->_left = node;
+ node->_parent = y;
+ RecalculateMhs(node);
+ }
+
+ } else {
+ // as the right child
+ pred = y;
+ succ = SuccessorHelper(y->_parent, y);
+ IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+ if (left_merge || right_merge) {
+ AbsorbNewNode(
+ pred, succ, pair, left_merge, right_merge, true);
+ } else {
+ // construct the node
+ Node::Pair mhsp {0, 0};
+ node =
+ new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ if (!node)
+ return -1;
+ y->_right = node;
+ node->_parent = y;
+ RecalculateMhs(node);
+ }
+ }
+ } else {
+ Node::Pair mhsp {0, 0};
+ node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+ if (!node)
+ return -1;
+ root = node;
+ }
+ if (!left_merge && !right_merge) {
+ invariant_notnull(node);
+ node->_color = EColor::RED;
+ return InsertFixup(root, node);
+ }
+ return 0;
+ }
+
+ int Tree::InsertFixup(Node *&root, Node *node) {
+ Node *parent, *gparent;
+ while ((parent = rbn_parent(node)) && rbn_is_red(parent)) {
+ gparent = rbn_parent(parent);
+ if (parent == gparent->_left) {
+ {
+ Node *uncle = gparent->_right;
+ if (uncle && rbn_is_red(uncle)) {
+ rbn_set_black(uncle);
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->_right == node) {
+ Node *tmp;
+ LeftRotate(root, parent);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ RightRotate(root, gparent);
+ } else {
+ {
+ Node *uncle = gparent->_left;
+ if (uncle && rbn_is_red(uncle)) {
+ rbn_set_black(uncle);
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->_left == node) {
+ Node *tmp;
+ RightRotate(root, parent);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+ rbn_set_black(parent);
+ rbn_set_red(gparent);
+ LeftRotate(root, gparent);
+ }
+ }
+ rbn_set_black(root);
+ return 0;
+ }
+
+ int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); }
+
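+    // Allocation entry point: find the first hole that can fit the request
+    // after alignment, then carve the bytes out of it (splitting the hole
+    // when the aligned cut lands in its middle).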
+ uint64_t Tree::Remove(size_t size) {
+ Node *node = SearchFirstFitBySize(size);
+ return Remove(_root, node, size);
+ }
+
+ void Tree::RawRemove(Node *&root, Node *node) {
+ Node *child, *parent;
+ EColor color;
+
+ if ((node->_left != NULL) && (node->_right != NULL)) {
+ Node *replace = node;
+ replace = replace->_right;
+ while (replace->_left != NULL)
+ replace = replace->_left;
+
+ if (rbn_parent(node)) {
+ if (rbn_parent(node)->_left == node)
+ rbn_parent(node)->_left = replace;
+ else
+ rbn_parent(node)->_right = replace;
+ } else {
+ root = replace;
+ }
+ child = replace->_right;
+ parent = rbn_parent(replace);
+ color = rbn_color(replace);
+
+ if (parent == node) {
+ parent = replace;
+ } else {
+ if (child)
+ rbn_parent(child) = parent;
+
+ parent->_left = child;
+ rbn_left_mhs(parent) = rbn_right_mhs(replace);
+ RecalculateMhs(parent);
+ replace->_right = node->_right;
+ rbn_set_parent(node->_right, replace);
+ rbn_right_mhs(replace) = rbn_right_mhs(node);
+ }
+
+ replace->_parent = node->_parent;
+ replace->_color = node->_color;
+ replace->_left = node->_left;
+ rbn_left_mhs(replace) = rbn_left_mhs(node);
+ node->_left->_parent = replace;
+ RecalculateMhs(replace);
+ if (color == EColor::BLACK)
+ RawRemoveFixup(root, child, parent);
+ delete node;
+ return;
+ }
+
+ if (node->_left != NULL)
+ child = node->_left;
+ else
+ child = node->_right;
+
+ parent = node->_parent;
+ color = node->_color;
+
+ if (child)
+ child->_parent = parent;
+
+ if (parent) {
+ if (parent->_left == node) {
+ parent->_left = child;
+ rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+ } else {
+ parent->_right = child;
+ rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+ }
+ RecalculateMhs(parent);
+ } else
+ root = child;
+ if (color == EColor::BLACK)
+ RawRemoveFixup(root, child, parent);
+ delete node;
+ }
+
+ void Tree::RawRemove(uint64_t offset) {
+ Node *node = SearchByOffset(offset);
+ RawRemove(_root, node);
+ }
+ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+ return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
+ }
+ uint64_t Tree::Remove(Node *&root, Node *node, size_t size) {
+ OUUInt64 n_offset = rbn_offset(node);
+ OUUInt64 n_size = rbn_size(node);
+ OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align));
+
+ invariant((answer_offset + size) <= (n_offset + n_size));
+ if (answer_offset == n_offset) {
+ rbn_offset(node) += size;
+ rbn_size(node) -= size;
+ RecalculateMhs(node);
+ if (rbn_size(node) == 0) {
+ RawRemove(root, node);
+ }
+
+ } else {
+ if (answer_offset + size == n_offset + n_size) {
+ rbn_size(node) -= size;
+ RecalculateMhs(node);
+ } else {
+ // well, cut in the middle...
+ rbn_size(node) = answer_offset - n_offset;
+ RecalculateMhs(node);
+ Insert(_root,
+ {(answer_offset + size),
+ (n_offset + n_size) - (answer_offset + size)});
+ }
+ }
+ return answer_offset.ToInt();
+ }
+
+ void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) {
+ Node *other;
+ while ((!node || rbn_is_black(node)) && node != root) {
+ if (parent->_left == node) {
+ other = parent->_right;
+ if (rbn_is_red(other)) {
+                    // Case 1: the brother of X, w, is red
+ rbn_set_black(other);
+ rbn_set_red(parent);
+ LeftRotate(root, parent);
+ other = parent->_right;
+ }
+ if ((!other->_left || rbn_is_black(other->_left)) &&
+ (!other->_right || rbn_is_black(other->_right))) {
+ // Case 2: w is black and both of w's children are black
+ rbn_set_red(other);
+ node = parent;
+ parent = rbn_parent(node);
+ } else {
+ if (!other->_right || rbn_is_black(other->_right)) {
+                        // Case 3: w is black, its left child is red and
+                        // its right child is black
+ rbn_set_black(other->_left);
+ rbn_set_red(other);
+ RightRotate(root, other);
+ other = parent->_right;
+ }
+                    // Case 4: w is black and its right child is red,
+                    // regardless of the left child's color
+ rbn_set_color(other, rbn_color(parent));
+ rbn_set_black(parent);
+ rbn_set_black(other->_right);
+ LeftRotate(root, parent);
+ node = root;
+ break;
+ }
+ } else {
+ other = parent->_left;
+ if (rbn_is_red(other)) {
+ // Case 1: w is red
+ rbn_set_black(other);
+ rbn_set_red(parent);
+ RightRotate(root, parent);
+ other = parent->_left;
+ }
+ if ((!other->_left || rbn_is_black(other->_left)) &&
+ (!other->_right || rbn_is_black(other->_right))) {
+ // Case 2: w is black and both children are black
+ rbn_set_red(other);
+ node = parent;
+ parent = rbn_parent(node);
+ } else {
+ if (!other->_left || rbn_is_black(other->_left)) {
+ // Case 3: w is black and left child of w is red whereas
+ // right child is black
+ rbn_set_black(other->_right);
+ rbn_set_red(other);
+ LeftRotate(root, other);
+ other = parent->_left;
+ }
+                    // Case 4: w is black and its right child is red,
+                    // regardless of the left child's color
+ rbn_set_color(other, rbn_color(parent));
+ rbn_set_black(parent);
+ rbn_set_black(other->_left);
+ RightRotate(root, parent);
+ node = root;
+ break;
+ }
+ }
+ }
+ if (node)
+ rbn_set_black(node);
+ }
+
+ void Tree::Destroy(Node *&tree) {
+ if (tree == NULL)
+ return;
+
+ if (tree->_left != NULL)
+ Destroy(tree->_left);
+ if (tree->_right != NULL)
+ Destroy(tree->_right);
+
+ delete tree;
+ tree = NULL;
+ }
+
+ void Tree::Destroy() { Destroy(_root); }
+
+ void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) {
+ if (tree != NULL) {
+ if (dir == EDirection::NONE)
+ fprintf(stderr,
+ "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64
+ "))(B) is root\n",
+ rbn_offset(tree).ToInt(),
+ rbn_size(tree).ToInt(),
+ rbn_left_mhs(tree),
+ rbn_right_mhs(tree));
+ else
+ fprintf(stderr,
+ "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64
+ "))(%c) is %" PRIu64 "'s %s\n",
+ rbn_offset(tree).ToInt(),
+ rbn_size(tree).ToInt(),
+ rbn_left_mhs(tree),
+ rbn_right_mhs(tree),
+ rbn_is_red(tree) ? 'R' : 'B',
+ pair._offset.ToInt(),
+ dir == EDirection::RIGHT ? "right child" : "left child");
+
+ Dump(tree->_left, tree->_hole, EDirection::LEFT);
+ Dump(tree->_right, tree->_hole, EDirection::RIGHT);
+ }
+ }
+
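+    // Usable bytes of this node's hole once its offset is rounded up to
+    // _align; zero when the alignment padding consumes the entire hole.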
+ uint64_t Tree::EffectiveSize(Node *node) {
+ OUUInt64 offset = rbn_offset(node);
+ OUUInt64 size = rbn_size(node);
+ OUUInt64 end = offset + size;
+ OUUInt64 aligned_offset(align(offset.ToInt(), _align));
+ if (aligned_offset > end) {
+ return 0;
+ }
+ return (end - aligned_offset).ToInt();
+ }
+
+ void Tree::Dump() {
+ if (_root != NULL)
+ Dump(_root, _root->_hole, (EDirection)0);
+ }
+
+ static void vis_bal_f(void *extra, Node *node, uint64_t depth) {
+ uint64_t **p = (uint64_t **)extra;
+ uint64_t min = *p[0];
+ uint64_t max = *p[1];
+ if (node->_left) {
+ Node *left = node->_left;
+ invariant(node == left->_parent);
+ }
+
+ if (node->_right) {
+ Node *right = node->_right;
+ invariant(node == right->_parent);
+ }
+
+        if (!node->_left || !node->_right) {
+            // track both bounds independently (an else-if here would let
+            // one bound shadow the other)
+            if (min > depth) {
+                *p[0] = depth;
+            }
+            if (max < depth) {
+                *p[1] = depth;
+            }
+        }
+ }
+
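+    // A red-black tree keeps the longest root-to-leaf path within twice the
+    // shortest; collect min/max leaf depths in-order and check that bound.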
+ void Tree::ValidateBalance() {
+ uint64_t min_depth = 0xffffffffffffffff;
+ uint64_t max_depth = 0;
+ if (!_root) {
+ return;
+ }
+ uint64_t *p[2] = {&min_depth, &max_depth};
+ InOrderVisitor(vis_bal_f, (void *)p);
+ invariant((min_depth + 1) * 2 >= max_depth + 1);
+ }
+
+ static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) {
+ Node::BlockPair **p = (Node::BlockPair **)extra;
+
+ invariant_notnull(*p);
+ invariant((*p)->_offset == node->_hole._offset);
+
+ *p = *p + 1;
+ }
+
+ // validate the input pairs matches with sorted pairs
+ void Tree::ValidateInOrder(Node::BlockPair *pairs) {
+ InOrderVisitor(vis_cmp_f, &pairs);
+ }
+
+ uint64_t Tree::ValidateMhs(Node *node) {
+ if (!node)
+ return 0;
+ else {
+ uint64_t mhs_left = ValidateMhs(node->_left);
+ uint64_t mhs_right = ValidateMhs(node->_right);
+ if (mhs_left != rbn_left_mhs(node)) {
+ printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left);
+ Dump(node, node->_hole, (EDirection)0);
+ }
+ invariant(mhs_left == rbn_left_mhs(node));
+
+ if (mhs_right != rbn_right_mhs(node)) {
+ printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right);
+ Dump(node, node->_hole, (EDirection)0);
+ }
+ invariant(mhs_right == rbn_right_mhs(node));
+ return std::max(EffectiveSize(node), std::max(mhs_left, mhs_right));
+ }
+ }
+
+ void Tree::ValidateMhs() {
+ if (!_root)
+ return;
+ uint64_t mhs_left = ValidateMhs(_root->_left);
+ uint64_t mhs_right = ValidateMhs(_root->_right);
+ invariant(mhs_left == rbn_left_mhs(_root));
+ invariant(mhs_right == rbn_right_mhs(_root));
+ }
+
+} // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
new file mode 100644
index 00000000000..eb8c953b08c
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
@@ -0,0 +1,355 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <db.h>
+
+#include "portability/toku_pthread.h"
+#include "portability/toku_stdint.h"
+#include "portability/toku_stdlib.h"
+
+// RBTree (red-black tree) with max hole sizes for subtrees.
+
+// This is a tentative data structure to improve the block allocation time
+// complexity from linear to logarithmic. Note that it only supports
+// first-fit for now; best-fit would actually be easier (just sort by size
+// instead).
+
+// An RBTree is a classic data structure with O(log(n)) insertion, deletion
+// and search, and its efficiency is well established.
+
+// A *hole* is the representation of an available BlockPair for allocation,
+// defined as (start_address, size) or (offset, size) interchangeably.
+
+// Each node carries a *label*: a pair of the max hole sizes of its left and
+// right subtrees.
+
+// We are implementing an RBTree with max hole sizes for subtrees: a red-black
+// tree sorted by start_address but also labeled with the max hole sizes of
+// the subtrees.
+
+// [(6,3)] -> [(offset, size)], the hole
+// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label
+/* / \ */
+// [(0, 1)] [(10, 5)]
+// [{0, 2}] [{0, 0}]
+/* \ */
+// [(3, 2)]
+// [{0, 0}]
+// A request for an allocation of size=2 descends from the root to [(3,2)].
+
+// The example above shows a simplified RBTree_max_holes. It is easy to see
+// that search is O(log(n)): we can make a decision at each descent until we
+// reach the target.
+
+// The only question is whether we can keep the maintenance cost low -- and
+// that is not a problem, because an insertion/deletion only updates the
+// max_hole_sizes of the nodes along the path from the root to the node being
+// inserted/deleted. That path can be cached, and search is O(log(n)) anyway.
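+
+// As a hedged sketch (the actual logic lives in the accompanying .cc file),
+// the first-fit descent for a request of `size` looks roughly like this,
+// using the helpers/macros declared later in this header:
+//
+//     Node *n = root;
+//     while (n != NULL) {
+//         if (rbn_left_mhs(n) >= size)        // a fitting hole exists at a
+//             n = n->_left;                   // lower offset: go left
+//         else if (EffectiveSize(n) >= size)  // this node's own hole fits
+//             return n;
+//         else if (rbn_right_mhs(n) >= size)  // otherwise try higher offsets
+//             n = n->_right;
+//         else
+//             return NULL;                    // no hole is large enough
+//     }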
+
+// Unlike a typical rbtree, this Tree has to handle inserts and deletes with
+// more care: an allocation that triggers a delete may leave some unused
+// space, in which case we can simply update the start_addr and size without
+// worrying about overlap. A free may mean not just an insertion but also
+// *merging* with the adjacent holes.
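+// For example (illustrative numbers): freeing the block (7, 1) while holes
+// (6, 1) and (8, 2) already exist should collapse all three into the single
+// hole (6, 4) rather than inserting a new node.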
+
+namespace MhsRbTree {
+
+#define offset_t uint64_t
+ enum class EColor { RED, BLACK };
+ enum class EDirection { NONE = 0, LEFT, RIGHT };
+
+    // I am a bit tired of fixing overflow/underflow, so I quickly crafted
+    // an int class that has an infinity-like max value and prevents both
+    // overflow and underflow. If you have a file offset larger than
+    // MHS_MAX_VAL, that is not a problem this class solves. :-/ - JYM
+ class OUUInt64 {
+ public:
+ static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
+ OUUInt64() : _value(0) {}
+ OUUInt64(uint64_t s) : _value(s) {}
+ OUUInt64(const OUUInt64& o) : _value(o._value) {}
+ bool operator<(const OUUInt64 &r) const {
+ invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+ return _value < r.ToInt();
+ }
+ bool operator>(const OUUInt64 &r) const {
+ invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+ return _value > r.ToInt();
+ }
+ bool operator<=(const OUUInt64 &r) const {
+ invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+ return _value <= r.ToInt();
+ }
+ bool operator>=(const OUUInt64 &r) const {
+ invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+ return _value >= r.ToInt();
+ }
+ OUUInt64 operator+(const OUUInt64 &r) const {
+ if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) {
+ OUUInt64 tmp(MHS_MAX_VAL);
+ return tmp;
+ } else {
+ // detecting overflow
+ invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+ uint64_t plus = _value + r.ToInt();
+ OUUInt64 tmp(plus);
+ return tmp;
+ }
+ }
+ OUUInt64 operator-(const OUUInt64 &r) const {
+ invariant(r.ToInt() != MHS_MAX_VAL);
+ if (_value == MHS_MAX_VAL) {
+ return *this;
+ } else {
+ invariant(_value >= r.ToInt());
+ uint64_t minus = _value - r.ToInt();
+ OUUInt64 tmp(minus);
+ return tmp;
+ }
+ }
+ OUUInt64 operator-=(const OUUInt64 &r) {
+ if (_value != MHS_MAX_VAL) {
+ invariant(r.ToInt() != MHS_MAX_VAL);
+ invariant(_value >= r.ToInt());
+ _value -= r.ToInt();
+ }
+ return *this;
+ }
+ OUUInt64 operator+=(const OUUInt64 &r) {
+ if (_value != MHS_MAX_VAL) {
+ if (r.ToInt() == MHS_MAX_VAL) {
+ _value = MHS_MAX_VAL;
+ } else {
+ invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+ this->_value += r.ToInt();
+ }
+ }
+ return *this;
+ }
+ bool operator==(const OUUInt64 &r) const {
+ return _value == r.ToInt();
+ }
+ bool operator!=(const OUUInt64 &r) const {
+ return _value != r.ToInt();
+ }
+ OUUInt64 operator=(const OUUInt64 &r) {
+ _value = r.ToInt();
+ return *this;
+ }
+ uint64_t ToInt() const { return _value; }
+
+ private:
+ uint64_t _value;
+ };
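+
+    // Illustrative behavior (a sketch, not part of the original source):
+    //     OUUInt64 a(5), inf(OUUInt64::MHS_MAX_VAL);
+    //     (a + inf) == inf;  // addition saturates at the "infinite" value
+    //     (inf - a) == inf;  // subtracting from infinity stays infinite
+    //     a - OUUInt64(6);   // would trip the underflow invariant, not wrap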
+
+ class Node {
+ public:
+ class BlockPair {
+ public:
+ OUUInt64 _offset;
+ OUUInt64 _size;
+
+ BlockPair() : _offset(0), _size(0) {}
+ BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+ BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
+ BlockPair(const BlockPair &o)
+ : _offset(o._offset), _size(o._size) {}
+
+ int operator<(const BlockPair &rhs) const {
+ return _offset < rhs._offset;
+ }
+ int operator<(const uint64_t &o) const { return _offset < o; }
+ };
+
+ struct Pair {
+ uint64_t _left;
+ uint64_t _right;
+ Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {}
+ };
+
+ EColor _color;
+ BlockPair _hole;
+ Pair _label;
+ Node *_left;
+ Node *_right;
+ Node *_parent;
+
+ Node(EColor c,
+ Node::BlockPair h,
+ Pair lb,
+ Node *l,
+ Node *r,
+ Node *p)
+ : _color(c),
+ _hole(h),
+ _label(lb),
+ _left(l),
+ _right(r),
+ _parent(p) {}
+ };
+
+ class Tree {
+ private:
+ Node *_root;
+ uint64_t _align;
+
+ public:
+ Tree();
+ Tree(uint64_t);
+ ~Tree();
+
+ void PreOrder();
+ void InOrder();
+ void PostOrder();
+ // immutable operations
+ Node *SearchByOffset(uint64_t addr);
+ Node *SearchFirstFitBySize(uint64_t size);
+
+ Node *MinNode();
+ Node *MaxNode();
+
+ Node *Successor(Node *);
+ Node *Predecessor(Node *);
+
+ // mapped from tree_allocator::free_block
+ int Insert(Node::BlockPair pair);
+ // mapped from tree_allocator::alloc_block
+ uint64_t Remove(size_t size);
+ // mapped from tree_allocator::alloc_block_after
+
+ void RawRemove(uint64_t offset);
+ void Destroy();
+ // print the tree
+ void Dump();
+ // validation
+ // balance
+ void ValidateBalance();
+ void ValidateInOrder(Node::BlockPair *);
+ void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *);
+ void ValidateMhs();
+
+ private:
+ void PreOrder(Node *node) const;
+ void InOrder(Node *node) const;
+ void PostOrder(Node *node) const;
+ Node *SearchByOffset(Node *node, offset_t addr) const;
+ Node *SearchFirstFitBySize(Node *node, size_t size) const;
+
+ Node *MinNode(Node *node);
+ Node *MaxNode(Node *node);
+
+ // rotations to fix up. we will have to update the labels too.
+ void LeftRotate(Node *&root, Node *x);
+ void RightRotate(Node *&root, Node *y);
+
+ int Insert(Node *&root, Node::BlockPair pair);
+ int InsertFixup(Node *&root, Node *node);
+
+ void RawRemove(Node *&root, Node *node);
+ uint64_t Remove(Node *&root, Node *node, size_t size);
+ void RawRemoveFixup(Node *&root, Node *node, Node *parent);
+
+ void Destroy(Node *&tree);
+ void Dump(Node *tree, Node::BlockPair pair, EDirection dir);
+ void RecalculateMhs(Node *node);
+ void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *);
+ void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool);
+ Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size);
+
+ Node *SuccessorHelper(Node *y, Node *x);
+
+ Node *PredecessorHelper(Node *y, Node *x);
+
+ void InOrderVisitor(Node *,
+ void (*f)(void *, Node *, uint64_t),
+ void *,
+ uint64_t);
+ uint64_t ValidateMhs(Node *);
+
+ uint64_t EffectiveSize(Node *);
+// helper accessors and mutators, implemented as macros:
+#define rbn_parent(r) ((r)->_parent)
+#define rbn_color(r) ((r)->_color)
+#define rbn_is_red(r) ((r)->_color == EColor::RED)
+#define rbn_is_black(r) ((r)->_color == EColor::BLACK)
+#define rbn_set_black(r) \
+ do { \
+ (r)->_color = EColor::BLACK; \
+ } while (0)
+#define rbn_set_red(r) \
+ do { \
+ (r)->_color = EColor::RED; \
+ } while (0)
+#define rbn_set_parent(r, p) \
+ do { \
+ (r)->_parent = (p); \
+ } while (0)
+#define rbn_set_color(r, c) \
+ do { \
+ (r)->_color = (c); \
+ } while (0)
+#define rbn_set_offset(r, c) \
+ do { \
+ (r)->_hole._offset = (c); \
+ } while (0)
+#define rbn_set_size(r, c) \
+ do { \
+ (r)->_hole._size = (c); \
+ } while (0)
+#define rbn_set_left_mhs(r, c) \
+ do { \
+ (r)->_label._left = (c); \
+ } while (0)
+#define rbn_set_right_mhs(r, c) \
+ do { \
+ (r)->_label._right = (c); \
+ } while (0)
+#define rbn_size(r) ((r)->_hole._size)
+#define rbn_offset(r) ((r)->_hole._offset)
+#define rbn_key(r) ((r)->_hole._offset)
+#define rbn_left_mhs(r) ((r)->_label._left)
+#define rbn_right_mhs(r) ((r)->_label._right)
+#define mhs_of_subtree(y) \
+ (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y)))
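+// Example (illustrative, assuming _align == 1): for the root of the diagram
+// near the top of this file -- hole (6,3) with label {2,5} -- rbn_left_mhs
+// is 2 (from the (3,2) hole), rbn_right_mhs is 5 (from the (10,5) hole) and
+// EffectiveSize is 3, so mhs_of_subtree(root) == max(max(2, 5), 3) == 5.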
+ };
+
+} // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
deleted file mode 100644
index 3670ef81cc2..00000000000
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include "ft/tests/test.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static const uint64_t alignment = 4096;
-
-static void test_first_vs_best_fit(void) {
- struct block_allocator::blockpair pairs[] = {
- block_allocator::blockpair(1 * alignment, 6 * alignment),
- // hole between 7x align -> 8x align
- block_allocator::blockpair(8 * alignment, 4 * alignment),
- // hole between 12x align -> 16x align
- block_allocator::blockpair(16 * alignment, 1 * alignment),
- block_allocator::blockpair(17 * alignment, 2 * alignment),
- // hole between 19 align -> 21x align
- block_allocator::blockpair(21 * alignment, 2 * alignment),
- };
- const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-
- block_allocator::blockpair *bp;
-
- // first fit
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
- assert(bp == &pairs[1]);
- bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
- assert(bp == nullptr);
-
- // best fit
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
- assert(bp == &pairs[0]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
- assert(bp == &pairs[3]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
- assert(bp == &pairs[1]);
- bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
- assert(bp == nullptr);
-}
-
-static void test_padded_fit(void) {
- struct block_allocator::blockpair pairs[] = {
- block_allocator::blockpair(1 * alignment, 1 * alignment),
- // 4096 byte hole after bp[0]
- block_allocator::blockpair(3 * alignment, 1 * alignment),
- // 8192 byte hole after bp[1]
- block_allocator::blockpair(6 * alignment, 1 * alignment),
- // 16384 byte hole after bp[2]
- block_allocator::blockpair(11 * alignment, 1 * alignment),
- // 32768 byte hole after bp[3]
- block_allocator::blockpair(17 * alignment, 1 * alignment),
- // 116kb hole after bp[4]
- block_allocator::blockpair(113 * alignment, 1 * alignment),
- // 256kb hole after bp[5]
- block_allocator::blockpair(371 * alignment, 1 * alignment),
- };
- const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-
- block_allocator::blockpair *bp;
-
- // padding for a 100 byte allocation will be < than standard alignment,
- // so it should fit in the first 4096 byte hole.
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
- assert(bp == &pairs[0]);
-
- // Even padded, a 12kb alloc will fit in a 16kb hole
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
- assert(bp == &pairs[2]);
-
- // would normally fit in the 116kb hole but the padding will bring it over
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
- assert(bp == &pairs[5]);
-
- bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
- assert(bp == &pairs[5]);
-}
-
-int test_main(int argc, const char *argv[]) {
- (void) argc;
- (void) argv;
-
- test_first_vs_best_fit();
- test_padded_fit();
-
- return 0;
-}
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
index d80ee83cbc9..3eff52b915d 100644
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
@@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
-static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
- ba->validate();
+static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) {
+ ba->Validate();
uint64_t actual_answer;
- const uint64_t heat = random() % 2;
- ba->alloc_block(512 * size, heat, &actual_answer);
- ba->validate();
+ ba->AllocBlock(512 * size, &actual_answer);
+ ba->Validate();
- assert(actual_answer%512==0);
- *answer = actual_answer/512;
+ invariant(actual_answer % 512 == 0);
+ *answer = actual_answer / 512;
}
-static void ba_free(block_allocator *ba, uint64_t offset) {
- ba->validate();
- ba->free_block(offset * 512);
- ba->validate();
+static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) {
+ ba->Validate();
+ ba->FreeBlock(offset * 512, 512 * size);
+ ba->Validate();
}
-static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order,
- uint64_t expected_offset, uint64_t expected_size) {
+static void ba_check_l(BlockAllocator *ba,
+ uint64_t blocknum_in_layout_order,
+ uint64_t expected_offset,
+ uint64_t expected_size) {
uint64_t actual_offset, actual_size;
- int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
- assert(r==0);
- assert(expected_offset*512 == actual_offset);
- assert(expected_size *512 == actual_size);
+ int r = ba->NthBlockInLayoutOrder(
+ blocknum_in_layout_order, &actual_offset, &actual_size);
+ invariant(r == 0);
+ invariant(expected_offset * 512 == actual_offset);
+ invariant(expected_size * 512 == actual_size);
}
-static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) {
+static void ba_check_none(BlockAllocator *ba,
+ uint64_t blocknum_in_layout_order) {
uint64_t actual_offset, actual_size;
- int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
- assert(r==-1);
+ int r = ba->NthBlockInLayoutOrder(
+ blocknum_in_layout_order, &actual_offset, &actual_size);
+ invariant(r == -1);
}
-
// Simple block allocator test
-static void test_ba0(block_allocator::allocation_strategy strategy) {
- block_allocator allocator;
- block_allocator *ba = &allocator;
- ba->create(100*512, 1*512);
- ba->set_strategy(strategy);
- assert(ba->allocated_limit()==100*512);
+static void test_ba0() {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
+ ba->Create(100 * 512, 1 * 512);
+ invariant(ba->AllocatedLimit() == 100 * 512);
uint64_t b2, b3, b4, b5, b6, b7;
- ba_alloc(ba, 100, &b2);
- ba_alloc(ba, 100, &b3);
- ba_alloc(ba, 100, &b4);
- ba_alloc(ba, 100, &b5);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b7);
- ba_free(ba, b2);
- ba_alloc(ba, 100, &b2);
- ba_free(ba, b4);
- ba_free(ba, b6);
+ ba_alloc(ba, 100, &b2);
+ ba_alloc(ba, 100, &b3);
+ ba_alloc(ba, 100, &b4);
+ ba_alloc(ba, 100, &b5);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b7);
+ ba_free(ba, b2, 100);
+ ba_alloc(ba, 100, &b2);
+ ba_free(ba, b4, 100);
+ ba_free(ba, b6, 100);
uint64_t b8, b9;
- ba_alloc(ba, 100, &b4);
- ba_free(ba, b2);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b8);
- ba_alloc(ba, 100, &b9);
- ba_free(ba, b6);
- ba_free(ba, b7);
- ba_free(ba, b8);
- ba_alloc(ba, 100, &b6);
- ba_alloc(ba, 100, &b7);
- ba_free(ba, b4);
- ba_alloc(ba, 100, &b4);
-
- ba->destroy();
+ ba_alloc(ba, 100, &b4);
+ ba_free(ba, b2, 100);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b8);
+ ba_alloc(ba, 100, &b9);
+ ba_free(ba, b6, 100);
+ ba_free(ba, b7, 100);
+ ba_free(ba, b8, 100);
+ ba_alloc(ba, 100, &b6);
+ ba_alloc(ba, 100, &b7);
+ ba_free(ba, b4, 100);
+ ba_alloc(ba, 100, &b4);
+
+ ba->Destroy();
}
// Manual test to get coverage of all the code in the block allocator.
-static void
-test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
- block_allocator allocator;
- block_allocator *ba = &allocator;
- ba->create(0*512, 1*512);
- ba->set_strategy(strategy);
-
- int n_blocks=0;
+static void test_ba1(int n_initial) {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
+ ba->Create(0 * 512, 1 * 512);
+
+ int n_blocks = 0;
uint64_t blocks[1000];
for (int i = 0; i < 1000; i++) {
- if (i < n_initial || random() % 2 == 0) {
- if (n_blocks < 1000) {
- ba_alloc(ba, 1, &blocks[n_blocks]);
- //printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
- n_blocks++;
- }
- } else {
- if (n_blocks > 0) {
- int blocknum = random()%n_blocks;
- //printf("F[%d]%ld\n", blocknum, blocks[blocknum]);
- ba_free(ba, blocks[blocknum]);
- blocks[blocknum]=blocks[n_blocks-1];
- n_blocks--;
- }
- }
+ if (i < n_initial || random() % 2 == 0) {
+ if (n_blocks < 1000) {
+ ba_alloc(ba, 1, &blocks[n_blocks]);
+ // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
+ n_blocks++;
+ }
+ } else {
+ if (n_blocks > 0) {
+ int blocknum = random() % n_blocks;
+ // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]);
+ ba_free(ba, blocks[blocknum], 1);
+ blocks[blocknum] = blocks[n_blocks - 1];
+ n_blocks--;
+ }
+ }
}
-
- ba->destroy();
+
+ ba->Destroy();
}
-
+
// Check to see if it is first fit or best fit.
-static void
-test_ba2 (void)
-{
- block_allocator allocator;
- block_allocator *ba = &allocator;
+static void test_ba2(void) {
+ BlockAllocator allocator;
+ BlockAllocator *ba = &allocator;
uint64_t b[6];
enum { BSIZE = 1024 };
- ba->create(100*512, BSIZE*512);
- ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT);
- assert(ba->allocated_limit()==100*512);
-
- ba_check_l (ba, 0, 0, 100);
- ba_check_none (ba, 1);
-
- ba_alloc (ba, 100, &b[0]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_none (ba, 2);
-
- ba_alloc (ba, BSIZE + 100, &b[1]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_none (ba, 3);
-
- ba_alloc (ba, 100, &b[2]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_none (ba, 4);
-
- ba_alloc (ba, 100, &b[3]);
- ba_alloc (ba, 100, &b[4]);
- ba_alloc (ba, 100, &b[5]);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
-
- ba_free (ba, 4*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 5*BSIZE, 100);
- ba_check_l (ba, 4, 6*BSIZE, 100);
- ba_check_l (ba, 5, 7*BSIZE, 100);
- ba_check_none (ba, 6);
+ ba->Create(100 * 512, BSIZE * 512);
+ invariant(ba->AllocatedLimit() == 100 * 512);
+
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_none(ba, 1);
+
+ ba_alloc(ba, 100, &b[0]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_none(ba, 2);
+
+ ba_alloc(ba, BSIZE + 100, &b[1]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_none(ba, 3);
+
+ ba_alloc(ba, 100, &b[2]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_none(ba, 4);
+
+ ba_alloc(ba, 100, &b[3]);
+ ba_alloc(ba, 100, &b[4]);
+ ba_alloc(ba, 100, &b[5]);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
+
+ ba_free(ba, 4 * BSIZE, 100);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 5 * BSIZE, 100);
+ ba_check_l(ba, 4, 6 * BSIZE, 100);
+ ba_check_l(ba, 5, 7 * BSIZE, 100);
+ ba_check_none(ba, 6);
uint64_t b2;
ba_alloc(ba, 100, &b2);
- assert(b2==4*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
-
- ba_free (ba, BSIZE);
- ba_free (ba, 5*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 2, 4*BSIZE, 100);
- ba_check_l (ba, 3, 6*BSIZE, 100);
- ba_check_l (ba, 4, 7*BSIZE, 100);
- ba_check_none (ba, 5);
-
- // This alloc will allocate the first block after the reserve space in the case of first fit.
+ invariant(b2 == 4 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
+
+ ba_free(ba, BSIZE, 100);
+ ba_free(ba, 5 * BSIZE, 100);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 2, 4 * BSIZE, 100);
+ ba_check_l(ba, 3, 6 * BSIZE, 100);
+ ba_check_l(ba, 4, 7 * BSIZE, 100);
+ ba_check_none(ba, 5);
+
+ // This alloc will allocate the first block after the reserve space in the
+ // case of first fit.
uint64_t b3;
ba_alloc(ba, 100, &b3);
- assert(b3== BSIZE); // First fit.
+ invariant(b3 == BSIZE); // First fit.
// if (b3==5*BSIZE) then it is next fit.
// Now 5*BSIZE is free
uint64_t b5;
ba_alloc(ba, 100, &b5);
- assert(b5==5*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_none (ba, 7);
+ invariant(b5 == 5 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_none(ba, 7);
// Now all blocks are busy
uint64_t b6, b7, b8;
ba_alloc(ba, 100, &b6);
ba_alloc(ba, 100, &b7);
ba_alloc(ba, 100, &b8);
- assert(b6==8*BSIZE);
- assert(b7==9*BSIZE);
- assert(b8==10*BSIZE);
- ba_check_l (ba, 0, 0, 100);
- ba_check_l (ba, 1, BSIZE, 100);
- ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
- ba_check_l (ba, 3, 4*BSIZE, 100);
- ba_check_l (ba, 4, 5*BSIZE, 100);
- ba_check_l (ba, 5, 6*BSIZE, 100);
- ba_check_l (ba, 6, 7*BSIZE, 100);
- ba_check_l (ba, 7, 8*BSIZE, 100);
- ba_check_l (ba, 8, 9*BSIZE, 100);
- ba_check_l (ba, 9, 10*BSIZE, 100);
- ba_check_none (ba, 10);
-
- ba_free(ba, 9*BSIZE);
- ba_free(ba, 7*BSIZE);
+ invariant(b6 == 8 * BSIZE);
+ invariant(b7 == 9 * BSIZE);
+ invariant(b8 == 10 * BSIZE);
+ ba_check_l(ba, 0, 0, 100);
+ ba_check_l(ba, 1, BSIZE, 100);
+ ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+ ba_check_l(ba, 3, 4 * BSIZE, 100);
+ ba_check_l(ba, 4, 5 * BSIZE, 100);
+ ba_check_l(ba, 5, 6 * BSIZE, 100);
+ ba_check_l(ba, 6, 7 * BSIZE, 100);
+ ba_check_l(ba, 7, 8 * BSIZE, 100);
+ ba_check_l(ba, 8, 9 * BSIZE, 100);
+ ba_check_l(ba, 9, 10 * BSIZE, 100);
+ ba_check_none(ba, 10);
+
+ ba_free(ba, 9 * BSIZE, 100);
+ ba_free(ba, 7 * BSIZE, 100);
uint64_t b9;
ba_alloc(ba, 100, &b9);
- assert(b9==7*BSIZE);
+ invariant(b9 == 7 * BSIZE);
- ba_free(ba, 5*BSIZE);
- ba_free(ba, 2*BSIZE);
+ ba_free(ba, 5 * BSIZE, 100);
+ ba_free(ba, 2 * BSIZE, BSIZE + 100);
uint64_t b10, b11;
ba_alloc(ba, 100, &b10);
- assert(b10==2*BSIZE);
+ invariant(b10 == 2 * BSIZE);
ba_alloc(ba, 100, &b11);
- assert(b11==3*BSIZE);
+ invariant(b11 == 3 * BSIZE);
ba_alloc(ba, 100, &b11);
- assert(b11==5*BSIZE);
+ invariant(b11 == 5 * BSIZE);
- ba->destroy();
+ ba->Destroy();
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
- enum block_allocator::allocation_strategy strategies[] = {
- block_allocator::BA_STRATEGY_FIRST_FIT,
- block_allocator::BA_STRATEGY_BEST_FIT,
- block_allocator::BA_STRATEGY_PADDED_FIT,
- block_allocator::BA_STRATEGY_HEAT_ZONE,
- };
- for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
- test_ba0(strategies[i]);
- test_ba1(strategies[i], 0);
- test_ba1(strategies[i], 10);
- test_ba1(strategies[i], 20);
- }
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
+ test_ba0();
+ test_ba1(0);
+ test_ba1(10);
+ test_ba1(20);
test_ba2();
return 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
index a7c48ef709a..ee68ab3ef0b 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
@@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// #5978 is fixed. Here is what we do. We have four pairs with
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
-// We pin all four with expensive write locks. Then, on backgroud threads,
+// We pin all four with expensive write locks. Then, on background threads,
// we call get_and_pin_nonblocking on 3, where the unlocker unpins 2, and
// we call get_and_pin_nonblocking on 4, where the unlocker unpins 1. Run this
// enough times, and we should see a deadlock before the fix, and no deadlock
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
index be4bae898be..51cf70c3e76 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
@@ -77,7 +77,7 @@ flush (
//
// test the following things for simple cloning:
-// - verifies that after teh checkpoint ends, the PAIR is properly
+// - verifies that after the checkpoint ends, the PAIR is properly
// dirty or clean based on the second unpin
//
static void
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
index cb03a23e0fc..7abd2267a7e 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
@@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
-static int
-int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
- int64_t x = *(int64_t *) a->data;
- int64_t y = *(int64_t *) b->data;
-
- if (x<y) return -1;
- if (x>y) return 1;
+static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) {
+ int64_t x = *(int64_t *)a->data;
+ int64_t y = *(int64_t *)b->data;
+
+ if (x < y)
+ return -1;
+ if (x > y)
+ return 1;
return 0;
}
-static void
-test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
int r;
FT_CURSOR XMALLOC(cursor);
FTNODE dn = NULL;
PAIR_ATTR attr;
-
+
// first test that prefetching everything should work
- memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
- memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+ memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+ memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
cursor->left_is_neg_infty = true;
cursor->right_is_pos_infty = true;
cursor->disable_prefetching = false;
-
+
ftnode_fetch_extra bfe;
// quick test to see that we have the right behavior when we set
// disable_prefetching to true
cursor->disable_prefetching = true;
- bfe.create_for_prefetch( ft_h, cursor);
+ bfe.create_for_prefetch(ft_h, cursor);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
// now enable prefetching again
cursor->disable_prefetching = false;
-
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
uint64_t left_key = 150;
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
cursor->left_is_neg_infty = false;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
uint64_t right_key = 151;
toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
cursor->right_is_pos_infty = false;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
left_key = 100000;
right_key = 100000;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
bfe.destroy();
toku_free(ndd);
toku_ftnode_free(&dn);
left_key = 100;
right_key = 100;
- bfe.create_for_prefetch( ft_h, cursor);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ bfe.create_for_prefetch(ft_h, cursor);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
@@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
toku_free(cursor);
}
-static void
-test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
int r;
FT_CURSOR XMALLOC(cursor);
FTNODE dn = NULL;
FTNODE_DISK_DATA ndd = NULL;
PAIR_ATTR attr;
-
+
// first test that prefetching everything should work
- memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
- memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+ memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+ memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
cursor->left_is_neg_infty = true;
cursor->right_is_pos_infty = true;
-
+
uint64_t left_key = 150;
uint64_t right_key = 151;
DBT left, right;
@@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
ftnode_fetch_extra bfe;
bfe.create_for_subset_read(
- ft_h,
- NULL,
- &left,
- &right,
- false,
- false,
- false,
- false
- );
-
+ ft_h, NULL, &left, &right, false, false, false, false);
+
// fake the childnum to read
// set disable_prefetching ON
bfe.child_to_read = 2;
bfe.disable_prefetching = true;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_ON_DISK);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
toku_ftnode_free(&dn);
toku_free(ndd);
// fake the childnum to read
bfe.child_to_read = 2;
bfe.disable_prefetching = false;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_AVAIL);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_COMPRESSED);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_ON_DISK);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_AVAIL);
+ invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_AVAIL);
toku_ftnode_free(&dn);
toku_free(ndd);
// fake the childnum to read
bfe.child_to_read = 0;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
- assert(r==0);
- assert(dn->n_children == 3);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- // need to call this twice because we had a subset read before, that touched the clock
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
- toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(dn,0) == PT_COMPRESSED);
- assert(BP_STATE(dn,1) == PT_COMPRESSED);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+ invariant(r == 0);
+ invariant(dn->n_children == 3);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ // need to call this twice because we had a subset read before, that touched
+ // the clock
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+ toku_ftnode_pe_callback(
+ dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
- assert(BP_STATE(dn,0) == PT_AVAIL);
- assert(BP_STATE(dn,1) == PT_AVAIL);
- assert(BP_STATE(dn,2) == PT_ON_DISK);
+ invariant(BP_STATE(dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(dn, 1) == PT_AVAIL);
+ invariant(BP_STATE(dn, 2) == PT_ON_DISK);
toku_ftnode_free(&dn);
toku_free(ndd);
toku_free(cursor);
}
-
-static void
-test_prefetching(void) {
+static void test_prefetching(void) {
// struct ft_handle source_ft;
struct ftnode sn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -327,7 +342,7 @@ test_prefetching(void) {
uint64_t key1 = 100;
uint64_t key2 = 200;
-
+
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkeys[2];
toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1));
@@ -336,13 +351,13 @@ test_prefetching(void) {
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
BP_BLOCKNUM(&sn, 2).b = 40;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
- BP_STATE(&sn,2) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
+ BP_STATE(&sn, 2) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
set_BNC(&sn, 2, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -352,7 +367,7 @@ test_prefetching(void) {
CKERR(r);
// data in the buffers does not matter in this test
- //Cleanup:
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -363,41 +378,48 @@ test_prefetching(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(int64_key_cmp, nullptr);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
- test_prefetch_read(fd, ft, ft_h);
+ test_prefetch_read(fd, ft, ft_h);
test_subset_read(fd, ft, ft_h);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
@@ -405,11 +427,12 @@ test_prefetching(void) {
toku_free(ft);
toku_free(ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
test_prefetching();
return 0;
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
index ceef3772e2a..26a3dae673c 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
@@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/cursor.h"
-enum ftnode_verify_type {
- read_all=1,
- read_compressed,
- read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
char *CAST_FROM_VOIDP(s, a->data);
char *CAST_FROM_VOIDP(t, b->data);
return strcmp(s, t);
}
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ int keylen,
+ const char *val,
+ int vallen) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keylen,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char
memcpy(r->u.clean.val, val, vallen);
}
-
-static void
-le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
-{
+static void le_malloc(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ const char *val) {
int keylen = strlen(key) + 1;
int vallen = strlen(val) + 1;
le_add_to_bn(bn, idx, key, keylen, val, vallen);
}
-
-static void
-test1(int fd, FT ft_h, FTNODE *dn) {
+static void test1(int fd, FT ft_h, FTNODE *dn) {
int r;
ftnode_fetch_extra bfe_all;
bfe_all.create_for_full_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all);
bool is_leaf = ((*dn)->height == 0);
- assert(r==0);
+ invariant(r == 0);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and NOT get rid of anything
PAIR_ATTR attr;
- memset(&attr,0,sizeof(attr));
+ memset(&attr, 0, sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
- }
- else {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
}
PAIR_ATTR size;
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
- }
- else {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
- }
+ }
req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
(*dn)->dirty = 1;
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
@@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) {
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
toku_free(ndd);
toku_ftnode_free(dn);
}
-
-static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) {
+static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) {
return 0;
}
-static void
-test2(int fd, FT ft_h, FTNODE *dn) {
+static void test2(int fd, FT ft_h, FTNODE *dn) {
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
memset(&left, 0, sizeof(left));
memset(&right, 0, sizeof(right));
ft_search search;
-
+
ftnode_fetch_extra bfe_subset;
bfe_subset.create_for_subset_read(
ft_h,
- ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
+ ft_search_init(
+ &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
&left,
&right,
true,
true,
false,
- false
- );
+ false);
FTNODE_DISK_DATA ndd = NULL;
- int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
- assert(r==0);
+ int r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset);
+ invariant(r == 0);
bool is_leaf = ((*dn)->height == 0);
- // at this point, although both partitions are available, only the
+ // at this point, although both partitions are available, only the
// second basement node should have had its clock
// touched
- assert(BP_STATE(*dn, 0) == PT_AVAIL);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 0));
- assert(!BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 0));
+ invariant(!BP_SHOULD_EVICT(*dn, 1));
PAIR_ATTR attr;
- memset(&attr,0,sizeof(attr));
+ memset(&attr, 0, sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 1));
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
- assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
+ invariant(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
- assert(req);
+ invariant(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr);
- assert(BP_STATE(*dn, 0) == PT_AVAIL);
- assert(BP_STATE(*dn, 1) == PT_AVAIL);
- assert(BP_SHOULD_EVICT(*dn, 0));
- assert(!BP_SHOULD_EVICT(*dn, 1));
+ invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+ invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+ invariant(BP_SHOULD_EVICT(*dn, 0));
+ invariant(!BP_SHOULD_EVICT(*dn, 1));
toku_free(ndd);
toku_ftnode_free(dn);
}
-static void
-test3_leaf(int fd, FT ft_h, FTNODE *dn) {
+static void test3_leaf(int fd, FT ft_h, FTNODE *dn) {
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
memset(&left, 0, sizeof(left));
memset(&right, 0, sizeof(right));
-
+
ftnode_fetch_extra bfe_min;
bfe_min.create_for_min_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
- int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
- assert(r==0);
+ int r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min);
+ invariant(r == 0);
//
// make sure we have a leaf
//
- assert((*dn)->height == 0);
+ invariant((*dn)->height == 0);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
}
toku_ftnode_free(dn);
toku_free(ndd);
}
-static void
-test_serialize_nonleaf(void) {
+static void test_serialize_nonleaf(void) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -265,11 +253,11 @@ test_serialize_nonleaf(void) {
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -281,11 +269,38 @@ test_serialize_nonleaf(void) {
toku::comparator cmp;
cmp.create(string_key_cmp, nullptr);
- toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
- toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
- toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
- //Cleanup:
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "a",
+ 2,
+ "aval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_0,
+ true,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "b",
+ 2,
+ "bval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ false,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 1),
+ "x",
+ 2,
+ "xval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_234,
+ true,
+ cmp);
+
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -297,35 +312,41 @@ test_serialize_nonleaf(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(string_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
test1(fd, ft_h, &dn);
test2(fd, ft_h, &dn);
@@ -333,22 +354,26 @@ test_serialize_nonleaf(void) {
toku_destroy_ftnode_internals(&sn);
toku_free(ndd);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
ft_h->cmp.destroy();
toku_free(ft_h);
toku_free(ft);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf(void) {
+static void test_serialize_leaf(void) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -364,8 +389,8 @@ test_serialize_leaf(void) {
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn());
le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval");
@@ -378,51 +403,59 @@ test_serialize_leaf(void) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
test1(fd, ft_h, &dn);
- test3_leaf(fd, ft_h,&dn);
+ test3_leaf(fd, ft_h, &dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
toku_free(ft);
toku_free(ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
initialize_dummymsn();
test_serialize_nonleaf();
test_serialize_leaf();
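
The hunks above mechanically replace assert() with invariant() throughout ft-clock-test.cc. As a minimal stand-alone sketch of why test code prefers this (assuming PerconaFT's invariant(), defined in portability/toku_assert.h, keeps checking in optimized builds, which is the usual reason for such a conversion — my_invariant below is an illustrative stand-in, not the project's macro):

    // C++: the assert-vs-invariant distinction this conversion relies on.
    // my_invariant is hypothetical; PerconaFT's real macro lives in
    // portability/toku_assert.h.
    #include <cassert>
    #include <cstdio>
    #include <cstdlib>

    #define my_invariant(expr)                                            \
        do {                                                              \
            if (!(expr)) {                                                \
                std::fprintf(stderr, "invariant failed: %s (%s:%d)\n",    \
                             #expr, __FILE__, __LINE__);                  \
                std::abort(); /* fires even when NDEBUG is defined */     \
            }                                                             \
        } while (0)

    int main() {
        int r = 0;
        assert(r == 0);        // compiled out under -DNDEBUG
        my_invariant(r == 0);  // still checked in release builds
        return 0;
    }

Under -DNDEBUG the assert() line becomes a no-op, so a release build of the test would silently stop verifying anything; the invariant() form aborts on failure in every build configuration.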
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
index 9828f49513c..d50488ae197 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
@@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include <sys/time.h>
#include "test.h"
-
-
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
const double USECS_PER_SEC = 1000000.0;
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ char *key,
+ int keylen,
+ char *val,
+ int vallen) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keylen,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va
memcpy(r->u.clean.val, val, vallen);
}
-static int
-long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
const long *CAST_FROM_VOIDP(x, a->data);
const long *CAST_FROM_VOIDP(y, b->data);
return (*x > *y) - (*x < *y);
}
-static void
-test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_leaf(int valsize,
+ int nelts,
+ double entropy,
+ int ser_runs,
+ int deser_runs) {
// struct ft_handle source_ft;
struct ftnode *sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
MALLOC_N(sn->n_children, sn->bp);
sn->pivotkeys.create_empty();
for (int i = 0; i < sn->n_children; ++i) {
- BP_STATE(sn,i) = PT_AVAIL;
+ BP_STATE(sn, i) = PT_AVAIL;
set_BLB(sn, i, toku_create_empty_bn());
}
int nperbn = nelts / sn->n_children;
@@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
k = ck * nperbn + i;
char buf[valsize];
int c;
- for (c = 0; c < valsize * entropy; ) {
- int *p = (int *) &buf[c];
+ for (c = 0; c < valsize * entropy;) {
+ int *p = (int *)&buf[c];
*p = rand();
c += sizeof(*p);
}
memset(&buf[c], 0, valsize - c);
le_add_to_bn(
- BLB_DATA(sn,ck),
- i,
- (char *)&k,
- sizeof k,
- buf,
- sizeof buf
- );
+ BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf);
}
if (ck < 7) {
DBT pivotkey;
- sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
+ sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)),
+ ck);
}
}
@@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(long_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
struct timeval total_start;
@@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
gettimeofday(&t[0], NULL);
ndd = NULL;
sn->dirty = 1;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
total_start.tv_sec += t[0].tv_sec;
total_start.tv_usec += t[0].tv_usec;
@@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
toku_free(ndd);
}
double dt;
- dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+ dt = (total_end.tv_sec - total_start.tv_sec) +
+ ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= ser_runs;
- printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
+ printf(
+ "serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
- //reset
+ // reset
total_start.tv_sec = total_start.tv_usec = 0;
total_end.tv_sec = total_end.tv_usec = 0;
@@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
- assert(r==0);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
total_start.tv_sec += t[0].tv_sec;
@@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
toku_ftnode_free(&dn);
toku_free(ndd2);
}
- dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+ dt = (total_end.tv_sec - total_start.tv_sec) +
+ ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= deser_runs;
- printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
- printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
- tokutime_to_seconds(bfe.io_time)*1000,
- tokutime_to_seconds(bfe.decompress_time)*1000,
- tokutime_to_seconds(bfe.deserialize_time)*1000,
- deser_runs
- );
+ printf(
+ "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
+ printf(
+ "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+ "(average of %d runs)\n",
+ tokutime_to_seconds(bfe.io_time) * 1000,
+ tokutime_to_seconds(bfe.decompress_time) * 1000,
+ tokutime_to_seconds(bfe.deserialize_time) * 1000,
+ deser_runs);
toku_ftnode_free(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
toku_free(ft);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_nonleaf(int valsize,
+ int nelts,
+ double entropy,
+ int ser_runs,
+ int deser_runs) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_BLOCKNUM(&sn, i).b = 30 + (i*5);
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_BLOCKNUM(&sn, i).b = 30 + (i * 5);
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BNC(&sn, i, toku_create_empty_nl());
}
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123);
@@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
k = ck * nperchild + i;
char buf[valsize];
int c;
- for (c = 0; c < valsize * entropy; ) {
- int *p = (int *) &buf[c];
+ for (c = 0; c < valsize * entropy;) {
+ int *p = (int *)&buf[c];
*p = rand();
c += sizeof(*p);
}
memset(&buf[c], 0, valsize - c);
- toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp);
+ toku_bnc_insert_msg(bnc,
+ &k,
+ sizeof k,
+ buf,
+ valsize,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ true,
+ cmp);
}
if (ck < 7) {
DBT pivotkey;
@@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
}
}
- //Cleanup:
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
cmp.destroy();
@@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(long_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
struct timeval t[2];
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd = NULL;
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
double dt;
- dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+ dt = (t[1].tv_sec - t[0].tv_sec) +
+ ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
dt *= 1000;
- printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
+ printf(
+ "serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
- assert(r==0);
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+ invariant(r == 0);
gettimeofday(&t[1], NULL);
- dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+ dt = (t[1].tv_sec - t[0].tv_sec) +
+ ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
dt *= 1000;
- printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
- printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
- tokutime_to_seconds(bfe.io_time)*1000,
- tokutime_to_seconds(bfe.decompress_time)*1000,
- tokutime_to_seconds(bfe.deserialize_time)*1000,
- deser_runs
- );
+ printf(
+ "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
+ printf(
+ "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+ "(IGNORED RUNS=%d)\n",
+ tokutime_to_seconds(bfe.io_time) * 1000,
+ tokutime_to_seconds(bfe.decompress_time) * 1000,
+ tokutime_to_seconds(bfe.deserialize_time) * 1000,
+ deser_runs);
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
ft_h->cmp.destroy();
@@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
toku_free(ndd);
toku_free(ndd2);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
const int DEFAULT_RUNS = 5;
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
double entropy = 0.3;
if (argc != 3 && argc != 5) {
- fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
+ fprintf(stderr,
+ "Usage: %s <valsize> <nelts> [<serialize_runs> "
+ "<deserialize_runs>]\n",
+ argv[0]);
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
return 2;
}
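
Besides the reformatting, the benchmark hunks above pick up two block-table API changes: realloc_on_disk() drops its trailing argument, and block_free() now takes the freed extent's size alongside its offset (the block_allocator:: scope also becomes BlockAllocator::). A toy sketch of why a free call wants both values — assuming the allocator tracks free extents by offset and length, which is an illustrative guess at the motivation, not PerconaFT's implementation:

    // C++: toy extent tracker mirroring the block_free(offset, size)
    // call-site shape seen in the hunks above. ToyBlockAllocator is
    // hypothetical, not PerconaFT's BlockAllocator.
    #include <cstdint>
    #include <cstdio>
    #include <map>

    class ToyBlockAllocator {
        std::map<uint64_t, uint64_t> _free;  // offset -> size of free extent
    public:
        void block_free(uint64_t offset, uint64_t size) {
            // Passing the size in lets the caller (the block table) own the
            // offset->size bookkeeping instead of the allocator duplicating it.
            _free[offset] = size;
        }
        uint64_t free_bytes() const {
            uint64_t total = 0;
            for (const auto &e : _free) total += e.second;
            return total;
        }
    };

    int main() {
        ToyBlockAllocator ba;
        // Mirrors the updated call site in the diff:
        //   ft_h->blocktable.block_free(
        //       BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
        ba.block_free(/*offset=*/4096, /*size=*/100);
        std::printf("free bytes tracked: %llu\n",
                    (unsigned long long)ba.free_bytes());
        return 0;
    }

The value 100 matches the extent size the tests allocate for block #20, which is why the same constant now appears at every block_free() call site in this diff.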
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
index 332aaa0c170..0cddaf19651 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
@@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "test.h"
#include "bndata.h"
-
-
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
-static size_t
-le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize)
-{
+static size_t le_add_to_bn(bn_data *bn,
+ uint32_t idx,
+ const char *key,
+ int keysize,
+ const char *val,
+ int valsize) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
void *maybe_free = nullptr;
- bn->get_space_for_insert(
- idx,
- key,
- keysize,
- size_needed,
- &r,
- &maybe_free
- );
+ bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free);
if (maybe_free) {
toku_free(maybe_free);
}
@@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha
}
class test_key_le_pair {
- public:
+ public:
uint32_t keylen;
- char* keyp;
+ char *keyp;
LEAFENTRY le;
test_key_le_pair() : keylen(), keyp(), le() {}
void init(const char *_keyp, const char *_val) {
init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1);
}
- void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) {
+ void init(const char *_keyp,
+ uint32_t _keylen,
+ const char *_val,
+ uint32_t _vallen) {
keylen = _keylen;
CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen)));
@@ -95,126 +92,144 @@ class test_key_le_pair {
}
};
-enum ftnode_verify_type {
- read_all=1,
- read_compressed,
- read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
char *CAST_FROM_VOIDP(s, a->data);
char *CAST_FROM_VOIDP(t, b->data);
return strcmp(s, t);
}
-static void
-setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) {
+static void setup_dn(enum ftnode_verify_type bft,
+ int fd,
+ FT ft_h,
+ FTNODE *dn,
+ FTNODE_DISK_DATA *ndd) {
int r;
if (bft == read_all) {
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
- assert(r==0);
- }
- else if (bft == read_compressed || bft == read_none) {
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+ invariant(r == 0);
+ } else if (bft == read_compressed || bft == read_none) {
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft_h);
- r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
- assert(r==0);
- // assert all bp's are compressed or on disk.
+ r = toku_deserialize_ftnode_from(
+ fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+ invariant(r == 0);
+ // invariant all bp's are compressed or on disk.
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED ||
+ BP_STATE(*dn, i) == PT_ON_DISK);
}
// if read_none, get rid of the compressed bp's
if (bft == read_none) {
if ((*dn)->height == 0) {
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
- // assert all bp's are on disk
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
+ // invariant all bp's are on disk
for (int i = 0; i < (*dn)->n_children; i++) {
if ((*dn)->height == 0) {
- assert(BP_STATE(*dn,i) == PT_ON_DISK);
- assert(is_BNULL(*dn, i));
- }
- else {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+ invariant(BP_STATE(*dn, i) == PT_ON_DISK);
+ invariant(is_BNULL(*dn, i));
+ } else {
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
}
}
- }
- else {
+ } else {
// first decompress everything, and make sure
// that it is available
// then run partial eviction to get it compressed
PAIR_ATTR attr;
bfe.create_for_full_read(ft_h);
- assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+ invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
- assert(r==0);
- // assert all bp's are available
+ invariant(r == 0);
+ // invariant all bp's are available
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- // assert all bp's are still available, because we touched the clock
- assert(BP_STATE(*dn,i) == PT_AVAIL);
- // now assert all should be evicted
- assert(BP_SHOULD_EVICT(*dn, i));
+ // invariant all bp's are still available, because we touched
+ // the clock
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
+ // now invariant all should be evicted
+ invariant(BP_SHOULD_EVICT(*dn, i));
}
- toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+ toku_ftnode_pe_callback(*dn,
+ make_pair_attr(0xffffffff),
+ ft_h,
+ def_pe_finalize_impl,
+ nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+ invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
}
}
}
// now decompress them
bfe.create_for_full_read(ft_h);
- assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+ invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
PAIR_ATTR attr;
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
- assert(r==0);
- // assert all bp's are available
+ invariant(r == 0);
+ // invariant all bp's are available
for (int i = 0; i < (*dn)->n_children; i++) {
- assert(BP_STATE(*dn,i) == PT_AVAIL);
+ invariant(BP_STATE(*dn, i) == PT_AVAIL);
}
// continue on with test
- }
- else {
+ } else {
// if we get here, this is a test bug, NOT a bug in development code
- assert(false);
+ invariant(false);
}
}
-static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) {
+static void write_sn_to_disk(int fd,
+ FT_HANDLE ft,
+ FTNODE sn,
+ FTNODE_DISK_DATA *src_ndd,
+ bool do_clone) {
int r;
if (do_clone) {
- void* cloned_node_v = NULL;
+ void *cloned_node_v = NULL;
PAIR_ATTR attr;
long clone_size;
- toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
+ toku_ftnode_clone_callback(
+ sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v);
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
- assert(r==0);
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
+ invariant(r == 0);
toku_ftnode_free(&cloned_node);
- }
- else {
- r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
- assert(r==0);
+ } else {
+ r = toku_serialize_ftnode_to(
+ fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
+ invariant(r == 0);
}
}
-static void
-test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft,
+ bool do_clone) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
-#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 })
-#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 })
+#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42})
+#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84})
sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK;
sn.flags = 0x11223344;
@@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
MALLOC_N(sn.n_children, sn.bp);
DBT pivotkey;
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn());
le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5);
le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5);
le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5);
- BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 });
+ BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73});
BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK;
FT_HANDLE XMALLOC(ft);
@@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
- //Want to use block #20
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children>=1);
- assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children >= 1);
+ invariant(dn->max_msn_applied_to_node_on_disk.msn ==
+ POSTSERIALIZE_MSN_ON_DISK.msn);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair elts[3];
elts[0].init("a", "aval");
@@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn);
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn ==
+ POSTSERIALIZE_MSN_ON_DISK.msn);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
- assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(elts[last_i].le));
+ invariant(memcmp(curr_le,
+ elts[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ elts[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
-
}
- assert(last_i == 3);
+ invariant(last_i == 3);
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
- const int keylens = 256*1024, vallens = 0;
+ const int keylens = 256 * 1024, vallens = 0;
const uint32_t nrows = 8;
- // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
for (uint32_t i = 0; i < nrows; ++i) { // one basement per row
char key[keylens], val[vallens];
- key[keylens-1] = '\0';
+ key[keylens - 1] = '\0';
char c = 'a' + i;
- memset(key, c, keylens-1);
- le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
- if (i < nrows-1) {
+ memset(key, c, keylens - 1);
+ le_add_to_bn(BLB_DATA(&sn, i),
+ 0,
+ (char *)&key,
+ sizeof(key),
+ (char *)&val,
+ sizeof(val));
+ if (i < nrows - 1) {
uint32_t keylen;
- void* curr_key;
+ void *curr_key;
BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key);
DBT pivotkey;
- sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i);
+ sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen),
+ i);
}
}
@@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone);
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
-
- assert(dn->blocknum.b==20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->blocknum.b == 20);
+
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
char key[keylens], val[vallens];
- key[keylens-1] = '\0';
+ key[keylens - 1] = '\0';
for (uint32_t i = 0; i < nrows; ++i) {
char c = 'a' + i;
- memset(key, c, keylens-1);
- les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ memset(key, c, keylens - 1);
+ les[i].init(
+ (char *)&key, sizeof(key), (char *)&val, sizeof(val));
}
}
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ les[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
}
- assert(last_i == nrows);
+ invariant(last_i == nrows);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
- const uint32_t nrows = 196*1024;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ const uint32_t nrows = 196 * 1024;
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
XMALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
- set_BLB(&sn, i, toku_create_empty_bn());
+ BP_STATE(&sn, i) = PT_AVAIL;
+ set_BLB(&sn, i, toku_create_empty_bn());
}
size_t total_size = 0;
for (uint32_t i = 0; i < nrows; ++i) {
uint32_t key = i;
uint32_t val = i;
- total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ total_size += le_add_to_bn(BLB_DATA(&sn, 0),
+ i,
+ (char *)&key,
+ sizeof(key),
+ (char *)&val,
+ sizeof(val));
}
FT_HANDLE XMALLOC(ft);
@@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -535,56 +602,66 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
int key = 0, val = 0;
for (uint32_t i = 0; i < nrows; ++i, key++, val++) {
- les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+ les[i].init(
+ (char *)&key, sizeof(key), (char *)&val, sizeof(val));
}
}
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data);
- void* tmp = les[last_i].keyp;
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ uint32_t *CAST_FROM_VOIDP(pivot,
+ dn->pivotkeys.get_pivot(bn).data);
+ void *tmp = les[last_i].keyp;
uint32_t *CAST_FROM_VOIDP(item, tmp);
- assert(*pivot >= *item);
+ invariant(*pivot >= *item);
}
// TODO for later, get a key comparison here as well
last_i++;
}
// don't check soft_copy_is_up_to_date or seqinsert
- assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change
+ invariant(BLB_DATA(dn, bn)->get_disk_size() <
+ 128 * 1024); // BN_MAX_SIZE, apt to change
}
- assert(last_i == nrows);
+ invariant(last_i == nrows);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft,
+ bool do_clone) {
int r;
struct ftnode sn, *dn;
const uint32_t nrows = 7;
const size_t key_size = 8;
- const size_t val_size = 512*1024;
- // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ const size_t val_size = 512 * 1024;
+ // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.flags = 0x11223344;
@@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
sn.n_children = 1;
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
-
+
MALLOC_N(sn.n_children, sn.bp);
sn.pivotkeys.create_empty();
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
for (uint32_t i = 0; i < nrows; ++i) {
char key[key_size], val[val_size];
- key[key_size-1] = '\0';
- val[val_size-1] = '\0';
+ key[key_size - 1] = '\0';
+ val[val_size - 1] = '\0';
char c = 'a' + i;
- memset(key, c, key_size-1);
- memset(val, c, val_size-1);
- le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size);
+ memset(key, c, key_size - 1);
+ memset(val, c, val_size - 1);
+ le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size);
}
FT_HANDLE XMALLOC(ft);
@@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
{
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
test_key_le_pair *les = new test_key_le_pair[nrows];
{
char key[key_size], val[val_size];
- key[key_size-1] = '\0';
- val[val_size-1] = '\0';
+ key[key_size - 1] = '\0';
+ val[val_size - 1] = '\0';
for (uint32_t i = 0; i < nrows; ++i) {
char c = 'a' + i;
- memset(key, c, key_size-1);
- memset(val, c, val_size-1);
+ memset(key, c, key_size - 1);
+ memset(val, c, val_size - 1);
les[i].init(key, key_size, val, val_size);
}
}
const uint32_t npartitions = dn->n_children;
- assert(npartitions == nrows);
+ invariant(npartitions == nrows);
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
- assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+ invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
- assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(les[last_i].le));
+ invariant(memcmp(curr_le,
+ les[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ (char *)(les[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
// don't check soft_copy_is_up_to_date or seqinsert
}
- assert(last_i == 7);
+ invariant(last_i == 7);
delete[] les;
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_empty_basement_nodes(
+ enum ftnode_verify_type bft,
+ bool do_clone) {
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -760,7 +857,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
toku_fill_dbt(&pivotkeys[5], "x", 2);
sn.pivotkeys.create_from_dbts(pivotkeys, 6);
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
BLB_SEQINSERT(&sn, i) = 0;
}
@@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children>0);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children > 0);
{
test_key_le_pair elts[3];
- // Man, this is way too ugly. This entire test suite needs to be refactored.
+ // Man, this is way too ugly. This entire test suite needs to be
+ // refactored.
// Create a dummy mempool and put the leaves there. Ugh.
elts[0].init("a", "aval");
elts[1].init("b", "bval");
@@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
const uint32_t npartitions = dn->n_children;
uint32_t last_i = 0;
for (uint32_t bn = 0; bn < npartitions; ++bn) {
- assert(dest_ndd[bn].start > 0);
- assert(dest_ndd[bn].size > 0);
+ invariant(dest_ndd[bn].start > 0);
+ invariant(dest_ndd[bn].size > 0);
if (bn > 0) {
- assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+ invariant(dest_ndd[bn].start >=
+ dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
}
for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
LEAFENTRY curr_le;
uint32_t curr_keylen;
- void* curr_key;
- BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
- assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
- assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
- if (bn < npartitions-1) {
- assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0);
+ void *curr_key;
+ BLB_DATA(dn, bn)
+ ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+ invariant(leafentry_memsize(curr_le) ==
+ leafentry_memsize(elts[last_i].le));
+ invariant(memcmp(curr_le,
+ elts[last_i].le,
+ leafentry_memsize(curr_le)) == 0);
+ if (bn < npartitions - 1) {
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+ (char *)(elts[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
}
-
}
- assert(last_i == 3);
+ invariant(last_i == 3);
}
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-static void
-test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_multiple_empty_basement_nodes(
+ enum ftnode_verify_type bft,
+ bool do_clone) {
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
toku_fill_dbt(&pivotkeys[2], "A", 2);
sn.pivotkeys.create_from_dbts(pivotkeys, 3);
for (int i = 0; i < sn.n_children; ++i) {
- BP_STATE(&sn,i) = PT_AVAIL;
+ BP_STATE(&sn, i) = PT_AVAIL;
set_BLB(&sn, i, toku_create_empty_bn());
}
@@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
@@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 0);
- assert(dn->n_children == 1);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 0);
+ invariant(dn->n_children == 1);
{
const uint32_t npartitions = dn->n_children;
for (uint32_t i = 0; i < npartitions; ++i) {
- assert(dest_ndd[i].start > 0);
- assert(dest_ndd[i].size > 0);
+ invariant(dest_ndd[i].start > 0);
+ invariant(dest_ndd[i].size > 0);
if (i > 0) {
- assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size);
+ invariant(dest_ndd[i].start >=
+ dest_ndd[i - 1].start + dest_ndd[i - 1].size);
}
- assert(BLB_DATA(dn, i)->num_klpairs() == 0);
+ invariant(BLB_DATA(dn, i)->num_klpairs() == 0);
}
}
-
+
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
toku_free(ft_h->h);
toku_free(ft_h);
@@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-
-static void
-test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
- int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+ int fd = open(TOKU_TEST_FILENAME,
+ O_RDWR | O_CREAT | O_BINARY,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ invariant(fd >= 0);
int r;
@@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
BP_BLOCKNUM(&sn, 0).b = 30;
BP_BLOCKNUM(&sn, 1).b = 35;
- BP_STATE(&sn,0) = PT_AVAIL;
- BP_STATE(&sn,1) = PT_AVAIL;
+ BP_STATE(&sn, 0) = PT_AVAIL;
+ BP_STATE(&sn, 1) = PT_AVAIL;
set_BNC(&sn, 0, toku_create_empty_nl());
set_BNC(&sn, 1, toku_create_empty_nl());
- //Create XIDS
+ // Create XIDS
XIDS xids_0 = toku_xids_get_root_xids();
XIDS xids_123;
XIDS xids_234;
@@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
toku::comparator cmp;
cmp.create(string_key_cmp, nullptr);
- toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
- toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
- toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
- //Cleanup:
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "a",
+ 2,
+ "aval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_0,
+ true,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 0),
+ "b",
+ 2,
+ "bval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_123,
+ false,
+ cmp);
+ toku_bnc_insert_msg(BNC(&sn, 1),
+ "x",
+ 2,
+ "xval",
+ 5,
+ FT_NONE,
+ next_dummymsn(),
+ xids_234,
+ true,
+ cmp);
+
+ // Cleanup:
toku_xids_destroy(&xids_0);
toku_xids_destroy(&xids_123);
toku_xids_destroy(&xids_234);
@@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
make_blocknum(0),
ZERO_LSN,
TXNID_NONE,
- 4*1024*1024,
- 128*1024,
+ 4 * 1024 * 1024,
+ 128 * 1024,
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
ft_h->cmp.create(string_key_cmp, nullptr);
ft->ft = ft_h;
-
+
ft_h->blocktable.create();
- { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
- //Want to use block #20
+ {
+ int r_truncate = ftruncate(fd, 0);
+ CKERR(r_truncate);
+ }
+ // Want to use block #20
BLOCKNUM b = make_blocknum(0);
while (b.b < 20) {
ft_h->blocktable.allocate_blocknum(&b, ft_h);
}
- assert(b.b == 20);
+ invariant(b.b == 20);
{
DISKOFF offset;
DISKOFF size;
- ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
- assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
- assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
- assert(size == 100);
+ invariant(offset ==
+ (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ invariant(size == 100);
}
FTNODE_DISK_DATA src_ndd = NULL;
FTNODE_DISK_DATA dest_ndd = NULL;
@@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
- assert(dn->blocknum.b==20);
+ invariant(dn->blocknum.b == 20);
- assert(dn->layout_version ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
- assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
- assert(dn->height == 1);
- assert(dn->n_children==2);
- assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0);
- assert(dn->pivotkeys.get_pivot(0).size==6);
- assert(BP_BLOCKNUM(dn,0).b==30);
- assert(BP_BLOCKNUM(dn,1).b==35);
+ invariant(dn->layout_version == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+ invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+ invariant(dn->height == 1);
+ invariant(dn->n_children == 2);
+ invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0);
+ invariant(dn->pivotkeys.get_pivot(0).size == 6);
+ invariant(BP_BLOCKNUM(dn, 0).b == 30);
+ invariant(BP_BLOCKNUM(dn, 1).b == 35);
message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer;
message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer;
message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer;
message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer;
- assert(src_msg_buffer1->equals(dest_msg_buffer1));
- assert(src_msg_buffer2->equals(dest_msg_buffer2));
+ invariant(src_msg_buffer1->equals(dest_msg_buffer1));
+ invariant(src_msg_buffer2->equals(dest_msg_buffer2));
toku_ftnode_free(&dn);
toku_destroy_ftnode_internals(&sn);
- ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+ ft_h->blocktable.block_free(
+ BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
ft_h->blocktable.destroy();
ft_h->cmp.destroy();
toku_free(ft_h->h);
@@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
toku_free(src_ndd);
toku_free(dest_ndd);
- r = close(fd); assert(r != -1);
+ r = close(fd);
+ invariant(r != -1);
}
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+ const char *argv[] __attribute__((__unused__))) {
initialize_dummymsn();
test_serialize_nonleaf(read_none, false);
@@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false);
- test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false);
+ test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+ false);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true);
test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true);
- test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true);
+ test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+ true);
test_serialize_leaf_with_empty_basement_nodes(read_none, false);
test_serialize_leaf_with_empty_basement_nodes(read_all, false);
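A note on the API change running through these serialize tests: block_free() now takes both an offset and a size, and the allocator type is renamed from block_allocator to BlockAllocator. The likely reason for the extra argument (a reading of this diff, not a statement in the patch): an allocator that tracks free space as (offset, size) holes — like the MHS red-black tree introduced later in this commit — cannot rebuild a hole from an offset alone. A minimal first-fit sketch under that assumption, with coalescing omitted for brevity:

#include <cassert>
#include <cstdint>
#include <map>

class ToyBlockAllocator {
    std::map<uint64_t, uint64_t> _holes;  // free holes: offset -> size
   public:
    ToyBlockAllocator(uint64_t reserve, uint64_t total) {
        _holes[reserve] = total - reserve;  // header reserve is never handed out
    }
    uint64_t alloc(uint64_t size) {  // first fit
        for (auto &h : _holes) {
            if (h.second >= size) {
                uint64_t off = h.first, rest = h.second - size;
                _holes.erase(off);
                if (rest) _holes[off + size] = rest;
                return off;
            }
        }
        assert(!"out of space");
        return 0;
    }
    // without the size argument the hole could not be reconstructed
    void block_free(uint64_t offset, uint64_t size) { _holes[offset] = size; }
};

int main() {
    ToyBlockAllocator ba(4096, 1 << 20);
    uint64_t off = ba.alloc(100);
    assert(off == 4096);      // first block lands right after the reserve
    ba.block_free(off, 100);  // mirrors blocktable.block_free(offset, 100)
    return 0;
}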
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
index 598a1cc7085..706bd94fbc3 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
@@ -164,17 +164,16 @@ static void test_read_what_was_written (void) {
int r;
const int NVALS=10000;
- if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
+ if (verbose) {
+ printf("test_read_what_was_written(): "); fflush(stdout);
+ }
unlink(fname);
-
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
- toku_cachetable_close(&ct);
-
-
+ toku_cachetable_close(&ct);
/* Now see if we can read an empty tree in. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
@@ -189,8 +188,6 @@ static void test_read_what_was_written (void) {
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
toku_cachetable_close(&ct);
-
-
/* Now see if we can read it in and get the value. */
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
index 53973794eae..aeb5a897c48 100644
--- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
@@ -109,7 +109,9 @@ static int run_test(void)
r = pqueue_pop(pq, &node); assert(r==0);
if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
if ( *(int*)(node->key->data) != i ) {
- if (verbose) printf("FAIL\n"); return -1;
+ if (verbose)
+ printf("FAIL\n");
+ return -1;
}
}
pqueue_free(pq);
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
index a78f787cdf2..f2004964862 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
@@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
invariant(do_garbage_collect);
- // It is definately worth doing when the above case is true
+ // It is definitely worth doing when the above case is true
// and there is more than one provisional entry.
ule.num_cuxrs = 1;
ule.num_puxrs = 2;
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
index 419af550545..71357a1e16a 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
@@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
-static void test_oldest_referenced_xid_gets_propogated(void) {
+static void test_oldest_referenced_xid_gets_propagated(void) {
int r;
CACHETABLE ct;
FT_HANDLE t;
@@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
toku_ft_flush_some_child(t->ft, node, &fa);
// pin the child, verify that oldest referenced xid was
- // propogated from parent to child during the flush
+ // propagated from parent to child during the flush
toku_pin_ftnode(
t->ft,
child_nonleaf_blocknum,
@@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
default_parse_args(argc, argv);
- test_oldest_referenced_xid_gets_propogated();
+ test_oldest_referenced_xid_gets_propagated();
return 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
index 8aded3898c1..ea4f9374dc3 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
@@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-#pragma once
-
-#include <db.h>
-
-#include "ft/serialize/block_allocator.h"
-
-// Block allocation strategy implementations
-
-class block_allocator_strategy {
-public:
- static struct block_allocator::blockpair *
- first_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- best_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- padded_fit(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
- static struct block_allocator::blockpair *
- heat_zone(struct block_allocator::blockpair *blocks_array,
- uint64_t n_blocks, uint64_t size, uint64_t alignment,
- uint64_t heat);
-};
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+static void test_insert_remove(void) {
+ uint64_t i;
+ MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+ verbose = 0;
+
+ tree->Insert({0, 100});
+
+ for (i = 0; i < 10; i++) {
+ tree->Remove(3);
+ tree->Remove(2);
+ }
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ for (i = 0; i < 10; i++) {
+ tree->Insert({5 * i, 3});
+ }
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ uint64_t offset = tree->Remove(2);
+ invariant(offset == 0);
+ offset = tree->Remove(10);
+ invariant(offset == 50);
+ offset = tree->Remove(3);
+ invariant(offset == 5);
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ tree->Insert({48, 2});
+ tree->Insert({50, 10});
+
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+
+ tree->Insert({3, 7});
+ offset = tree->Remove(10);
+ invariant(offset == 2);
+ tree->ValidateBalance();
+ tree->ValidateMhs();
+ tree->Dump();
+ delete tree;
+}
+
+int test_main(int argc, const char *argv[]) {
+ default_parse_args(argc, argv);
+
+ test_insert_remove();
+ if (verbose)
+ printf("test ok\n");
+ return 0;
+}
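The invariants in this new test pin down the tree's semantics: Remove(size) returns a first-fit offset, and Insert() coalesces adjacent holes. A hedged reference model with std::map (my sketch of the behavior, not the real MhsRbTree) reproduces every asserted offset:

#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>

static std::map<uint64_t, uint64_t> holes;  // offset -> size

static void insert_hole(uint64_t off, uint64_t size) {
    auto next = holes.lower_bound(off);
    if (next != holes.begin()) {  // coalesce with the predecessor
        auto prev = std::prev(next);
        if (prev->first + prev->second == off) {
            off = prev->first;
            size += prev->second;
            holes.erase(prev);
        }
    }
    if (next != holes.end() && off + size == next->first) {  // and successor
        size += next->second;
        holes.erase(next);
    }
    holes[off] = size;
}

static uint64_t remove_first_fit(uint64_t size) {
    for (auto it = holes.begin(); it != holes.end(); ++it) {
        if (it->second >= size) {
            uint64_t off = it->first, rest = it->second - size;
            holes.erase(it);
            if (rest) holes[off + size] = rest;
            return off;
        }
    }
    assert(!"no hole large enough");
    return ~0ULL;
}

int main() {
    insert_hole(0, 100);
    for (int i = 0; i < 10; i++) { remove_first_fit(3); remove_first_fit(2); }
    for (uint64_t i = 0; i < 10; i++) insert_hole(5 * i, 3);
    assert(remove_first_fit(2) == 0);    // takes 2 from the hole at offset 0
    assert(remove_first_fit(10) == 50);  // first hole of size >= 10
    assert(remove_first_fit(3) == 5);    // the hole at 2 is too small by now
    insert_hole(48, 2);
    insert_hole(50, 10);
    insert_hole(3, 7);                   // merges with {2,1} and {10,3}
    assert(remove_first_fit(10) == 2);   // the coalesced hole at 2 has size 11
    return 0;
}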
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
new file mode 100644
index 00000000000..cefe66335a6
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
@@ -0,0 +1,103 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+#define N 1000000
+std::vector<MhsRbTree::Node::BlockPair> input_vector;
+MhsRbTree::Node::BlockPair old_vector[N];
+
+static int myrandom(int i) { return std::rand() % i; }
+
+static void generate_random_input() {
+ std::srand(unsigned(std::time(0)));
+
+ // set some values:
+ for (uint64_t i = 0; i < N; ++i) {
+ MhsRbTree::Node::BlockPair bp = {i+1, 0};
+ input_vector.push_back(bp);
+ old_vector[i] = bp;
+ }
+ // using built-in random generator:
+ std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
+}
+
+static void test_insert_remove(void) {
+ int i;
+ MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+ verbose = 0;
+ generate_random_input();
+ if (verbose) {
+ printf("\n we are going to insert the following block offsets\n");
+ for (i = 0; i < N; i++)
+ printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt());
+ }
+ for (i = 0; i < N; i++) {
+ tree->Insert(input_vector[i]);
+ // tree->ValidateBalance();
+ }
+ tree->ValidateBalance();
+ MhsRbTree::Node::BlockPair *p_bps = &old_vector[0];
+ tree->ValidateInOrder(p_bps);
+ printf("min node of the tree:%" PRIu64 "\n",
+ rbn_offset(tree->MinNode()).ToInt());
+ printf("max node of the tree:%" PRIu64 "\n",
+ rbn_offset(tree->MaxNode()).ToInt());
+
+ for (i = 0; i < N; i++) {
+ // tree->ValidateBalance();
+ tree->RawRemove(input_vector[i]._offset.ToInt());
+ }
+
+ tree->Destroy();
+ delete tree;
+}
+
+int test_main(int argc, const char *argv[]) {
+ default_parse_args(argc, argv);
+
+ test_insert_remove();
+ if (verbose)
+ printf("test ok\n");
+ return 0;
+}
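One portability caveat for this new test: std::random_shuffle, used above, was deprecated in C++14 and removed in C++17. If the test is ever built under a newer standard, the equivalent with <random> looks like this (a sketch, not part of the patch):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

int main() {
    std::vector<uint64_t> offsets(1000);
    for (uint64_t i = 0; i < offsets.size(); ++i)
        offsets[i] = i + 1;
    std::mt19937_64 gen(std::random_device{}());
    std::shuffle(offsets.begin(), offsets.end(), gen);  // replaces std::random_shuffle
    return 0;
}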
diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc
index 407116b983c..9f3977743a0 100644
--- a/storage/tokudb/PerconaFT/ft/txn/roll.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc
@@ -38,18 +38,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
/* rollback and rollforward routines. */
-
-#include "ft/ft.h"
+#include <memory>
#include "ft/ft-ops.h"
+#include "ft/ft.h"
#include "ft/log_header.h"
#include "ft/logger/log-internal.h"
-#include "ft/txn/xids.h"
#include "ft/txn/rollback-apply.h"
+#include "ft/txn/xids.h"
// functionality provided by roll.c is exposed by an autogenerated
// header file, logheader.h
//
-// this (poorly) explains the absense of "roll.h"
+// this (poorly) explains the absence of "roll.h"
// these flags control whether or not we send commit messages for
// various operations
@@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM filenum,
// directory row lock for its dname) and we would not get this
// far if there were other live handles.
toku_cachefile_unlink_on_close(cf);
+ toku_cachefile_skip_log_recover_on_close(cf);
done:
return 0;
}
+int toku_commit_frename(BYTESTRING /* old_name */,
+ BYTESTRING /* new_iname */,
+ TOKUTXN /* txn */,
+ LSN UU(oplsn)) {
+ return 0;
+}
+
+int toku_rollback_frename(BYTESTRING old_iname,
+ BYTESTRING new_iname,
+ TOKUTXN txn,
+ LSN UU(oplsn)) {
+ assert(txn);
+ assert(txn->logger);
+ assert(txn->logger->ct);
+
+ CACHETABLE cachetable = txn->logger->ct;
+
+ toku_struct_stat stat;
+ bool old_exist = true;
+ bool new_exist = true;
+
+ std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+ toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data),
+ &toku_free);
+ std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+ toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data),
+ &toku_free);
+
+ if (toku_stat(old_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ old_exist = false;
+ else
+ return 1;
+ }
+
+ if (toku_stat(new_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ new_exist = false;
+ else
+ return 1;
+ }
+
+ // Both old and new files can exist if:
+ // - rename() is not completed
+ // - fcreate was replayed during recovery
+ // The 'stalled cachefiles' container cachefile_list::m_stale_fileid holds
+ // closed but not yet evicted cachefiles; its key is the fs-dependent file
+ // id, a (device id, inode number) pair. To preserve the new file's id and
+ // keep it in the 'stalled cachefiles' container, the old file is removed
+ // and the new file is renamed.
+ if (old_exist && new_exist &&
+ (toku_os_unlink(old_iname_full.get()) == -1 ||
+ toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1))
+ return 1;
+
+ if (!old_exist && new_exist &&
+ (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1))
+ return 1;
+
+ // it's ok if both files do not exist on recovery
+ if (!old_exist && !new_exist)
+ assert(txn->for_recovery);
+
+ CACHEFILE cf;
+ int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf);
+ if (r != ENOENT) {
+ char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+ toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data));
+ toku_free(old_fname_in_cf);
+ // There is at least one case where fclose logging causes an error:
+ // 1) start transaction
+ // 2) create ft 'a'(write "fcreate" in recovery log)
+ // 3) rename ft 'a' to 'b'(write "frename" in recovery log)
+ // 4) abort transaction:
+ // a) rollback rename ft (renames 'b' to 'a')
+ // b) rollback create ft (removes 'a'):
+ // invokes toku_cachefile_unlink_on_close - a lazy unlink on file
+ // close that just sets the corresponding flag in the cachefile object
+ // c) write "unlink" for 'a' in recovery log
+ // (when transaction is aborted all locks are released,
+ // when file lock is released the file is closed and unlinked if
+ // corresponding flag is set in cachefile object)
+ // 5) crash
+ //
+ // After this we have the following records in recovery log:
+ // - create ft 'a',
+ // - rename 'a' to 'b',
+ // - unlink 'a'
+ //
+ // On recovery:
+ // - create 'a'
+ // - rename 'a' to 'b'
+ // - unlink 'a' - as file 'a' does not exist, we crash on an assert here
+ //
+ // There is no need to write "unlink" in the recovery log in (4a)
+ // because 'a' will be removed on transaction rollback during recovery.
+ toku_cachefile_skip_log_recover_on_close(cf);
+ }
+
+ return 0;
+}
+
int find_ft_from_filenum (const FT &ft, const FILENUM &filenum);
int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) {
FILENUM thisfnum = toku_cachefile_filenum(ft->cf);
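toku_rollback_frename() above stat()s both inames and picks one of four undo actions depending on which files exist on disk. A compact restatement of that decision table (my simplification, not PerconaFT code):

#include <cassert>

enum class FrenameUndo {
    unlink_old_then_rename,  // both exist: rename incomplete or fcreate replayed
    rename_new_to_old,       // only the new iname exists: undo the rename
    nothing,                 // only the old iname exists: rename never hit disk
    nothing_recovery_only    // neither exists: legal only during recovery
};

static FrenameUndo frename_undo_action(bool old_exists, bool new_exists,
                                       bool for_recovery) {
    if (old_exists && new_exists) return FrenameUndo::unlink_old_then_rename;
    if (!old_exists && new_exists) return FrenameUndo::rename_new_to_old;
    if (old_exists) return FrenameUndo::nothing;
    assert(for_recovery);  // mirrors: if (!old_exist && !new_exist) assert(txn->for_recovery)
    return FrenameUndo::nothing_recovery_only;
}

int main() {
    assert(frename_undo_action(false, true, false) == FrenameUndo::rename_new_to_old);
    return 0;
}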
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
index df830afd0df..c9464c3ed60 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
@@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
}
- // if we're commiting a child rollback, put its entries into the parent
+ // if we're committing a child rollback, put its entries into the parent
// by pinning both child and parent and then linking the child log entry
// list to the end of the parent log entry list.
if (txn_has_current_rollback_log(txn)) {
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
index 68c94c2ad11..08d7c8874e5 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
@@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) {
// flush an ununused log to disk, by allocating a size 0 blocknum in
// the blocktable
-static void
-toku_rollback_flush_unused_log(
- ROLLBACK_LOG_NODE log,
- BLOCKNUM logname,
- int fd,
- FT ft,
- bool write_me,
- bool keep_me,
- bool for_checkpoint,
- bool is_clone
- )
-{
+static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log,
+ BLOCKNUM logname,
+ int fd,
+ FT ft,
+ bool write_me,
+ bool keep_me,
+ bool for_checkpoint,
+ bool is_clone) {
if (write_me) {
DISKOFF offset;
- ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
+ ft->blocktable.realloc_on_disk(
+ logname, 0, &offset, ft, fd, for_checkpoint);
}
if (!keep_me && !is_clone) {
toku_free(log);
diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc
index ac393fbf179..e3dce6d27dd 100644
--- a/storage/tokudb/PerconaFT/ft/ule.cc
+++ b/storage/tokudb/PerconaFT/ft/ule.cc
@@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection(
// by new txns.
// 2.) There is only one committed entry, but the outermost
// provisional entry is older than the oldest known referenced
-// xid, so it must have commited. Therefor we can promote it to
-// committed and get rid of the old commited entry.
+// xid, so it must have committed. Therefore we can promote it to
+// committed and get rid of the old committed entry.
if (le->type != LE_MVCC) {
return false;
}
diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
index 9f84d9b03df..4793db63cc1 100644
--- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
@@ -14,12 +14,11 @@ set(tokuportability_srcs
)
add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs})
-target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC})
target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
add_library(tokuportability_static_conv STATIC ${tokuportability_srcs})
set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
+set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}")
maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv)
diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc
index 5332a2dff55..0e3efc1a12a 100644
--- a/storage/tokudb/PerconaFT/portability/file.cc
+++ b/storage/tokudb/PerconaFT/portability/file.cc
@@ -356,6 +356,12 @@ toku_os_close(int fd) { // if EINTR, retry until success
return r;
}
+int toku_os_rename(const char *old_name, const char *new_name) {
+ return rename(old_name, new_name);
+}
+
+int toku_os_unlink(const char *path) { return unlink(path); }
+
ssize_t
toku_os_read(int fd, void *buf, size_t count) {
ssize_t r;
diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
index bc48e93937d..8e73c56a6c5 100644
--- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
+++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
@@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void)
const long pagesize = 4096;
const long n_pages = TWO_MB/pagesize;
+#ifdef __linux__
+ // On linux mincore is defined as mincore(void *, size_t, unsigned char *)
unsigned char vec[n_pages];
+#else
+ // On BSD (OS X included) it is defined as mincore(void *, size_t, char *)
+ char vec[n_pages];
+#endif
{
int r = mincore(second, TWO_MB, vec);
if (r!=0 && errno==ENOMEM) {
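For readers unfamiliar with the probe: mincore() reports, per page, whether a mapped range is resident, and the only portability wrinkle is the vector element type the #ifdef above handles. A minimal standalone version (assumes a POSIX system with MAP_ANONYMOUS):

#include <sys/mman.h>
#include <unistd.h>
#include <cstdio>

int main() {
    long pagesize = sysconf(_SC_PAGESIZE);
    void *p = mmap(nullptr, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) return 1;
    *(char *)p = 1;  // touch the page so it becomes resident
#ifdef __linux__
    unsigned char vec[1];  // Linux: mincore(void *, size_t, unsigned char *)
#else
    char vec[1];           // BSD/macOS: mincore(void *, size_t, char *)
#endif
    if (mincore(p, pagesize, vec) == 0)
        printf("page resident: %d\n", vec[0] & 1);
    munmap(p, pagesize);
    return 0;
}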
diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc
index 2de12699c61..5430ff84b70 100644
--- a/storage/tokudb/PerconaFT/portability/memory.cc
+++ b/storage/tokudb/PerconaFT/portability/memory.cc
@@ -313,6 +313,15 @@ toku_strdup(const char *s) {
return (char *) toku_memdup(s, strlen(s)+1);
}
+char *toku_strndup(const char *s, size_t n) {
+ size_t s_size = strlen(s);
+ size_t bytes_to_copy = n > s_size ? s_size : n;
+ ++bytes_to_copy;
+ char *result = (char *)toku_memdup(s, bytes_to_copy);
+ result[bytes_to_copy - 1] = 0;
+ return result;
+}
+
void
toku_free(void *p) {
if (p) {
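A hedged usage sketch for the new toku_strndup(): like strndup(3), it copies at most n bytes and always NUL-terminates, but it allocates via toku_malloc(), so pair it with toku_free(). Building this sketch requires PerconaFT's memory.h on the include path (an assumption here):

#include <cassert>
#include <cstring>
#include "portability/memory.h"  // assumed path for toku_strndup/toku_free

int main() {
    char *prefix = toku_strndup("hello, world", 5);  // copies "hello" plus NUL
    assert(strcmp(prefix, "hello") == 0);
    char *whole = toku_strndup("hi", 100);  // n past the end is safe
    assert(strcmp(whole, "hi") == 0);
    toku_free(prefix);
    toku_free(whole);
    return 0;
}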
diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h
index 7780536f279..5ae652d39fc 100644
--- a/storage/tokudb/PerconaFT/portability/memory.h
+++ b/storage/tokudb/PerconaFT/portability/memory.h
@@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default")
void *toku_memdup (const void *v, size_t len);
/* Toku-version of strdup. Use this so that it calls toku_malloc() */
char *toku_strdup (const char *s) __attribute__((__visibility__("default")));
-
+/* Toku-version of strndup. Use this so that it calls toku_malloc() */
+char *toku_strndup(const char *s, size_t n)
+ __attribute__((__visibility__("default")));
/* Copy memory. Analogous to strdup() Crashes instead of returning NULL */
void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default")));
/* Toku-version of strdup. Use this so that it calls toku_xmalloc() Crashes instead of returning NULL */
diff --git a/storage/tokudb/PerconaFT/portability/portability.cc b/storage/tokudb/PerconaFT/portability/portability.cc
index ba9f8d48ed5..19f445a85d7 100644
--- a/storage/tokudb/PerconaFT/portability/portability.cc
+++ b/storage/tokudb/PerconaFT/portability/portability.cc
@@ -63,6 +63,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#if defined(HAVE_SYS_SYSCTL_H)
# include <sys/sysctl.h>
#endif
+#if defined(HAVE_PTHREAD_H)
+# include <pthread.h>
+#endif
#if defined(HAVE_PTHREAD_NP_H)
# include <pthread_np.h>
#endif
@@ -102,7 +105,11 @@ toku_os_getpid(void) {
int
toku_os_gettid(void) {
-#if defined(__NR_gettid)
+#if defined(HAVE_PTHREAD_THREADID_NP)
+ uint64_t result;
+ pthread_threadid_np(NULL, &result);
+ return (int) result; // Used for instrumentation so overflow is ok here.
+#elif defined(__NR_gettid)
return syscall(__NR_gettid);
#elif defined(SYS_gettid)
return syscall(SYS_gettid);
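Background on this hunk: Linux exposes gettid via syscall numbers, but macOS does not; there, pthread_threadid_np() is the supported way to get a numeric thread id, which is what the new HAVE_PTHREAD_THREADID_NP branch selects. A standalone sketch:

#include <cstdint>
#include <cstdio>
#if defined(__APPLE__)
#include <pthread.h>
#endif

int main() {
#if defined(__APPLE__)
    uint64_t tid;
    pthread_threadid_np(NULL, &tid);  // NULL means the calling thread
    printf("tid=%llu\n", (unsigned long long)tid);
#else
    printf("non-macOS: use gettid()/syscall(SYS_gettid) instead\n");
#endif
    return 0;
}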
diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
index 880f9a3a9bb..dbbea974a49 100644
--- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
+++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
@@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) {
if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata);
// check the data size
-#if __x86_64__
+#if defined(__x86_64__) || defined(__aarch64__)
assert(maxdata > (1ULL << 32));
#elif __i386__
assert(maxdata < (1ULL << 32));
diff --git a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
index 9ee68906bb3..71736f898ef 100644
--- a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
+++ b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
@@ -51,11 +51,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#if defined(HAVE_PTHREAD_NP_H)
# include <pthread_np.h>
#endif
+#if defined(HAVE_PTHREAD_H)
+# include <pthread.h>
+#endif
// since we implement the same thing here as in toku_os_gettid, this test
// is pretty pointless
static int gettid(void) {
-#if defined(__NR_gettid)
+#if defined(HAVE_PTHREAD_THREADID_NP)
+ uint64_t result;
+ pthread_threadid_np(NULL, &result);
+ return (int) result;
+#elif defined(__NR_gettid)
return syscall(__NR_gettid);
#elif defined(SYS_gettid)
return syscall(SYS_gettid);
diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in
index e1412cc9e14..18f6779796f 100644
--- a/storage/tokudb/PerconaFT/portability/toku_config.h.in
+++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in
@@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#cmakedefine TOKU_DEBUG_PARANOID 1
#cmakedefine USE_VALGRIND 1
-
#cmakedefine HAVE_ALLOCA_H 1
#cmakedefine HAVE_ARPA_INET_H 1
#cmakedefine HAVE_BYTESWAP_H 1
@@ -88,6 +87,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#cmakedefine HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP 1
#cmakedefine HAVE_PTHREAD_YIELD 1
#cmakedefine HAVE_PTHREAD_YIELD_NP 1
+#cmakedefine HAVE_PTHREAD_THREADID_NP 1
#cmakedefine HAVE_PTHREAD_GETTHREADID_NP 1
#cmakedefine PTHREAD_YIELD_RETURNS_INT 1
diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h
index 921d3a309f6..f127b0fe172 100644
--- a/storage/tokudb/PerconaFT/portability/toku_portability.h
+++ b/storage/tokudb/PerconaFT/portability/toku_portability.h
@@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode);
int toku_os_open_direct(const char *path, int oflag, int mode);
int toku_os_close(int fd);
int toku_os_fclose(FILE * stream);
+int toku_os_rename(const char *old_name, const char *new_name);
+int toku_os_unlink(const char *path);
ssize_t toku_os_read(int fd, void *buf, size_t count);
ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset);
void toku_os_recursive_delete(const char *path);
diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h
index 11a3f3aa2b9..a1278ef0337 100644
--- a/storage/tokudb/PerconaFT/portability/toku_time.h
+++ b/storage/tokudb/PerconaFT/portability/toku_time.h
@@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default")
// Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC.
static inline tokutime_t toku_time_now(void) {
+#if defined(__x86_64__) || defined(__i386__)
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
+#elif defined (__aarch64__)
+ uint64_t result;
+ __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result));
+ return result;
+#else
+#error No timer implementation for this platform
+#endif
}
static inline uint64_t toku_current_time_microsec(void) {
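Both rdtsc and cntvct_el0 return raw ticks at platform-dependent rates, which is why the tokutime_t-to-seconds conversion exists at all. One way to estimate the tick rate — my sketch, using the same counter reads as the header above:

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

static inline uint64_t ticks_now(void) {
#if defined(__x86_64__) || defined(__i386__)
    uint32_t lo, hi;
    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    return (uint64_t)hi << 32 | lo;
#elif defined(__aarch64__)
    uint64_t result;
    __asm__ __volatile__("mrs %[rt], cntvct_el0" : [rt] "=r"(result));
    return result;
#else
#error No timer implementation for this platform
#endif
}

int main() {
    uint64_t t0 = ticks_now();
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    uint64_t t1 = ticks_now();
    printf("~%.0f ticks per second\n", (t1 - t0) / 0.1);
    return 0;
}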
diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h
index 48e62ee49b2..fdaa561e3d0 100644
--- a/storage/tokudb/PerconaFT/src/indexer-internal.h
+++ b/storage/tokudb/PerconaFT/src/indexer-internal.h
@@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include <toku_pthread.h>
// the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array.
-// the array is a resizeable array with max size "max_keys" and current size "current_keys".
+// the array is a resizable array with max size "max_keys" and current size "current_keys".
// the ordered set is used by the hotindex undo function to collect the commit keys.
struct indexer_commit_keys {
int max_keys; // max number of keys
diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
index 8d0b080b9fe..4c7f5336161 100644
--- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
+++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
@@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u
}
// inject "delete" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
static int
indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) {
int result = 0;
@@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
}
// inject "insert" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
static int
indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) {
int result = 0;
diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
index 47f6aa44a75..c01a8f0d628 100644
--- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
@@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
foreach(ov c d r)
if (ov STREQUAL c)
- set(gset 0)
set(hset 0)
+ set(iset 0)
else ()
- set(gset 0 1 2 3 4 5)
- set(hset 0 1)
+ set(hset 0 1 2 3 4 5)
+ set(iset 0 1)
endif ()
foreach(av 0 1)
@@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
foreach(dv ${dset})
foreach(ev ${eset})
foreach(fv 0 1)
- foreach(gv ${gset})
+ foreach(gv 0 1)
foreach(hv ${hset})
-
- if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
- set(iset 0 1)
- else ()
- set(iset 0)
- endif ()
-
foreach(iv ${iset})
- set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
- set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
- set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors")
- add_test(NAME ${testname}
- COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
- -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv}
- )
- setup_toku_test_properties(${testname} ${envdir})
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+
+ if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
+ set(jset 0 1)
+ else ()
+ set(jset 0)
+ endif ()
+
+ foreach(jv ${jset})
+ set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+ set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+ set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors")
+ add_test(NAME ${testname}
+ COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
+ -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv}
+ )
+ setup_toku_test_properties(${testname} ${envdir})
+ set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+ endforeach(jv)
endforeach(iv)
endforeach(hv)
endforeach(gv)
diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
index 20df13923e6..7cce68e6ff8 100644
--- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
+++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
@@ -1,3 +1,3 @@
-# commited insert
+# committed insert
key k1
insert committed 0 v100
diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
index 3f2f8d7455a..aaf77c503cc 100644
--- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
+++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
@@ -51,7 +51,7 @@ int DISALLOW_PUTS=0;
int COMPRESS=0;
enum {MAGIC=311};
-bool dup_row_at_end = false; // false: duplicate at the begining. true: duplicate at the end. The duplicated row is row 0.
+bool dup_row_at_end = false; // false: duplicate at the beginning. true: duplicate at the end. The duplicated row is row 0.
int dup_row_id = 0; // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning. Otherwise insert the row specified here.
//
diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
index a4dc0ea9236..cc99ab560d8 100644
--- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
+++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
@@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-#include "test.h"
-#include "toku_pthread.h"
#include <db.h>
-#include <sys/stat.h>
#include <stdlib.h>
-
+#include <sys/stat.h>
+#include "ft/logger/logger.h"
+#include "test.h"
+#include "toku_pthread.h"
static int do_recover;
static int do_crash;
static char fileop;
-static int choices['I'-'A'+1];
+static int choices['J' - 'A' + 1];
const int num_choices = sizeof(choices)/sizeof(choices[0]);
static DB_TXN *txn;
const char *oldname = "oldfoo";
@@ -58,11 +58,14 @@ static char *cmd;
static void
usage(void) {
- fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n"
- " fileop = c/r/d (create/rename/delete)\n"
- " Where # is a single digit number > 0.\n"
- " A-F are required for fileop=create\n"
- " A-I are required for fileop=delete, fileop=rename\n", cmd);
+ fprintf(stderr,
+ "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# "
+ "-F# -G# [-H# -I# -J#]\n"
+ " fileop = c/r/d (create/rename/delete)\n"
+ " Where # is a single digit number > 0.\n"
+ " A-G are required for fileop=create\n"
+ " A-I are required for fileop=delete, fileop=rename\n",
+ cmd);
exit(1);
}
@@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) {
return get_bool_choice('F');
}
-static int
-get_choice_create_type(void) {
- return get_x_choice('G', 6);
-}
+static int get_choice_dir_per_db(void) { return get_bool_choice('G'); }
+
+static int get_choice_create_type(void) { return get_x_choice('H', 6); }
static int
get_choice_txn_does_open_close_before_fileop(void) {
- return get_bool_choice('H');
+ return get_bool_choice('I');
}
static int
get_choice_lock_table_split_fcreate(void) {
- int choice = get_bool_choice('I');
+ int choice = get_bool_choice('J');
if (choice)
assert(fileop_did_commit());
return choice;
@@ -157,62 +159,64 @@ do_args(int argc, char * const argv[]) {
}
char c;
- while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) {
- switch(c) {
- case 'v':
- verbose++;
- break;
- case 'q':
- verbose--;
- if (verbose<0) verbose=0;
- break;
- case 'h':
- case '?':
- usage();
- break;
- case 'c':
- do_crash = 1;
- break;
- case 'r':
- do_recover = 1;
- break;
- case 'O':
- if (fileop != '\0')
+ while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) {
+ switch (c) {
+ case 'v':
+ verbose++;
+ break;
+ case 'q':
+ verbose--;
+ if (verbose < 0)
+ verbose = 0;
+ break;
+ case 'h':
+ case '?':
usage();
- fileop = optarg[0];
- switch (fileop) {
- case 'c':
- case 'r':
- case 'd':
- break;
- default:
+ break;
+ case 'c':
+ do_crash = 1;
+ break;
+ case 'r':
+ do_recover = 1;
+ break;
+ case 'O':
+ if (fileop != '\0')
usage();
- break;
- }
- break;
- case 'A':
- case 'B':
- case 'C':
- case 'D':
- case 'E':
- case 'F':
- case 'G':
- case 'H':
- case 'I':
- if (fileop == '\0')
- usage();
- int num;
- num = atoi(optarg);
- if (num < 0 || num > 9)
- usage();
- choices[c - 'A'] = num;
- break;
- case 'X':
- if (strcmp(optarg, "novalgrind") == 0) {
- // provide a way for the shell script runner to pass an
- // arg that suppresses valgrind on this child process
+ fileop = optarg[0];
+ switch (fileop) {
+ case 'c':
+ case 'r':
+ case 'd':
+ break;
+ default:
+ usage();
+ break;
+ }
+ break;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ if (fileop == '\0')
+ usage();
+ int num;
+ num = atoi(optarg);
+ if (num < 0 || num > 9)
+ usage();
+ choices[c - 'A'] = num;
break;
- }
+ case 'X':
+ if (strcmp(optarg, "novalgrind") == 0) {
+ // provide a way for the shell script runner to pass an
+ // arg that suppresses valgrind on this child process
+ break;
+ }
// otherwise, fall through to an error
default:
usage();
@@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) {
if (argc!=optind) { usage(); exit(1); }
for (i = 0; i < num_choices; i++) {
- if (i >= 'G' - 'A' && fileop == 'c')
+ if (i >= 'H' - 'A' && fileop == 'c')
break;
if (choices[i] == -1)
usage();
@@ -261,6 +265,8 @@ static void env_startup(void) {
int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag;
r = db_env_create(&env, 0);
CKERR(r);
+ r = env->set_dir_per_db(env, get_choice_dir_per_db());
+ CKERR(r);
env->set_errfile(env, stderr);
r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
@@ -625,8 +631,11 @@ recover_and_verify(void) {
else if (did_create_commit_early())
expect_old_name = 1;
}
- verify_file_exists(oldname, expect_old_name);
- verify_file_exists(newname, expect_new_name);
+ // We can only verify file existence if the recovery log was flushed
+ // before the crash
+ if ((get_choice_flush_log_before_crash())) {
+ verify_file_exists(oldname, expect_old_name);
+ verify_file_exists(newname, expect_new_name);
+ }
env_shutdown();
}
diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
index a2b48e443cd..48843a0bd32 100644
--- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
+++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
@@ -166,7 +166,7 @@ run_test (void) {
DB_BTREE_STAT64 s;
r = db->stat64(db, NULL, &s); CKERR(r);
- assert(s.bt_nkeys == 0);
+ assert(s.bt_nkeys == 1);
r = db->close(db, 0); CKERR(r);
@@ -176,7 +176,7 @@ run_test (void) {
r = txn->commit(txn, 0); CKERR(r);
r = db->stat64(db, NULL, &s); CKERR(r);
- assert(s.bt_nkeys == 0);
+ assert(s.bt_nkeys == 1);
}
// verify update callback overwrites the row
diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
index 8e5109cd2a9..f6111d4b67c 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
@@ -78,7 +78,7 @@ static void test_insert_many_gc(void) {
// from having an MVCC stack of size 'N'. At the time of this
// writing, we run full GC on leaf-inject when the leaf is
// 32mb or larger. A good invariant is that the max LE size
- // never grew larger than 35mb and that the max commited xr stack
+ // never grew larger than 35mb and that the max committed xr stack
// length never exceeded 35
const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE");
const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR");
diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
index aaafe284906..88140dd1731 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
@@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// This test is a micro stress test that does multithreaded updates on a fixed size table.
// There is also a thread that scans the table with bulk fetch, ensuring the sum is zero.
//
-// This test is targetted at stressing the locktree, hence the small table and many update threads.
+// This test is targeted at stressing the locktree, hence the small table and many update threads.
//
static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) {
diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
index fec454b8009..301eed1560e 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
@@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) {
continue;
}
}
- if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n");
+ if (verbose>0) printf("%s", __FILE__);
+ if (verbose>1) printf("\n");
for (i=1; i<100; i++)
test_txn_abort(i);
if (verbose>1) printf("%s OK\n", __FILE__);
diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h
index 462a2a3d861..d40f7795b0b 100644
--- a/storage/tokudb/PerconaFT/src/ydb-internal.h
+++ b/storage/tokudb/PerconaFT/src/ydb-internal.h
@@ -114,7 +114,7 @@ struct __toku_db_env_internal {
char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /)
char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /)
- char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /)
+ char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
fs_redzone_state fs_state;
uint64_t fs_seq; // how many times has fs_poller run?
@@ -132,7 +132,8 @@ struct __toku_db_env_internal {
int datadir_lockfd;
int logdir_lockfd;
int tmpdir_lockfd;
- bool check_thp; // if set check if transparent huge pages are disables
+ bool check_thp; // if set check if transparent huge pages are disabled
+ bool dir_per_db;
uint64_t (*get_loader_memory_size_callback)(void);
uint64_t default_lock_timeout_msec;
uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc
index aed271bce40..3341f6d76c6 100644
--- a/storage/tokudb/PerconaFT/src/ydb.cc
+++ b/storage/tokudb/PerconaFT/src/ydb.cc
@@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) {
return env->i->check_thp;
}
+static bool env_set_dir_per_db(DB_ENV *env, bool new_val) {
+ HANDLE_PANICKED_ENV(env);
+ bool r = env->i->dir_per_db;
+ env->i->dir_per_db = new_val;
+ return r;
+}
+
+static bool env_get_dir_per_db(DB_ENV *env) {
+ HANDLE_PANICKED_ENV(env);
+ return env->i->dir_per_db;
+}
+
+static const char *env_get_data_dir(DB_ENV *env) {
+ return env->i->real_data_dir;
+}
+
static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags);
static int
@@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) {
USENV(do_backtrace);
USENV(set_check_thp);
USENV(get_check_thp);
+ USENV(set_dir_per_db);
+ USENV(get_dir_per_db);
+ USENV(get_data_dir);
#undef USENV
// unlocked methods
@@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
if (env_is_db_with_dname_open(env, newname)) {
return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n");
}
-
+
DBT old_dname_dbt;
DBT new_dname_dbt;
DBT iname_dbt;
@@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
r = EEXIST;
}
else if (r == DB_NOTFOUND) {
+ DBT new_iname_dbt;
+ // Do not rename ft file if 'dir_per_db' option is not set
+ auto new_iname =
+ env->get_dir_per_db(env)
+ ? generate_iname_for_rename_or_open(
+ env, txn, newname, false)
+ : std::unique_ptr<char[], decltype(&toku_free)>(
+ toku_strdup(iname), &toku_free);
+ toku_fill_dbt(
+ &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1);
+
// remove old (dname,iname) and insert (newname,iname) in directory
r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true);
if (r != 0) { goto exit; }
- r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true);
+
+        // Rename the ft file on disk only if the 'dir_per_db' option is set
+ if (env->get_dir_per_db(env))
+ r = toku_ft_rename_iname(txn,
+ env->get_data_dir(env),
+ iname,
+ new_iname.get(),
+ env->i->cachetable);
+
+ r = toku_db_put(env->i->directory,
+ txn,
+ &new_dname_dbt,
+ &new_iname_dbt,
+ 0,
+ true);
if (r != 0) { goto exit; }
//Now that we have writelocks on both dnames, verify that there are still no handles open. (to prevent race conditions)
@@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
// otherwise, we're okay in marking this ft as remove on
// commit. no new handles can open for this dictionary
// because the txn has directory write locks on the dname
- if (txn && !can_acquire_table_lock(env, txn, iname)) {
+ if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) {
r = DB_LOCK_NOTGRANTED;
}
// We don't do anything at the ft or cachetable layer for rename.
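Taken together, the env_dbrename hunks choose the target iname up front and
only move the physical ft file when dir_per_db is enabled; the directory row is
rewritten either way, and the final table-lock check now probes the new iname.
A condensed sketch of that decision (error handling elided, not the verbatim
code):

    // dir_per_db on: fresh iname + on-disk rename; off: keep the old iname.
    auto new_iname = env->get_dir_per_db(env)
        ? generate_iname_for_rename_or_open(env, txn, newname, false)
        : std::unique_ptr<char[], decltype(&toku_free)>(toku_strdup(iname),
                                                        &toku_free);
    if (env->get_dir_per_db(env))
        toku_ft_rename_iname(txn, env->get_data_dir(env), iname,
                             new_iname.get(), env->i->cachetable);
    // directory now maps newname -> new_iname (the old dname row was deleted)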
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc
index e5bd4e7d089..100d1bfa20b 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_db.cc
@@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) {
*statp = ydb_db_layer_status;
}
-static void
-create_iname_hint(const char *dname, char *hint) {
+void create_iname_hint(const char *dname, char *hint) {
//Requires: size of hint array must be > strlen(dname)
//Copy alphanumeric characters only.
//Replace strings of non-alphanumeric characters with a single underscore.
@@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) {
*hint = '\0';
}
+void create_iname_hint_for_dbdir(const char *dname, char *hint) {
+ assert(dname);
+ if (*dname == '.')
+ ++dname;
+ if (*dname == '/')
+ ++dname;
+ bool underscored = false;
+ bool dbdir_is_parsed = false;
+    // Do not change the first '/' because it is the
+    // delimiter that splits the name into the database
+    // dir and the table dir.
+ while (*dname) {
+ if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) {
+ char c = *dname++;
+ *hint++ = c;
+ if (c == '/')
+ dbdir_is_parsed = true;
+ underscored = false;
+ } else {
+ if (!underscored)
+ *hint++ = '_';
+ dname++;
+ underscored = true;
+ }
+ }
+ *hint = '\0';
+}
+
// n < 0 means to ignore mark and ignore n
// n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname
// (intended for use by loader, which will create many inames using one txnid).
-static char *
-create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) {
+char *create_iname(DB_ENV *env,
+ uint64_t id1,
+ uint64_t id2,
+ char *hint,
+ const char *mark,
+ int n) {
int bytes;
char inamebase[strlen(hint) +
8 + // hex file format version
@@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma
return rval;
}
+static uint64_t nontransactional_open_id = 0;
+
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+ DB_ENV *env,
+ DB_TXN *txn,
+ const char *dname,
+ bool is_open) {
+ std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+ char hint[strlen(dname) + 1];
+ uint64_t id1 = 0;
+ uint64_t id2 = 0;
+
+ if (txn) {
+ id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
+ id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
+ } else if (is_open)
+ id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
+
+ if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+ create_iname_hint_for_dbdir(dname, hint);
+ else
+ create_iname_hint(dname, hint);
+
+ result.reset(create_iname(env, id1, id2, hint, NULL, -1));
+
+ return result;
+}
+
static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode);
// Effect: Do the work required of DB->close().
@@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY
return r;
}
-static uint64_t nontransactional_open_id = 0;
-
// inames are created here.
// algorithm:
// begin txn
@@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
- char *iname = (char *) iname_dbt.data;
+ std::unique_ptr<char[], decltype(&toku_free)> iname(
+ static_cast<char *>(iname_dbt.data), &toku_free);
if (r == DB_NOTFOUND && !is_db_create) {
r = ENOENT;
} else if (r==0 && is_db_excl) {
r = EEXIST;
} else if (r == DB_NOTFOUND) {
- char hint[strlen(dname) + 1];
-
- // create iname and make entry in directory
- uint64_t id1 = 0;
- uint64_t id2 = 0;
-
- if (txn) {
- id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
- id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
- } else {
- id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
- }
- create_iname_hint(dname, hint);
- iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname
- toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1);
+ iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true);
+ toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1);
//
// put_flags will be 0 for performance only, avoid unnecessary query
// if we are creating a hot index, per #3166, we do not want the write lock in directory grabbed.
@@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
// we now have an iname
if (r == 0) {
- r = toku_db_open_iname(db, txn, iname, flags, mode);
+ r = toku_db_open_iname(db, txn, iname.get(), flags, mode);
if (r == 0) {
db->i->dname = toku_xstrdup(dname);
env_note_db_opened(db->dbenv, db); // tell env that a new db handle is open (using dname)
}
}
- if (iname) {
- toku_free(iname);
- }
return r;
}
@@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
// now create new iname
char hint[strlen(dname) + 1];
- create_iname_hint(dname, hint);
+ if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+ create_iname_hint_for_dbdir(dname, hint);
+ else
+ create_iname_hint(dname, hint);
const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env
new_inames_in_env[i] = new_iname;
toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory
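The difference between the two hint builders is only the first path separator:
create_iname_hint_for_dbdir strips a leading "./" and then preserves the first
'/' as the database/table delimiter, while every other non-alphanumeric run
still collapses to a single underscore. A small sketch with a hypothetical
dname:

    // Sketch: hint must have room for more than strlen(dname) bytes.
    char hint[sizeof("./mydb/t1-part")];
    create_iname_hint_for_dbdir("./mydb/t1-part", hint);
    // hint is now "mydb/t1_part": "./" dropped, first '/' kept, '-' -> '_'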
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h
index 8b92dd1c3cb..8be28857c14 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.h
+++ b/storage/tokudb/PerconaFT/src/ydb_db.h
@@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ydb-internal.h"
#include "ydb_txn.h"
+#include <memory>
+
typedef enum {
YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */
YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */
@@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) {
}
return r;
}
+
+void create_iname_hint_for_dbdir(const char *dname, char *hint);
+void create_iname_hint(const char *dname, char *hint);
+char *create_iname(DB_ENV *env,
+ uint64_t id1,
+ uint64_t id2,
+ char *hint,
+ const char *mark,
+ int n);
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+ DB_ENV *env,
+ DB_TXN *txn,
+ const char *dname,
+ bool is_open);
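The header now exposes the iname helpers and hands buffers to callers as a
std::unique_ptr with toku_free as its deleter, which is what let toku_db_open
drop its manual toku_free(iname) cleanup. The ownership pattern in isolation,
with strdup/free standing in for toku_strdup/toku_free:

    // Stand-alone illustration; compiles on its own.
    #include <cstdlib>   // free
    #include <cstring>   // strdup
    #include <memory>

    int main() {
        std::unique_ptr<char[], decltype(&free)> iname(
            strdup("mydb/t1_x.tokudb"), &free);
        // iname.get() can be passed to C APIs; free runs once at scope exit.
        return iname ? 0 : 1;
    }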
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
index da833146088..7501b1bee01 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
@@ -1,10 +1,10 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-# Free Software Foundation, Inc.
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2009-04-27'
+timestamp='2016-06-22'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-04-27'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -27,16 +25,16 @@ timestamp='2009-04-27'
# the same distribution terms that you use for the rest of that program.
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
+# Originally written by Per Bothner. Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
#
# This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1.
#
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
me=`echo "$0" | sed -e 's,.*/,,'`
@@ -56,8 +54,9 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
# switched to ELF, *-*-netbsd* would select the old
# object file format. This provides both forward
@@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep __ELF__ >/dev/null
+ | grep -q __ELF__
then
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
# Return netbsd for either. FIX?
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
fi
;;
*)
- os=netbsd
+ os=netbsd
;;
esac
# The OS release
@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
;;
*5.*)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
;;
esac
# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- exit ;;
+ # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+ exitcode=$?
+ trap '' 0
+ exit $exitcode ;;
Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo s390-ibm-zvmoe
exit ;;
*:OS400:*:*)
- echo powerpc-ibm-os400
+ echo powerpc-ibm-os400
exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
@@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
+ i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+ echo i386-pc-auroraux${UNAME_RELEASE}
+ exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
eval $set_cc_for_build
SUN_ARCH="i386"
@@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# MiNT. But MiNT is downward compatible to TOS, so this should
# be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
+ exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint${UNAME_RELEASE}
exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint${UNAME_RELEASE}
- exit ;;
+ echo m68k-unknown-mint${UNAME_RELEASE}
+ exit ;;
m68k:machten:*:*)
echo m68k-apple-machten${UNAME_RELEASE}
exit ;;
@@ -477,8 +482,8 @@ EOF
echo m88k-motorola-sysv3
exit ;;
AViiON:dgux:*:*)
- # DG/UX returns AViiON for all architectures
- UNAME_PROCESSOR=`/usr/bin/uname -p`
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
then
if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -491,7 +496,7 @@ EOF
else
echo i586-dg-dgux${UNAME_RELEASE}
fi
- exit ;;
+ exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
echo m88k-dolphin-sysv3
exit ;;
@@ -548,7 +553,7 @@ EOF
echo rs6000-ibm-aix3.2
fi
exit ;;
- *:AIX:*:[456])
+ *:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
@@ -591,52 +596,52 @@ EOF
9000/[678][0-9][0-9])
if [ -x /usr/bin/getconf ]; then
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "${sc_cpu_version}" in
- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
- 532) # CPU_PA_RISC2_0
- case "${sc_kernel_bits}" in
- 32) HP_ARCH="hppa2.0n" ;;
- 64) HP_ARCH="hppa2.0w" ;;
+ sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+ case "${sc_cpu_version}" in
+ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+ 532) # CPU_PA_RISC2_0
+ case "${sc_kernel_bits}" in
+ 32) HP_ARCH="hppa2.0n" ;;
+ 64) HP_ARCH="hppa2.0w" ;;
'') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
- esac ;;
- esac
+ esac ;;
+ esac
fi
if [ "${HP_ARCH}" = "" ]; then
eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ sed 's/^ //' << EOF >$dummy.c
- #define _HPUX_SOURCE
- #include <stdlib.h>
- #include <unistd.h>
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
- int main ()
- {
- #if defined(_SC_KERNEL_BITS)
- long bits = sysconf(_SC_KERNEL_BITS);
- #endif
- long cpu = sysconf (_SC_CPU_VERSION);
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
- case CPU_PA_RISC2_0:
- #if defined(_SC_KERNEL_BITS)
- switch (bits)
- {
- case 64: puts ("hppa2.0w"); break;
- case 32: puts ("hppa2.0n"); break;
- default: puts ("hppa2.0"); break;
- } break;
- #else /* !defined(_SC_KERNEL_BITS) */
- puts ("hppa2.0"); break;
- #endif
- default: puts ("hppa1.0"); break;
- }
- exit (0);
- }
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
EOF
(CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -656,7 +661,7 @@ EOF
# => hppa64-hp-hpux11.23
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
- grep __LP64__ >/dev/null
+ grep -q __LP64__
then
HP_ARCH="hppa2.0w"
else
@@ -727,22 +732,22 @@ EOF
exit ;;
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
echo c1-convex-bsd
- exit ;;
+ exit ;;
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
if getsysinfo -f scalar_acc
then echo c32-convex-bsd
else echo c2-convex-bsd
fi
- exit ;;
+ exit ;;
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
echo c34-convex-bsd
- exit ;;
+ exit ;;
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
echo c38-convex-bsd
- exit ;;
+ exit ;;
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
echo c4-convex-bsd
- exit ;;
+ exit ;;
CRAY*Y-MP:*:*:*)
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
@@ -766,14 +771,14 @@ EOF
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
- echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit ;;
5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
- echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -785,13 +790,12 @@ EOF
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ case ${UNAME_PROCESSOR} in
amd64)
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
*)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
esac
exit ;;
i*:CYGWIN*:*)
@@ -800,19 +804,22 @@ EOF
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
+ i*:MSYS*:*)
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
i*:windows32*:*)
- # uname -m includes "-pc" on this system.
- echo ${UNAME_MACHINE}-mingw32
+ # uname -m includes "-pc" on this system.
+ echo ${UNAME_MACHINE}-mingw32
exit ;;
i*:PW*:*)
echo ${UNAME_MACHINE}-pc-pw32
exit ;;
- *:Interix*:[3456]*)
- case ${UNAME_MACHINE} in
+ *:Interix*:*)
+ case ${UNAME_MACHINE} in
x86)
echo i586-pc-interix${UNAME_RELEASE}
exit ;;
- EM64T | authenticamd | genuineintel)
+ authenticamd | genuineintel | EM64T)
echo x86_64-unknown-interix${UNAME_RELEASE}
exit ;;
IA64)
@@ -822,6 +829,9 @@ EOF
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
echo i${UNAME_MACHINE}-pc-mks
exit ;;
+ 8664:Windows_NT:*)
+ echo x86_64-pc-mks
+ exit ;;
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
@@ -851,6 +861,27 @@ EOF
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
exit ;;
+ aarch64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ aarch64_be:Linux:*:*)
+ UNAME_MACHINE=aarch64_be
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ alpha:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ EV5) UNAME_MACHINE=alphaev5 ;;
+ EV56) UNAME_MACHINE=alphaev56 ;;
+ PCA56) UNAME_MACHINE=alphapca56 ;;
+ PCA57) UNAME_MACHINE=alphapca56 ;;
+ EV6) UNAME_MACHINE=alphaev6 ;;
+ EV67) UNAME_MACHINE=alphaev67 ;;
+ EV68*) UNAME_MACHINE=alphaev68 ;;
+ esac
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ exit ;;
arm*:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -858,20 +889,40 @@ EOF
then
echo ${UNAME_MACHINE}-unknown-linux-gnu
else
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ else
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+ fi
fi
exit ;;
avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
cris:Linux:*:*)
- echo cris-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
crisv32:Linux:*:*)
- echo crisv32-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
frv:Linux:*:*)
- echo frv-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ hexagon:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ i*86:Linux:*:*)
+ LIBC=gnu
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #ifdef __dietlibc__
+ LIBC=dietlibc
+ #endif
+EOF
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+ echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -882,78 +933,34 @@ EOF
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
- mips:Linux:*:*)
+ mips:Linux:*:* | mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
- #undef mips
- #undef mipsel
+ #undef ${UNAME_MACHINE}
+ #undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mipsel
+ CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips
+ CPU=${UNAME_MACHINE}
#else
CPU=
#endif
#endif
EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
- ;;
- mips64:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef mips64
- #undef mips64el
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mips64el
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips64
- #else
- CPU=
- #endif
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
or32:Linux:*:*)
- echo or32-unknown-linux-gnu
- exit ;;
- ppc:Linux:*:*)
- echo powerpc-unknown-linux-gnu
- exit ;;
- ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-gnu
- exit ;;
- alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
- EV5) UNAME_MACHINE=alphaev5 ;;
- EV56) UNAME_MACHINE=alphaev56 ;;
- PCA56) UNAME_MACHINE=alphapca56 ;;
- PCA57) UNAME_MACHINE=alphapca56 ;;
- EV6) UNAME_MACHINE=alphaev6 ;;
- EV67) UNAME_MACHINE=alphaev67 ;;
- EV68*) UNAME_MACHINE=alphaev68 ;;
- esac
- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
padre:Linux:*:*)
echo sparc-unknown-linux-gnu
exit ;;
+ parisc64:Linux:*:* | hppa64:Linux:*:*)
+ echo hppa64-unknown-linux-gnu
+ exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
@@ -962,14 +969,17 @@ EOF
*) echo hppa-unknown-linux-gnu ;;
esac
exit ;;
- parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-gnu
+ ppc64:Linux:*:*)
+ echo powerpc64-unknown-linux-gnu
+ exit ;;
+ ppc:Linux:*:*)
+ echo powerpc-unknown-linux-gnu
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux
exit ;;
sh64*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
sh*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -977,75 +987,18 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
+ tile*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;;
x86_64:Linux:*:*)
- echo x86_64-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
xtensa*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
- i*86:Linux:*:*)
- # The BFD linker knows what the default object file format is, so
- # first see if it will tell us. cd to the root directory to prevent
- # problems with other programs or directories called `ld' in the path.
- # Set LC_ALL=C to ensure ld outputs messages in English.
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
- | sed -ne '/supported targets:/!d
- s/[ ][ ]*/ /g
- s/.*supported targets: *//
- s/ .*//
- p'`
- case "$ld_supported_targets" in
- elf32-i386)
- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
- ;;
- a.out-i386-linux)
- echo "${UNAME_MACHINE}-pc-linux-gnuaout"
- exit ;;
- "")
- # Either a pre-BFD a.out linker (linux-gnuoldld) or
- # one that does not give us useful --help.
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
- exit ;;
- esac
- # Determine whether the default compiler is a.out or elf
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <features.h>
- #ifdef __ELF__
- # ifdef __GLIBC__
- # if __GLIBC__ >= 2
- LIBC=gnu
- # else
- LIBC=gnulibc1
- # endif
- # else
- LIBC=gnulibc1
- # endif
- #else
- #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- LIBC=gnu
- #else
- LIBC=gnuaout
- #endif
- #endif
- #ifdef __dietlibc__
- LIBC=dietlibc
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^LIBC/{
- s: ::g
- p
- }'`"
- test x"${LIBC}" != x && {
- echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
- exit
- }
- test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
- ;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
# earlier versions are messed up and put the nodename in both
@@ -1053,11 +1006,11 @@ EOF
echo i386-sequent-sysv4
exit ;;
i*86:UNIX_SV:4.2MP:2.*)
- # Unixware is an offshoot of SVR4, but it has its own version
- # number series starting with 2...
- # I am not positive that other SVR4 systems won't match this,
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
# I just have to hope. -- rms.
- # Use sysv4.2uw... so that sysv4* matches it.
+ # Use sysv4.2uw... so that sysv4* matches it.
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
exit ;;
i*86:OS/2:*:*)
@@ -1074,7 +1027,7 @@ EOF
i*86:syllable:*:*)
echo ${UNAME_MACHINE}-pc-syllable
exit ;;
- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
echo i386-unknown-lynxos${UNAME_RELEASE}
exit ;;
i*86:*DOS:*:*)
@@ -1089,7 +1042,7 @@ EOF
fi
exit ;;
i*86:*:5:[678]*)
- # UnixWare 7.x, OpenUNIX and OpenServer 6.
+ # UnixWare 7.x, OpenUNIX and OpenServer 6.
case `/bin/uname -X | grep "^Machine"` in
*486*) UNAME_MACHINE=i486 ;;
*Pentium) UNAME_MACHINE=i586 ;;
@@ -1117,13 +1070,13 @@ EOF
exit ;;
pc:*:*:*)
# Left here for compatibility:
- # uname -m prints for DJGPP always 'pc', but it prints nothing about
- # the processor, so we play safe by assuming i586.
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+ # the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
# prints for the "djgpp" host, or else GDB configury will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
- exit ;;
+ exit ;;
Intel:Mach:3*:*)
echo i386-pc-mach3
exit ;;
@@ -1158,8 +1111,8 @@ EOF
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4; exit; } ;;
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && { echo i486-ncr-sysv4; exit; } ;;
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
OS_REL='.3'
test -r /etc/.relid \
@@ -1182,7 +1135,7 @@ EOF
rs6000:LynxOS:2.*:*)
echo rs6000-unknown-lynxos${UNAME_RELEASE}
exit ;;
- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
echo powerpc-unknown-lynxos${UNAME_RELEASE}
exit ;;
SM[BE]S:UNIX_SV:*:*)
@@ -1202,10 +1155,10 @@ EOF
echo ns32k-sni-sysv
fi
exit ;;
- PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
- # says <Richard.M.Bartel@ccMail.Census.GOV>
- echo i586-unisys-sysv4
- exit ;;
+ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit ;;
*:UNIX_System_V:4*:FTX*)
# From Gerald Hewes <hewes@openmarket.com>.
# How about differentiating between stratus architectures? -djm
@@ -1231,11 +1184,11 @@ EOF
exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
if [ -d /usr/nec ]; then
- echo mips-nec-sysv${UNAME_RELEASE}
+ echo mips-nec-sysv${UNAME_RELEASE}
else
- echo mips-unknown-sysv${UNAME_RELEASE}
+ echo mips-unknown-sysv${UNAME_RELEASE}
fi
- exit ;;
+ exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
echo powerpc-be-beos
exit ;;
@@ -1275,6 +1228,16 @@ EOF
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in
+ i386)
+ eval $set_cc_for_build
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ UNAME_PROCESSOR="x86_64"
+ fi
+ fi ;;
unknown) UNAME_PROCESSOR=powerpc ;;
esac
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
@@ -1290,6 +1253,9 @@ EOF
*:QNX:*:4*)
echo i386-pc-qnx
exit ;;
+ NEO-?:NONSTOP_KERNEL:*:*)
+ echo neo-tandem-nsk${UNAME_RELEASE}
+ exit ;;
NSE-?:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE}
exit ;;
@@ -1335,13 +1301,13 @@ EOF
echo pdp10-unknown-its
exit ;;
SEI:*:*:SEIUX)
- echo mips-sei-seiux${UNAME_RELEASE}
+ echo mips-sei-seiux${UNAME_RELEASE}
exit ;;
*:DragonFly:*:*)
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
exit ;;
*:*VMS:*:*)
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
case "${UNAME_MACHINE}" in
A*) echo alpha-dec-vms ; exit ;;
I*) echo ia64-dec-vms ; exit ;;
@@ -1359,6 +1325,9 @@ EOF
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
+ x86_64:VMkernel:*:*)
+ echo ${UNAME_MACHINE}-unknown-esx
+ exit ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1381,11 +1350,11 @@ main ()
#include <sys/param.h>
printf ("m68k-sony-newsos%s\n",
#ifdef NEWSOS4
- "4"
+ "4"
#else
- ""
+ ""
#endif
- ); exit (0);
+ ); exit (0);
#endif
#endif
diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
index af82b4357d2..f11b9f350d7 100644
--- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
@@ -1,6 +1,6 @@
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO)
-set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify ba_replay)
+set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify)
foreach(tool ${tools})
add_executable(${tool} ${tool}.cc)
add_dependencies(${tool} install_tdb_h)
@@ -14,4 +14,3 @@ target_link_libraries(ftverify m)
install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
-
diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc
deleted file mode 100644
index cade7e5dfaf..00000000000
--- a/storage/tokudb/PerconaFT/tools/ba_replay.cc
+++ /dev/null
@@ -1,629 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
-
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-// Replay a block allocator trace against different strategies and compare
-// the results
-
-#include <db.h>
-
-#include <getopt.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <map>
-#include <set>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include <portability/memory.h>
-#include <portability/toku_assert.h>
-#include <portability/toku_stdlib.h>
-
-#include "ft/serialize/block_allocator.h"
-
-using std::map;
-using std::set;
-using std::string;
-using std::vector;
-
-static int verbose = false;
-
-static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) {
- if (!pred) {
- fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line);
- abort();
- }
-}
-
-static char *trim_whitespace(char *line) {
- // skip leading whitespace
- while (isspace(*line)) {
- line++;
- }
- return line;
-}
-
-static int64_t parse_number(char **ptr, int line_num, int base) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- char *new_ptr;
- int64_t n = strtoll(line, &new_ptr, base);
- ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num);
- ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num);
- *ptr = new_ptr;
- return n;
-}
-
-static uint64_t parse_uint64(char **ptr, int line_num) {
- int64_t n = parse_number(ptr, line_num, 10);
- // we happen to know that the uint64's we deal with will
- // take less than 63 bits (they come from pointers)
- return static_cast<uint64_t>(n);
-}
-
-static string parse_token(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- // parse the first token, which represents the traced function
- char token[64];
- int r = sscanf(*ptr, "%64s", token);
- ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num);
- *ptr += strlen(token);
- return string(token);
-}
-
-static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
-
- uint64_t offset, size;
- int bytes_read;
- int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read);
- ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num);
- *ptr += bytes_read;
- return block_allocator::blockpair(offset, size);
-}
-
-static char *strip_newline(char *line, bool *found) {
- char *ptr = strchr(line, '\n');
- if (ptr != nullptr) {
- if (found != nullptr) {
- *found = true;
- }
- *ptr = '\0';
- }
- return line;
-}
-
-static char *read_trace_line(FILE *file) {
- const int buf_size = 4096;
- char buf[buf_size];
- std::stringstream ss;
- while (true) {
- if (fgets(buf, buf_size, file) == nullptr) {
- break;
- }
- bool has_newline = false;
- ss << strip_newline(buf, &has_newline);
- if (has_newline) {
- // end of the line, we're done out
- break;
- }
- }
- std::string s = ss.str();
- return s.size() ? toku_strdup(s.c_str()) : nullptr;
-}
-
-static vector<string> canonicalize_trace_from(FILE *file) {
- // new trace, canonicalized from a raw trace
- vector<string> canonicalized_trace;
-
- // raw allocator id -> canonical allocator id
- //
- // keeps track of allocators that were created as part of the trace,
- // and therefore will be part of the canonicalized trace.
- uint64_t allocator_id_seq_num = 0;
- map<uint64_t, uint64_t> allocator_ids;
-
- // allocated offset -> allocation seq num
- //
- uint64_t allocation_seq_num = 0;
- static const uint64_t ASN_NONE = (uint64_t) -1;
- typedef map<uint64_t, uint64_t> offset_seq_map;
-
- // raw allocator id -> offset_seq_map that tracks its allocations
- map<uint64_t, offset_seq_map> offset_to_seq_num_maps;
-
- int line_num = 0;
- char *line;
- while ((line = read_trace_line(file)) != nullptr) {
- line_num++;
- char *ptr = line;
-
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 16);
-
- std::stringstream ss;
- if (fn.find("ba_trace_create") != string::npos) {
- ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num);
- ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs",
- "corrupted trace: bad fn", line, line_num);
-
- // we only convert the allocator_id to an allocator_id_seq_num
- // in the canonical trace and leave the rest of the line as-is.
- allocator_ids[allocator_id] = allocator_id_seq_num;
- ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl;
- allocator_id_seq_num++;
-
- // First, read passed the reserve / alignment values.
- (void) parse_uint64(&ptr, line_num);
- (void) parse_uint64(&ptr, line_num);
- if (fn == "ba_trace_create_from_blockpairs") {
- // For each blockpair created by this traceline, add its offset to the offset seq map
- // with asn ASN_NONE so that later canonicalizations of `free' know whether to write
- // down the asn or the raw offset.
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- (*map)[bp.offset] = ASN_NONE;
- }
- }
- } else {
- ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num);
- uint64_t canonical_allocator_id = allocator_ids[allocator_id];
-
- // this is the map that tracks allocations for this allocator
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
-
- if (fn == "ba_trace_alloc") {
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
-
- // remember that an allocation at `offset' has the current alloc seq num
- (*map)[offset] = allocation_seq_num;
-
- // translate `offset = alloc(size)' to `asn = alloc(size)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
- allocation_seq_num++;
- } else if (fn == "ba_trace_free") {
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num);
-
- // get the alloc seq num for an allcation that occurred at `offset'
- const uint64_t asn = (*map)[offset];
- map->erase(offset);
-
- // if there's an asn, then a corresponding ba_trace_alloc occurred and we should
- // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs
- // and we write the original offset.
- if (asn != ASN_NONE) {
- ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl;
- } else {
- ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl;
- }
- } else if (fn == "ba_trace_destroy") {
- // Remove this allocator from both maps
- allocator_ids.erase(allocator_id);
- offset_to_seq_num_maps.erase(allocator_id);
-
- // translate `destroy(ptr_id) to destroy(canonical_id)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl;
- } else {
- ba_replay_assert(false, "corrupted trace: bad fn", line, line_num);
- }
- }
- canonicalized_trace.push_back(ss.str());
-
- toku_free(line);
- }
-
- if (allocator_ids.size() != 0) {
- fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running");
- }
-
- return canonicalized_trace;
-}
-
-struct streaming_variance_calculator {
- int64_t n_samples;
- int64_t mean;
- int64_t variance;
-
- // math credit: AoCP, Donald Knuth, '62
- void add_sample(int64_t x) {
- n_samples++;
- if (n_samples == 1) {
- mean = x;
- variance = 0;
- } else {
- int64_t old_mean = mean;
- mean = old_mean + ((x - old_mean) / n_samples);
- variance = (((n_samples - 1) * variance) +
- ((x - old_mean) * (x - mean))) / n_samples;
- }
- }
-};
-
-struct canonical_trace_stats {
- uint64_t n_lines_replayed;
-
- uint64_t n_create;
- uint64_t n_create_from_blockpairs;
- uint64_t n_alloc_hot;
- uint64_t n_alloc_cold;
- uint64_t n_free;
- uint64_t n_destroy;
-
- struct streaming_variance_calculator alloc_hot_bytes;
- struct streaming_variance_calculator alloc_cold_bytes;
-
- canonical_trace_stats() {
- memset(this, 0, sizeof(*this));
- }
-};
-
-struct fragmentation_report {
- TOKU_DB_FRAGMENTATION_S beginning;
- TOKU_DB_FRAGMENTATION_S end;
- fragmentation_report() {
- memset(this, 0, sizeof(*this));
- }
- void merge(const struct fragmentation_report &src_report) {
- for (int i = 0; i < 2; i++) {
- TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end;
- const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end;
- dst->file_size_bytes += src->file_size_bytes;
- dst->data_bytes += src->data_bytes;
- dst->data_blocks += src->data_blocks;
- dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional;
- dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional;
- dst->unused_bytes += src->unused_bytes;
- dst->unused_blocks += src->unused_blocks;
- dst->largest_unused_block += src->largest_unused_block;
- }
- }
-};
-
-static void replay_canonicalized_trace(const vector<string> &canonicalized_trace,
- block_allocator::allocation_strategy strategy,
- map<uint64_t, struct fragmentation_report> *reports,
- struct canonical_trace_stats *stats) {
- // maps an allocator id to its block allocator
- map<uint64_t, block_allocator *> allocator_map;
-
- // maps allocation seq num to allocated offset
- map<uint64_t, uint64_t> seq_num_to_offset;
-
- for (vector<string>::const_iterator it = canonicalized_trace.begin();
- it != canonicalized_trace.end(); it++) {
- const int line_num = stats->n_lines_replayed++;
-
- char *line = toku_strdup(it->c_str());
- line = strip_newline(line, nullptr);
-
- char *ptr = trim_whitespace(line);
-
- // canonical allocator id is in base 10, not 16
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 10);
-
- if (fn.find("ba_trace_create") != string::npos) {
- const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num);
- const uint64_t alignment = parse_uint64(&ptr, line_num);
- ba_replay_assert(allocator_map.count(allocator_id) == 0,
- "corrupted canonical trace: double create", line, line_num);
-
- block_allocator *ba = new block_allocator();
- if (fn == "ba_trace_create") {
- ba->create(reserve_at_beginning, alignment);
- stats->n_create++;
- } else {
- ba_replay_assert(fn == "ba_trace_create_from_blockpairs",
- "corrupted canonical trace: bad create fn", line, line_num);
- vector<block_allocator::blockpair> pairs;
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- pairs.push_back(bp);
- }
- ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size());
- stats->n_create_from_blockpairs++;
- }
- ba->set_strategy(strategy);
-
- TOKU_DB_FRAGMENTATION_S report;
- ba->get_statistics(&report);
- (*reports)[allocator_id].beginning = report;
- allocator_map[allocator_id] = ba;
- } else {
- ba_replay_assert(allocator_map.count(allocator_id) > 0,
- "corrupted canonical trace: no such allocator", line, line_num);
-
- block_allocator *ba = allocator_map[allocator_id];
- if (fn == "ba_trace_alloc") {
- // replay an `alloc' whose result will be associated with a certain asn
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 0,
- "corrupted canonical trace: double alloc (asn in use)", line, line_num);
-
- uint64_t offset;
- ba->alloc_block(size, heat, &offset);
- seq_num_to_offset[asn] = offset;
- heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++;
- heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size);
- } else if (fn == "ba_trace_free_asn") {
- // replay a `free' on a block whose offset is the result of an alloc with an asn
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 1,
- "corrupted canonical trace: double free (asn unused)", line, line_num);
-
- const uint64_t offset = seq_num_to_offset[asn];
- ba->free_block(offset);
- seq_num_to_offset.erase(asn);
- stats->n_free++;
- } else if (fn == "ba_trace_free_offset") {
- // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba->free_block(offset);
- stats->n_free++;
- } else if (fn == "ba_trace_destroy") {
- TOKU_DB_FRAGMENTATION_S report;
- ba->get_statistics(&report);
- ba->destroy();
- (*reports)[allocator_id].end = report;
- allocator_map.erase(allocator_id);
- stats->n_destroy++;
- } else {
- ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num);
- }
- }
-
- toku_free(line);
- }
-}
-
-static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) {
- switch (strategy) {
- case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT:
- return "first-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
- return "best-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
- return "heat-zone";
- case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT:
- return "padded-fit";
- default:
- abort();
- }
-}
-
-static block_allocator::allocation_strategy cstring_to_strategy(const char *str) {
- if (strcmp(str, "first-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT;
- }
- if (strcmp(str, "best-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT;
- }
- if (strcmp(str, "heat-zone") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE;
- }
- if (strcmp(str, "padded-fit") != 0) {
- fprintf(stderr, "bad strategy string: %s\n", str);
- abort();
- }
- return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT;
-}
-
-static void print_result_verbose(uint64_t allocator_id,
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- if (report.end.data_bytes + report.end.unused_bytes +
- report.beginning.data_bytes + report.beginning.unused_bytes
- < 32UL * 1024 * 1024) {
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
- return;
- }
-
- printf(" allocator_id: %20" PRId64 "\n", allocator_id);
- printf(" strategy: %20s\n", strategy_to_cstring(strategy));
-
- for (int i = 0; i < 2; i++) {
- const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end;
- printf("%s\n", i == 0 ? "BEFORE" : "AFTER");
-
- uint64_t total_bytes = r->data_bytes + r->unused_bytes;
- uint64_t total_blocks = r->data_blocks + r->unused_blocks;
-
- // byte statistics
- printf(" total bytes: %20" PRId64 "\n", total_bytes);
- printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes,
- static_cast<double>(r->data_bytes) / total_bytes);
- printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes,
- static_cast<double>(r->unused_bytes) / total_bytes);
-
- // block statistics
- printf(" total blocks: %20" PRId64 "\n", total_blocks);
- printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks,
- static_cast<double>(r->data_blocks) / total_blocks);
- printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks,
- static_cast<double>(r->unused_blocks) / total_blocks);
-
- // misc
- printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block);
- }
-}
-
-static void print_result(uint64_t allocator_id,
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning;
- const TOKU_DB_FRAGMENTATION_S *end = &report.end;
-
- uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes;
- uint64_t total_end_bytes = end->data_bytes + end->unused_bytes;
- if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) {
- if (verbose) {
- printf("\n");
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
- }
- return;
- }
- printf("\n");
- if (verbose) {
- print_result_verbose(allocator_id, strategy, report);
- } else {
- printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n",
- strategy_to_cstring(strategy), allocator_id,
- static_cast<double>(report.end.data_bytes) / total_end_bytes,
- static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes);
- }
-}
-
-static int only_aggregate_reports;
-
-static struct option getopt_options[] = {
- { "verbose", no_argument, &verbose, 1 },
- { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 },
- { "include-strategy", required_argument, nullptr, 'i' },
- { "exclude-strategy", required_argument, nullptr, 'x' },
- { nullptr, 0, nullptr, 0 },
-};
-
-int main(int argc, char *argv[]) {
- int opt;
- set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies;
- while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) {
- switch (opt) {
- case 0:
- break;
- case 'i':
- candidate_strategies.insert(cstring_to_strategy(optarg));
- break;
- case 'x':
- excluded_strategies.insert(cstring_to_strategy(optarg));
- break;
- case '?':
- default:
- abort();
- };
- }
- // Default to everything if nothing was explicitly included.
- if (candidate_strategies.empty()) {
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
- }
- // ..but remove anything that was explicitly excluded
- for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin();
- it != excluded_strategies.end(); it++) {
- candidate_strategies.erase(*it);
- }
-
- // Run the real trace
- //
- // First, read the raw trace from stdin
- vector<string> canonicalized_trace = canonicalize_trace_from(stdin);
-
- if (!only_aggregate_reports) {
- printf("\n");
- printf("Individual reports, by allocator:\n");
- }
-
- struct canonical_trace_stats stats;
- map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy;
- for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
- it != candidate_strategies.end(); it++) {
- const block_allocator::allocation_strategy strategy(*it);
-
- // replay the canonicalized trace against the current strategy.
- //
- // we provided the allocator map so we can gather statistics later
- struct canonical_trace_stats dummy_stats;
- map<uint64_t, struct fragmentation_report> reports;
- replay_canonicalized_trace(canonicalized_trace, strategy, &reports,
- // Only need to gather canonical trace stats once
- it == candidate_strategies.begin() ? &stats : &dummy_stats);
-
- struct fragmentation_report aggregate_report;
- memset(&aggregate_report, 0, sizeof(aggregate_report));
- for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin();
- rp != reports.end(); rp++) {
- const struct fragmentation_report &report = rp->second;
- aggregate_report.merge(report);
- if (!only_aggregate_reports) {
- print_result(rp->first, strategy, report);
- }
- }
- reports_by_strategy[strategy] = aggregate_report;
- }
-
- printf("\n");
- printf("Aggregate reports, by strategy:\n");
-
- for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin();
- it != reports_by_strategy.end(); it++) {
- print_result(0, it->first, it->second);
- }
-
- printf("\n");
- printf("Overall trace stats:\n");
- printf("\n");
- printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed);
- printf(" n_create: %15" PRIu64 "\n", stats.n_create);
- printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs);
- printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot);
- printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold);
- printf(" n_free: %15" PRIu64 "\n", stats.n_free);
- printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy);
- printf("\n");
- printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean);
- printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance));
- printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean);
- printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance));
- printf("\n");
-
- return 0;
-}
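For reference, the removed replay tool's streaming_variance_calculator kept a
one-pass running mean and variance using the recurrence its comment credits to
Knuth (TAOCP, 1962), here in the integer form the deleted code implemented
(int64 division, so both updates truncate):

    mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n
    var_n  = ((n - 1) * var_{n-1} + (x_n - mean_{n-1}) * (x_n - mean_n)) / n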
diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc
index 5920be8deda..2324249ba00 100644
--- a/storage/tokudb/PerconaFT/tools/ftverify.cc
+++ b/storage/tokudb/PerconaFT/tools/ftverify.cc
@@ -148,7 +148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
}
}
{
- toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+ toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
r1 = deserialize_ft_from_fd_into_rbuf(
fd,
header_1_off,
diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
index 23ef72218ac..f6d777b4161 100644
--- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc
+++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
@@ -192,6 +192,7 @@ static void dump_header(FT ft) {
dump_descriptor(&ft->descriptor);
printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows);
printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes);
+ printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows);
}
static int64_t getRootNode(FT ft) {
diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
index 48ff28e89af..76b1d9c713e 100644
--- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
+++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
@@ -110,7 +110,7 @@ test2 (void) {
static void
test3 (void)
-// Compare the simple version to the highly optimized verison.
+// Compare the simple version to the highly optimized version.
{
const int datalen = 1000;
char data[datalen];
diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc
index 62a70467661..0afe9958b85 100644
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -414,17 +414,17 @@ void TOKUDB_SHARE::update_row_count(
pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100);
if (_row_delta_activity >= pct_of_rows_changed_to_trigger) {
char msg[200];
- snprintf(
- msg,
- sizeof(msg),
- "TokuDB: Auto %s background analysis for %s, delta_activity "
- "%llu is greater than %llu percent of %llu rows.",
- tokudb::sysvars::analyze_in_background(thd) > 0 ?
- "scheduling" : "running",
- full_table_name(),
- _row_delta_activity,
- auto_threshold,
- (ulonglong)(_rows));
+ snprintf(msg,
+ sizeof(msg),
+ "TokuDB: Auto %s analysis for %s, delta_activity %llu is "
+ "greater than %llu percent of %llu rows.",
+ tokudb::sysvars::analyze_in_background(thd) > 0
+ ? "scheduling background"
+ : "running foreground",
+ full_table_name(),
+ _row_delta_activity,
+ auto_threshold,
+ (ulonglong)(_rows));
// analyze_standard will unlock _mutex regardless of success/failure
int ret = analyze_standard(thd, NULL);
@@ -4129,7 +4129,7 @@ int ha_tokudb::write_row(uchar * record) {
goto cleanup;
}
if (curr_num_DBs == 1) {
- error = insert_row_to_main_dictionary(record,&prim_key, &row, txn);
+ error = insert_row_to_main_dictionary(record, &prim_key, &row, txn);
if (error) { goto cleanup; }
} else {
error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd);
@@ -6176,7 +6176,7 @@ int ha_tokudb::info(uint flag) {
// we should always have a primary key
assert_always(share->file != NULL);
- error = estimate_num_rows(share->file,&num_rows, txn);
+ error = estimate_num_rows(share->file, &num_rows, txn);
if (error == 0) {
share->set_row_count(num_rows, false);
stats.records = num_rows;
diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc
index db3d6c112d4..6d8e7173c8d 100644
--- a/storage/tokudb/ha_tokudb_admin.cc
+++ b/storage/tokudb/ha_tokudb_admin.cc
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -43,13 +43,11 @@ public:
virtual ~recount_rows_t();
virtual const char* key();
-
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status);
+ virtual const char* database();
+ virtual const char* table();
+ virtual const char* type();
+ virtual const char* parameters();
+ virtual const char* status();
protected:
virtual void on_run();
@@ -64,6 +62,8 @@ private:
ulonglong _throttle;
// for recount rows status reporting
+ char _parameters[256];
+ char _status[1024];
int _result;
ulonglong _recount_start; // in microseconds
ulonglong _total_elapsed_time; // in microseconds
@@ -78,7 +78,6 @@ private:
uint64_t deleted,
void* extra);
int analyze_recount_rows_progress(uint64_t count, uint64_t deleted);
- void get_analyze_status(char*);
};
void* recount_rows_t::operator new(size_t sz) {
@@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t(
}
_throttle = tokudb::sysvars::analyze_throttle(thd);
+
+ snprintf(_parameters,
+ sizeof(_parameters),
+ "TOKUDB_ANALYZE_THROTTLE=%llu;",
+ _throttle);
+ _status[0] = '\0';
}
recount_rows_t::~recount_rows_t() {
}
void recount_rows_t::on_run() {
+ const char* orig_proc_info = NULL;
+ if (_thd)
+ orig_proc_info = tokudb_thd_get_proc_info(_thd);
_recount_start = tokudb::time::microsec();
_total_elapsed_time = 0;
@@ -171,6 +179,8 @@ void recount_rows_t::on_run() {
_result,
_share->row_count());
error:
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, orig_proc_info);
return;
}
void recount_rows_t::on_destroy() {
@@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() {
const char* recount_rows_t::key() {
return _share->full_table_name();
}
-void recount_rows_t::status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) {
-
- strcpy(database, _share->database_name());
- strcpy(table, _share->table_name());
- strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS");
- sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle);
- get_analyze_status(status);
+const char* recount_rows_t::database() {
+ return _share->database_name();
+}
+const char* recount_rows_t::table() {
+ return _share->table_name();
+}
+const char* recount_rows_t::type() {
+ static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS";
+ return type;
+}
+const char* recount_rows_t::parameters() {
+ return _parameters;
+}
+const char* recount_rows_t::status() {
+ return _status;
}
int recount_rows_t::analyze_recount_rows_progress(
uint64_t count,
@@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress(
return ER_ABORTING_CONNECTION;
}
+ // rebuild status
+ // There is a slight race condition here: _status is used both for
+ // tokudb_thd_set_proc_info and for the status column in
+ // i_s.background_job_status.
+ // If someone happens to be querying or building the i_s table at
+ // the exact moment that the status is being rebuilt here, the i_s
+ // table could pick up a garbage status.
+ // Taking the job manager lock is a little heavy handed, but it
+ // works: it prevents us from changing the status while someone
+ // might be immediately observing us, and it prevents anyone from
+ // observing us while we change the status.
+ tokudb::background::_job_manager->lock();
+ snprintf(_status,
+ sizeof(_status),
+ "recount_rows %s.%s counted %llu rows and %llu deleted "
+ "in %llu seconds.",
+ _share->database_name(),
+ _share->table_name(),
+ _rows,
+ _deleted_rows,
+ _total_elapsed_time / tokudb::time::MICROSECONDS);
+ tokudb::background::_job_manager->unlock();
+
// report
- if (_thd) {
- char status[256];
- get_analyze_status(status);
- thd_proc_info(_thd, status);
- }
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, _status);
// throttle
// given the throttle value, lets calculate the maximum number of rows
@@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress(
}
return 0;
}
-void recount_rows_t::get_analyze_status(char* msg) {
- sprintf(
- msg,
- "recount_rows %s.%s counted %llu rows and %llu deleted in %llu "
- "seconds.",
- _share->database_name(),
- _share->table_name(),
- _rows,
- _deleted_rows,
- _total_elapsed_time / tokudb::time::MICROSECONDS);
-}
-
class standard_t : public tokudb::background::job_manager_t::job_t {
public:
@@ -261,13 +282,11 @@ public:
virtual ~standard_t();
virtual const char* key(void);
-
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status);
+ virtual const char* database();
+ virtual const char* table();
+ virtual const char* type();
+ virtual const char* parameters();
+ virtual const char* status();
protected:
virtual void on_run();
@@ -284,6 +303,8 @@ private:
double _delete_fraction;
// for analyze status reporting, may also use other state
+ char _parameters[256];
+ char _status[1024];
int _result;
ulonglong _analyze_start; // in microseconds
ulonglong _total_elapsed_time; // in microseconds
@@ -305,7 +326,6 @@ private:
uint64_t deleted_rows);
bool analyze_standard_cursor_callback(uint64_t deleted_rows);
- void get_analyze_status(char*);
int analyze_key_progress();
int analyze_key(uint64_t* rec_per_key_part);
};
@@ -351,6 +371,16 @@ standard_t::standard_t(
_time_limit =
tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS;
_delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd);
+
+ snprintf(_parameters,
+ sizeof(_parameters),
+ "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
+ "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
+ _delete_fraction,
+ _time_limit / tokudb::time::MICROSECONDS,
+ _throttle);
+
+ _status[0] = '\0';
}
standard_t::~standard_t() {
}
@@ -358,6 +388,10 @@ void standard_t::on_run() {
DB_BTREE_STAT64 stat64;
uint64_t rec_per_key_part[_share->_max_key_parts];
uint64_t total_key_parts = 0;
+ const char* orig_proc_info = NULL;
+ if (_thd)
+ orig_proc_info = tokudb_thd_get_proc_info(_thd);
+
_analyze_start = tokudb::time::microsec();
_half_time = _time_limit > 0 ? _time_limit/2 : 0;
@@ -395,7 +429,7 @@ void standard_t::on_run() {
_result = HA_ADMIN_FAILED;
}
if (_thd && (_result == HA_ADMIN_FAILED ||
- (double)_deleted_rows >
+ static_cast<double>(_deleted_rows) >
_delete_fraction * (_rows + _deleted_rows))) {
char name[256]; int namelen;
@@ -460,8 +494,9 @@ cleanup:
}
error:
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, orig_proc_info);
return;
-
}
void standard_t::on_destroy() {
_share->lock();
@@ -472,24 +507,21 @@ void standard_t::on_destroy() {
const char* standard_t::key() {
return _share->full_table_name();
}
-void standard_t::status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) {
-
- strcpy(database, _share->database_name());
- strcpy(table, _share->table_name());
- strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD");
- sprintf(
- params,
- "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
- "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
- _delete_fraction,
- _time_limit / tokudb::time::MICROSECONDS,
- _throttle);
- get_analyze_status(status);
+const char* standard_t::database() {
+ return _share->database_name();
+}
+const char* standard_t::table() {
+ return _share->table_name();
+}
+const char* standard_t::type() {
+ static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD";
+ return type;
+}
+const char* standard_t::parameters() {
+ return _parameters;
+}
+const char* standard_t::status() {
+ return _status;
}
bool standard_t::analyze_standard_cursor_callback(
void* extra,
@@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) {
_ticks += deleted_rows;
return analyze_key_progress() != 0;
}
-void standard_t::get_analyze_status(char* msg) {
- static const char* scan_direction_str[] = {
- "not scanning",
- "scanning forward",
- "scanning backward",
- "scan unknown"
- };
-
- const char* scan_direction = NULL;
- switch (_scan_direction) {
- case 0: scan_direction = scan_direction_str[0]; break;
- case DB_NEXT: scan_direction = scan_direction_str[1]; break;
- case DB_PREV: scan_direction = scan_direction_str[2]; break;
- default: scan_direction = scan_direction_str[3]; break;
- }
-
- float progress_rows = 0.0;
- if (_share->row_count() > 0)
- progress_rows = (float) _rows / (float) _share->row_count();
- float progress_time = 0.0;
- if (_time_limit > 0)
- progress_time = (float) _key_elapsed_time / (float) _time_limit;
- sprintf(
- msg,
- "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, "
- "%s",
- _share->database_name(),
- _share->table_name(),
- _share->_key_descriptors[_current_key]._name,
- _current_key,
- _share->_keys,
- progress_rows * 100.0,
- progress_time * 100.0,
- scan_direction);
-}
int standard_t::analyze_key_progress(void) {
if (_ticks > 1000) {
_ticks = 0;
@@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) {
if ((_thd && thd_killed(_thd)) || cancelled()) {
// client killed
return ER_ABORTING_CONNECTION;
- } else if(_time_limit > 0 &&
- (uint64_t)_key_elapsed_time > _time_limit) {
+ } else if (_time_limit > 0 &&
+ static_cast<uint64_t>(_key_elapsed_time) > _time_limit) {
// time limit reached
return ETIME;
}
- // report
- if (_thd) {
- char status[256];
- get_analyze_status(status);
- thd_proc_info(_thd, status);
+ // rebuild status
+ // There is a slight race condition here: _status is used both for
+ // tokudb_thd_set_proc_info and for the status column in
+ // i_s.background_job_status.
+ // If someone happens to be querying or building the i_s table at
+ // the exact moment that the status is being rebuilt here, the i_s
+ // table could pick up a garbage status.
+ // Taking the job manager lock is a little heavy handed, but it
+ // works: it prevents us from changing the status while someone
+ // might be immediately observing us, and it prevents anyone from
+ // observing us while we change the status.
+ static const char* scan_direction_str[] = {"not scanning",
+ "scanning forward",
+ "scanning backward",
+ "scan unknown"};
+
+ const char* scan_direction = NULL;
+ switch (_scan_direction) {
+ case 0:
+ scan_direction = scan_direction_str[0];
+ break;
+ case DB_NEXT:
+ scan_direction = scan_direction_str[1];
+ break;
+ case DB_PREV:
+ scan_direction = scan_direction_str[2];
+ break;
+ default:
+ scan_direction = scan_direction_str[3];
+ break;
}
+ float progress_rows = 0.0;
+ if (_share->row_count() > 0)
+ progress_rows = static_cast<float>(_rows) /
+ static_cast<float>(_share->row_count());
+ float progress_time = 0.0;
+ if (_time_limit > 0)
+ progress_time = static_cast<float>(_key_elapsed_time) /
+ static_cast<float>(_time_limit);
+ tokudb::background::_job_manager->lock();
+ snprintf(
+ _status,
+ sizeof(_status),
+ "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% "
+ "time, %s",
+ _share->database_name(),
+ _share->table_name(),
+ _share->_key_descriptors[_current_key]._name,
+ _current_key,
+ _share->_keys,
+ progress_rows * 100.0,
+ progress_time * 100.0,
+ scan_direction);
+ tokudb::background::_job_manager->unlock();
+
+ // report
+ if (_thd)
+ tokudb_thd_set_proc_info(_thd, _status);
+
// throttle
// given the throttle value, lets calculate the maximum number of rows
// we should have seen so far in a .1 sec resolution
@@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) {
assert_always(close_error == 0);
done:
+ // in case we timed out (bunch of deleted records) without hitting a
+ // single row
+ if (_rows == 0)
+ _rows = 1;
+
// return cardinality
for (uint64_t i = 0; i < num_key_parts; i++) {
rec_per_key_part[i] = _rows / unique_rows[i];
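
The `_rows == 0` clamp added above guards the division that immediately follows it. A standalone sketch of the same guard, under the added assumption that a `unique_rows[i]` entry could also be zero in that case (the helper name is illustrative, not TokuDB's):

    #include <cstdint>
    #include <vector>

    // If ANALYZE timed out over a long run of deleted records, rows stays
    // 0 and the per-key-part division would degenerate; clamp to 1, as
    // the diff does.
    std::vector<uint64_t> cardinality(uint64_t rows,
                                      const std::vector<uint64_t> &unique_rows) {
        if (rows == 0)
            rows = 1;
        std::vector<uint64_t> rec_per_key(unique_rows.size());
        for (size_t i = 0; i < unique_rows.size(); i++)
            rec_per_key[i] = rows / (unique_rows[i] ? unique_rows[i] : 1);
        return rec_per_key;
    }
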
@@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
assert_always(thd != NULL);
- const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
int result = HA_ADMIN_OK;
tokudb::analyze::recount_rows_t* job
@@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
result = HA_ADMIN_FAILED;
}
- thd_proc_info(thd, orig_proc_info);
-
TOKUDB_HANDLER_DBUG_RETURN(result);
}
@@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
TOKUDB_HANDLER_DBUG_RETURN(result);
}
- const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
-
tokudb::analyze::standard_t* job
= new tokudb::analyze::standard_t(txn == NULL ? false : true, thd,
this, txn);
@@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
lock();
- thd_proc_info(thd, orig_proc_info);
-
TOKUDB_HANDLER_DBUG_RETURN(result);
}
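
The refactor above replaces the copy-out `status(...)` method with accessors over member buffers, which is why every rebuild of `_status` now happens under the job manager's lock. A minimal sketch of that discipline, assuming a `std::mutex` in place of `_job_manager`'s lock (class and member names here are illustrative, not TokuDB's):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <mutex>

    class job_status {
        std::mutex _mutex;     // stand-in for the job manager's lock
        char _status[1024];
    public:
        job_status() { _status[0] = '\0'; }
        // Writer: rebuild the status under the lock so a reader never
        // sees a half-written buffer.
        void rebuild(uint64_t rows, uint64_t deleted, uint64_t secs) {
            std::lock_guard<std::mutex> guard(_mutex);
            snprintf(_status, sizeof(_status),
                     "counted %llu rows and %llu deleted in %llu seconds.",
                     (unsigned long long)rows, (unsigned long long)deleted,
                     (unsigned long long)secs);
        }
        // Reader: information_schema copies the buffer under the same lock.
        void read(char *out, size_t n) {
            std::lock_guard<std::mutex> guard(_mutex);
            snprintf(out, n, "%s", _status);
        }
    };
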
diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h
index 3ff3e537778..a8d4a38e1c3 100644
--- a/storage/tokudb/hatoku_defines.h
+++ b/storage/tokudb/hatoku_defines.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -233,9 +233,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// mysql 5.6.15 removed the test macro, so we define our own
#define tokudb_test(e) ((e) ? 1 : 0)
-inline const char* tokudb_thd_get_proc_info(const THD *thd) {
+inline const char* tokudb_thd_get_proc_info(const THD* thd) {
return thd->proc_info;
}
+inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) {
+ thd_proc_info(thd, proc_info);
+}
// uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead
inline uint tokudb_uint3korr(const uchar *a) {
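
The new `tokudb_thd_set_proc_info()` wrapper pairs with the existing getter in the save/restore pattern the analyze jobs adopt above. A condensed sketch of that pattern, assuming the `THD` wrappers defined in this hunk; the surrounding job body is hypothetical:

    void run_with_proc_info(THD *thd, const char *status) {
        const char *orig_proc_info = NULL;
        if (thd)
            orig_proc_info = tokudb_thd_get_proc_info(thd);
        if (thd)
            tokudb_thd_set_proc_info(thd, status);  // report progress
        // ... job work happens here ...
        if (thd)
            tokudb_thd_set_proc_info(thd, orig_proc_info);  // restore on exit
    }
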
diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc
index e7dfbd810c2..1581a7b76df 100644
--- a/storage/tokudb/hatoku_hton.cc
+++ b/storage/tokudb/hatoku_hton.cc
@@ -531,6 +531,7 @@ static int tokudb_init_func(void *p) {
db_env->change_fsync_log_period(db_env, tokudb::sysvars::fsync_log_period);
db_env->set_lock_timeout_callback(db_env, tokudb_lock_timeout_callback);
+ db_env->set_dir_per_db(db_env, tokudb::sysvars::dir_per_db);
db_env->set_loader_memory_size(
db_env,
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
deleted file mode 100644
index c2a598632ca..00000000000
--- a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
+++ /dev/null
@@ -1,51 +0,0 @@
-include/master-slave.inc
-[connection master]
-CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB;
-CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB;
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t1 VALUES (10);
-INSERT INTO t1 VALUES (NULL),(NULL),(NULL);
-INSERT INTO t2 VALUES (5,0);
-INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID());
-SET FOREIGN_KEY_CHECKS=1;
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b c
-5 0
-6 11
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b c
-5 0
-6 11
-SET TIMESTAMP=1000000000;
-CREATE TABLE t3 ( a INT UNIQUE );
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t3 VALUES (1),(1);
-Got one of the listed errors
-SET FOREIGN_KEY_CHECKS=0;
-DROP TABLE IF EXISTS t1,t2,t3;
-SET FOREIGN_KEY_CHECKS=1;
-create table t1 (b int primary key) engine = TokuDB;
-create table t2 (a int primary key, b int, foreign key (b) references t1(b))
-engine = TokuDB;
-insert into t1 set b=1;
-insert into t2 set a=1, b=1;
-set foreign_key_checks=0;
-delete from t1;
-must sync w/o a problem (could not with the buggy code)
-select count(*) from t1 /* must be zero */;
-count(*)
-0
-drop table t2,t1;
-include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
deleted file mode 100644
index d798cfd4a62..00000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
+++ /dev/null
@@ -1,4 +0,0 @@
--- source include/not_ndb_default.inc
--- source include/have_tokudb.inc
-let $engine_type=TokuDB;
--- source extra/rpl_tests/rpl_foreign_key.test
diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def
index c98a8aa622a..ddefceb432e 100644
--- a/storage/tokudb/mysql-test/tokudb/disabled.def
+++ b/storage/tokudb/mysql-test/tokudb/disabled.def
@@ -28,3 +28,4 @@ type_timestamp_explicit:
cluster_key_part: engine options on partitioned tables
i_s_tokudb_lock_waits_released: unstable, race conditions
i_s_tokudb_locks_released: unstable, race conditions
+row_format: n/a
diff --git a/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc
new file mode 100644
index 00000000000..b10ad21dd95
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc
@@ -0,0 +1 @@
+--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/
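
A rough C++ analogue of the first substitution in the new .inc file above: it collapses the per-dictionary hex ids embedded in *.tokudb file names to a fixed `id.` token so that test output stays stable across runs. The regex is taken from the line above, simplified only by dropping the mysqltest delimiters:

    #include <cstdio>
    #include <regex>
    #include <string>

    std::string normalize(const std::string &name) {
        static const std::regex id_re(
            "[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+)?\\.");
        return std::regex_replace(name, id_re, "id.");
    }

    int main() {
        // hypothetical file name; prints: t1_main_id.tokudb
        printf("%s\n", normalize("t1_main_ab12cd_3_1d.tokudb").c_str());
        return 0;
    }
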
diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
index 5769ee74071..8b53f89efa3 100644
--- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
+++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
@@ -25,7 +25,7 @@ TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_statu
`scheduler` varchar(32) NOT NULL DEFAULT '',
`scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`started_time` datetime DEFAULT NULL,
- `status` varchar(256) DEFAULT NULL
+ `status` varchar(1024) DEFAULT NULL
) ENGINE=MEMORY DEFAULT CHARSET=utf8
create table t1 (a int not null auto_increment, b int, c int, primary key(a), key kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c));
insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
diff --git a/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result
new file mode 100644
index 00000000000..a36dbcb28c0
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result
@@ -0,0 +1,10 @@
+SELECT @@tokudb_dir_per_db;
+@@tokudb_dir_per_db
+1
+TOKUDB_DATA_DIR_CHANGED
+1
+CREATE DATABASE tokudb_test;
+USE tokudb_test;
+CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb;
+DROP TABLE t;
+DROP DATABASE tokudb_test;
diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result
new file mode 100644
index 00000000000..371f97406c8
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result
@@ -0,0 +1,180 @@
+########
+# tokudb_dir_per_db = 1
+########
+SET GLOBAL tokudb_dir_per_db= 1;
+########
+# CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+# RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t2_key_ab_id.tokudb
+t2_key_b_id.tokudb
+t2_main_id.tokudb
+t2_status_id.tokudb
+########
+# DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+# tokudb_dir_per_db = 0
+########
+SET GLOBAL tokudb_dir_per_db= 0;
+########
+# CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+# RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+# DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+# CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa
+########
+########
+# tokudb_dir_per_db = (1 - 1);
+########
+SET GLOBAL tokudb_dir_per_db= (1 - 1);;
+########
+# CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+# tokudb_dir_per_db = 1
+########
+SET GLOBAL tokudb_dir_per_db= 1;
+########
+# RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t2_key_ab_id.tokudb
+t2_key_b_id.tokudb
+t2_main_id.tokudb
+t2_status_id.tokudb
+########
+# DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+# tokudb_dir_per_db = (1 - 0);
+########
+SET GLOBAL tokudb_dir_per_db= (1 - 0);;
+########
+# CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+# tokudb_dir_per_db = 0
+########
+SET GLOBAL tokudb_dir_per_db= 0;
+########
+# RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+# DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+SET GLOBAL tokudb_dir_per_db=default;
diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
index 6f9592ddc1f..ecd4d077206 100644
--- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
+++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
@@ -2,6 +2,7 @@ set default_storage_engine='tokudb';
set tokudb_prelock_empty=false;
drop table if exists t;
create table t (id int primary key);
+t should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
select * from information_schema.tokudb_locks;
@@ -15,17 +16,21 @@ insert into t values (1);
set autocommit=0;
set tokudb_lock_timeout=600000;
insert into t values (1);
+should find the presence of a lock on 1st transaction
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main
+should find the presence of a lock_wait on the 2nd transaction
select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main
+should find the presence of two transactions
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
TRX_ID MYSQL_ID
TRX_ID MYSQL_ID
commit;
+verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main
@@ -33,6 +38,8 @@ select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
commit;
+verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
+verify that the lock on the 2nd transaction has been released, should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
select * from information_schema.tokudb_locks;
@@ -46,23 +53,28 @@ replace into t values (1);
set autocommit=0;
set tokudb_lock_timeout=600000;
replace into t values (1);
+should find the presence of a lock on 1st transaction
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main
+should find the presence of a lock_wait on the 2nd transaction
select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main
+should find the presence of two transactions
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
TRX_ID MYSQL_ID
TRX_ID MYSQL_ID
commit;
+verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
select * from information_schema.tokudb_locks;
locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name
TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main
select * from information_schema.tokudb_lock_waits;
requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name
commit;
+verify that the lock on the 2nd transaction has been released, should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
trx_id trx_mysql_thread_id
select * from information_schema.tokudb_locks;
diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result
new file mode 100644
index 00000000000..cb669148445
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result
@@ -0,0 +1,51 @@
+CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name;
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_zlib TokuDB
+tokudb_row_format_test_2 tokudb_quicklz TokuDB
+tokudb_row_format_test_3 tokudb_lzma TokuDB
+tokudb_row_format_test_4 tokudb_uncompressed TokuDB
+tokudb_row_format_test_5 tokudb_zlib TokuDB
+tokudb_row_format_test_6 tokudb_lzma TokuDB
+tokudb_row_format_test_7 tokudb_quicklz TokuDB
+tokudb_row_format_test_8 tokudb_snappy TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_quicklz TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_lzma TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_uncompressed TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_zlib TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_snappy TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_quicklz TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_lzma TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name row_format engine
+tokudb_row_format_test_1 tokudb_zlib TokuDB
+DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8;
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
index 5c1c53946a4..b287c70469e 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
@@ -1009,6 +1009,7 @@ Table Op Msg_type Msg_text
test.t check status OK
optimize table t;
Table Op Msg_type Msg_text
+test.t optimize note Table does not support optimize, doing recreate + analyze instead
test.t optimize status OK
check table t;
Table Op Msg_type Msg_text
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt
new file mode 100644
index 00000000000..a9090f4d115
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt
@@ -0,0 +1 @@
+--loose-tokudb_data_dir="$MYSQL_TMP_DIR" --loose-tokudb-dir-per-db=1
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test
new file mode 100644
index 00000000000..7f415a72515
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test
@@ -0,0 +1,16 @@
+--source include/have_tokudb.inc
+
+SELECT @@tokudb_dir_per_db;
+
+--disable_query_log
+--eval SELECT STRCMP(@@tokudb_data_dir, '$MYSQL_TMP_DIR') = 0 AS TOKUDB_DATA_DIR_CHANGED
+--enable_query_log
+
+CREATE DATABASE tokudb_test;
+USE tokudb_test;
+CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb;
+
+--file_exists $MYSQL_TMP_DIR/tokudb_test
+
+DROP TABLE t;
+DROP DATABASE tokudb_test;
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test
new file mode 100644
index 00000000000..b638b706d87
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test
@@ -0,0 +1,76 @@
+source include/have_tokudb.inc;
+
+--let $DB= test
+--let $DATADIR= `select @@datadir`
+--let $i= 2
+
+while ($i) {
+ --dec $i
+ --echo ########
+ --echo # tokudb_dir_per_db = $i
+ --echo ########
+ --eval SET GLOBAL tokudb_dir_per_db= $i
+ --echo ########
+ --echo # CREATE
+ --echo ########
+ CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+ INSERT INTO t1 SET b = 10;
+ INSERT INTO t1 SET b = 20;
+ SELECT b FROM t1 ORDER BY a;
+ CREATE INDEX b ON t1 (b);
+ CREATE INDEX ab ON t1 (a,b);
+ --source dir_per_db_show_table_files.inc
+ --echo ########
+ --echo # RENAME
+ --echo ########
+ RENAME TABLE t1 TO t2;
+ SELECT b FROM t2 ORDER BY a;
+ --source dir_per_db_show_table_files.inc
+ --echo ########
+ --echo # DROP
+ --echo ########
+ DROP TABLE t2;
+ --source dir_per_db_show_table_files.inc
+}
+
+--echo ########
+--echo # CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa
+--echo ########
+
+--let $i= 2
+
+while ($i) {
+ --dec $i
+ --let $inv_i= (1 - $i);
+ --echo ########
+ --echo # tokudb_dir_per_db = $inv_i
+ --echo ########
+ --eval SET GLOBAL tokudb_dir_per_db= $inv_i
+ --echo ########
+ --echo # CREATE
+ --echo ########
+ CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+ INSERT INTO t1 SET b = 10;
+ INSERT INTO t1 SET b = 20;
+ SELECT b FROM t1 ORDER BY a;
+ CREATE INDEX b ON t1 (b);
+ CREATE INDEX ab ON t1 (a,b);
+ --source dir_per_db_show_table_files.inc
+ --echo ########
+ --echo # tokudb_dir_per_db = $i
+ --echo ########
+ --eval SET GLOBAL tokudb_dir_per_db= $i
+ --echo ########
+ --echo # RENAME
+ --echo ########
+ RENAME TABLE t1 TO t2;
+ SELECT b FROM t2 ORDER BY a;
+ --source dir_per_db_show_table_files.inc
+ --echo ########
+ --echo # DROP
+ --echo ########
+ DROP TABLE t2;
+ --source dir_per_db_show_table_files.inc
+}
+
+SET GLOBAL tokudb_dir_per_db=default;
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc
new file mode 100644
index 00000000000..bdf7d5b235f
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc
@@ -0,0 +1,9 @@
+--sorted_result
+
+--echo ## Looking for *.tokudb files in data_dir
+--source include/table_files_replace_pattern.inc
+--list_files $DATADIR *.tokudb
+
+--echo ## Looking for *.tokudb files in data_dir/$DB
+--source include/table_files_replace_pattern.inc
+--list_files $DATADIR/$DB/ *.tokudb
diff --git a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
index d8ce18b3aa7..6534175d619 100644
--- a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
+++ b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
@@ -17,7 +17,7 @@ create table t (id int primary key);
# verify that txn_a insert (1) blocks txn_b insert (1) and txn_b gets a duplicate key error
-# should be empty
+--echo t should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
select * from information_schema.tokudb_locks;
select * from information_schema.tokudb_lock_waits;
@@ -33,7 +33,7 @@ set autocommit=0;
set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes
send insert into t values (1);
-# should find the presence of a lock on 1st transaction
+--echo should find the presence of a lock on 1st transaction
connection default;
let $wait_condition= select count(*)=1 from information_schema.processlist where info='insert into t values (1)' and state='update';
source include/wait_condition.inc;
real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race
replace_column 1 TRX_ID 2 MYSQL_ID;
select * from information_schema.tokudb_locks;
-# should find the presence of a lock_wait on the 2nd transaction
+--echo should find the presence of a lock_wait on the 2nd transaction
replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME;
select * from information_schema.tokudb_lock_waits;
-# should find the presence of two transactions
+--echo should find the presence of two transactions
replace_column 1 TRX_ID 2 MYSQL_ID;
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
connection conn_a;
commit;
-# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main';
source include/wait_condition.inc;
@@ -69,10 +69,8 @@ connection default;
disconnect conn_a;
disconnect conn_b;
-# verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
-
-# verify that the lock on the 2nd transaction has been released
-# should be be empty
+--echo verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
+--echo verify that the lock on the 2nd transaction has been released, should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
select * from information_schema.tokudb_locks;
select * from information_schema.tokudb_lock_waits;
@@ -88,7 +86,7 @@ set autocommit=0;
set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes
send replace into t values (1);
-# should find the presence of a lock on 1st transaction
+--echo should find the presence of a lock on 1st transaction
connection default;
let $wait_condition= select count(*)=1 from information_schema.processlist where info='replace into t values (1)' and state='update';
source include/wait_condition.inc;
real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race
replace_column 1 TRX_ID 2 MYSQL_ID;
select * from information_schema.tokudb_locks;
-# should find the presence of a lock_wait on the 2nd transaction
+--echo should find the presence of a lock_wait on the 2nd transaction
replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME;
select * from information_schema.tokudb_lock_waits;
-# should find the presence of two transactions
+--echo should find the presence of two transactions
replace_column 1 TRX_ID 2 MYSQL_ID;
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
connection conn_a;
commit;
-# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main';
+source include/wait_condition.inc;
replace_column 1 TRX_ID 2 MYSQL_ID;
select * from information_schema.tokudb_locks;
select * from information_schema.tokudb_lock_waits;
@@ -120,8 +120,7 @@ connection default;
disconnect conn_a;
disconnect conn_b;
-# verify that the lock on the 2nd transaction has been released
-# should be be empty
+--echo verify that the lock on the 2nd transaction has been released, should be empty
select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
select * from information_schema.tokudb_locks;
select * from information_schema.tokudb_lock_waits;
diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test
new file mode 100644
index 00000000000..6533f8c06be
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test
@@ -0,0 +1,41 @@
+#
+# Test TokuDB compression option additions to row_format
+#
+--source include/have_tokudb.inc
+
+CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name;
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8;
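
The expected .result output above pins down how the three alias formats resolve: TOKUDB_DEFAULT maps to zlib, TOKUDB_FAST to quicklz, and TOKUDB_SMALL to lzma, while the remaining names map to themselves. A sketch of that mapping as implied by the results (the enum and function names are hypothetical, not TokuDB's actual identifiers):

    #include <cstring>

    enum toku_compression {
        TOKU_ZLIB, TOKU_QUICKLZ, TOKU_LZMA, TOKU_UNCOMPRESSED, TOKU_SNAPPY
    };

    // Aliases first, as shown by the row_format column in the expected
    // output: DEFAULT -> zlib, FAST -> quicklz, SMALL -> lzma.
    toku_compression row_format_to_compression(const char *fmt) {
        if (!strcmp(fmt, "TOKUDB_DEFAULT") || !strcmp(fmt, "TOKUDB_ZLIB"))
            return TOKU_ZLIB;
        if (!strcmp(fmt, "TOKUDB_FAST") || !strcmp(fmt, "TOKUDB_QUICKLZ"))
            return TOKU_QUICKLZ;
        if (!strcmp(fmt, "TOKUDB_SMALL") || !strcmp(fmt, "TOKUDB_LZMA"))
            return TOKU_LZMA;
        if (!strcmp(fmt, "TOKUDB_SNAPPY"))
            return TOKU_SNAPPY;
        return TOKU_UNCOMPRESSED;  // TOKUDB_UNCOMPRESSED and fallback
    }
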
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
index 779d458221b..30e0bdbebd7 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
@@ -23,6 +23,7 @@ set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR d
TRUNCATE TABLE t1;
set global tokudb_debug_pause_background_job_manager = FALSE;
set DEBUG_SYNC = 'now SIGNAL done';
+set DEBUG_SYNC = 'RESET';
drop table t1;
set session tokudb_auto_analyze = @orig_auto_analyze;
set session tokudb_analyze_in_background = @orig_in_background;
@@ -32,4 +33,3 @@ set session tokudb_analyze_time = @orig_time;
set global tokudb_cardinality_scale_percent = @orig_scale_percent;
set session default_storage_engine = @orig_default_storage_engine;
set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager;
-set DEBUG_SYNC='reset';
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
index f56f93d1492..50434a79a00 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
@@ -40,6 +40,7 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status;
# lets flip to another connection
+--source include/count_sessions.inc
connect(conn1, localhost, root);
# set up the DEBUG_SYNC point
@@ -64,6 +65,7 @@ connection conn1;
reap;
connection default;
disconnect conn1;
+set DEBUG_SYNC = 'RESET';
drop table t1;
set session tokudb_auto_analyze = @orig_auto_analyze;
@@ -74,4 +76,4 @@ set session tokudb_analyze_time = @orig_time;
set global tokudb_cardinality_scale_percent = @orig_scale_percent;
set session default_storage_engine = @orig_default_storage_engine;
set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager;
-set DEBUG_SYNC='reset';
+--source include/wait_until_count_sessions.inc
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
index 6100d9aeec2..8b6df4966f4 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
@@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`;
create table foo (a int, b int);
create table bar (a int, key(a));
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
index e1acea13ed7..53c1037b051 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
@@ -15,33 +15,11 @@ create table bar (a int);
alter table foo drop column a;
alter table bar add column b int, add column c int;
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
index 17a124249da..0421b8e9d26 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
@@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB;
alter table foo drop index b;
alter table bar add index (a);
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
remove_file $MYSQLD_DATADIR/test/foo.frm;
copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
show create table foo;
show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
index 42dbb30058a..4c40339be5a 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
@@ -7,17 +7,7 @@ set default_storage_engine='tokudb';
# capture the datadir
let $MYSQLD_DATADIR= `SELECT @@datadir`;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# remove all tokudb file in the datadir
system mkdir $MYSQLD_DATADIR/save;
system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test;
# install 6.6.8 tokudb test files
system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2;
@@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map;
# check that the test dir is empty
list_files $MYSQLD_DATADIR/test *.frm;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# restore saved datadir
system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
index 3903c2cef9f..0340b960fa5 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
@@ -6,17 +6,7 @@ set default_storage_engine='tokudb';
# capture the datadir
let $MYSQLD_DATADIR= `SELECT @@datadir`;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# remove all tokudb file in the datadir
system mkdir $MYSQLD_DATADIR/save;
system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test;
# install 6.6.8 tokudb test files
system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
create table tc (id int, x int, primary key(id), key(x));
@@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map;
# check that the test dir is empty
list_files $MYSQLD_DATADIR/test *.frm;
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
# restore saved datadir
system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
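Both tests above now delegate the server bounce to the shared MTR include files. A minimal sketch of what those includes encapsulate, reconstructed from the boilerplate deleted above (the real includes also parametrize the server id and expect-file name rather than hard-coding mysqld.1):

    # include/shutdown_mysqld.inc (sketch)
    let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
    --error 0,1
    --remove_file $expect_file
    --write_file $expect_file
    wait
    EOF
    --shutdown_server 10
    --source include/wait_until_disconnected.inc

    # include/start_mysqld.inc (sketch)
    --append_file $expect_file
    restart
    EOF
    --enable_reconnect
    --source include/wait_until_connected_again.inc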
diff --git a/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc
new file mode 100644
index 00000000000..b10ad21dd95
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc
@@ -0,0 +1 @@
+--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/
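The new include folds several normalizations into a single replace_regex so that --list_files output stays stable across runs. Illustrative effect on hypothetical file names (the names below are invented for illustration):

    foo_bar_0abc.tokudb          ->  id.tokudb
    foo_bar_0abc_P_1a.tokudb     ->  id.tokudb   (optional _B_/_P_ suffix)
    sqlx_1a2b_3c4d_temp          ->  sqlx_nnnn_nnnn_temp
    t1#p#p0 / t1#sp#sp0          ->  t1#P#p0 / t1#SP#sp0 (markers upcased)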
diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
index be14d8814f0..f97235a0a2d 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
+++ b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
@@ -56,7 +56,7 @@ partition by range (a)
insert into t1 values (1), (11), (21), (33);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
---replace_result #p# #P# #sp# #SP#
+--source include/table_files_replace_pattern.inc
--list_files $MYSQLD_DATADIR/test
SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open';
@@ -82,7 +82,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO
disconnect con1;
connection default;
--reap
---replace_result #p# #P# #sp# #SP#
+--source include/table_files_replace_pattern.inc
--list_files $MYSQLD_DATADIR/test
SHOW CREATE TABLE t1;
SELECT * FROM t1;
diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc
index d8ef54a5972..e019e41c788 100644
--- a/storage/tokudb/tokudb_background.cc
+++ b/storage/tokudb/tokudb_background.cc
@@ -8,7 +8,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -68,7 +68,8 @@ void job_manager_t::destroy() {
while (_background_jobs.size()) {
_mutex.lock();
job_t* job = _background_jobs.front();
- cancel(job);
+ if (!job->cancelled())
+ cancel(job);
_background_jobs.pop_front();
delete job;
_mutex.unlock();
@@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) {
it != _background_jobs.end(); it++) {
job_t* job = *it;
- if (!job->cancelled() &&
- strcmp(job->key(), key) == 0) {
-
+ if (!job->cancelled() && strcmp(job->key(), key) == 0) {
cancel(job);
-
ret = true;
}
}
@@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) {
}
void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
- char database[256], table[256], type[256], params[256], status[256];
-
_mutex.lock();
for (jobs_t::const_iterator it = _background_jobs.begin();
@@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
it++) {
job_t* job = *it;
if (!job->cancelled()) {
- database[0] = table[0] = type[0] = params[0] = status[0] = '\0';
- job->status(database, table, type, params, status);
- callback(
- job->id(),
- database,
- table,
- type,
- params,
- status,
- job->user_scheduled(),
- job->scheduled_time(),
- job->started_time(),
- extra);
+ callback(job, extra);
}
}
@@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) {
}
void job_manager_t::cancel(job_t* job) {
assert_debug(_mutex.is_owned_by_me());
+ assert_always(!job->cancelled());
job->cancel();
}
job_manager_t* _job_manager = NULL;
diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h
index 3786701fd0f..29991ab325d 100644
--- a/storage/tokudb/tokudb_background.h
+++ b/storage/tokudb/tokudb_background.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- TokuDBis is free software: you can redistribute it and/or modify
+ TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
@@ -58,13 +58,20 @@ public:
// (or jobs) usually used to find jobs to cancel
virtual const char* key() = 0;
- // method to get info for information schema, 255 chars per buffer
- virtual void status(
- char* database,
- char* table,
- char* type,
- char* params,
- char* status) = 0;
+ // method to obtain the database name the job is scheduled on
+ virtual const char* database() = 0;
+
+ // method to obtain the table name the job is scheduled on
+ virtual const char* table() = 0;
+
+ // method to obtain the type of job
+ virtual const char* type() = 0;
+
+ // method to obtain a stringized list of job parameters
+ virtual const char* parameters() = 0;
+
+    // method to obtain a string identifying the current status of the job
+ virtual const char* status() = 0;
inline bool running() const;
@@ -99,17 +106,7 @@ public:
};
// pfn for iterate callback
- typedef void (*pfn_iterate_t)(
- uint64_t,
- const char*,
- const char*,
- const char*,
- const char*,
- const char*,
- bool,
- time_t,
- time_t,
- void*);
+ typedef void (*pfn_iterate_t)(class job_t*, void*);
public:
void* operator new(size_t sz);
@@ -144,6 +141,11 @@ public:
// data passed when the job was scheduled
void iterate_jobs(pfn_iterate_t callback, void* extra) const;
+    // lock the bjm; this prevents anyone from running, cancelling or iterating
+ // jobs in the bjm.
+ inline void lock();
+ inline void unlock();
+
private:
static void* thread_func(void* v);
@@ -170,6 +172,15 @@ extern job_manager_t* _job_manager;
bool initialize();
bool destroy();
+inline void job_manager_t::lock() {
+ assert_debug(!_mutex.is_owned_by_me());
+ _mutex.lock();
+}
+inline void job_manager_t::unlock() {
+ assert_debug(_mutex.is_owned_by_me());
+ _mutex.unlock();
+}
+
inline void job_manager_t::job_t::run() {
if (!_cancelled) {
_running = true;
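The iterator callback now receives the job object itself and pulls whatever fields it needs through the new accessors, instead of having the manager copy everything into fixed 255-byte buffers. A minimal consumer, assuming only the API declared above (the print format is illustrative):

    static void print_job(tokudb::background::job_manager_t::job_t* job,
                          void* extra) {
        FILE* out = static_cast<FILE*>(extra);
        // The accessors return strings owned by the job; iterate_jobs()
        // holds the job-list mutex for the duration of the callback.
        fprintf(out, "job %llu: %s.%s type=%s params=%s status=%s\n",
                (unsigned long long) job->id(),
                job->database(), job->table(), job->type(),
                job->parameters(), job->status());
    }

    // tokudb::background::_job_manager->iterate_jobs(print_job, stderr);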
diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc
index e69a7899b45..b3d77eef2d9 100644
--- a/storage/tokudb/tokudb_information_schema.cc
+++ b/storage/tokudb/tokudb_information_schema.cc
@@ -1085,7 +1085,7 @@ ST_FIELD_INFO background_job_status_field_info[] = {
{"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE },
{"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE },
{"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE },
- {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
+ {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
{NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE}
};
@@ -1095,15 +1095,7 @@ struct background_job_status_extra {
};
void background_job_status_callback(
- uint64_t id,
- const char* database_name,
- const char* table_name,
- const char* type,
- const char* params,
- const char* status,
- bool user_scheduled,
- time_t scheduled_time,
- time_t started_time,
+ tokudb::background::job_manager_t::job_t* job,
void* extra) {
background_job_status_extra* e =
@@ -1111,24 +1103,33 @@ void background_job_status_callback(
THD* thd = e->thd;
TABLE* table = e->table;
+ const char* tmp = NULL;
- table->field[0]->store(id, false);
- table->field[1]->store(
- database_name,
- strlen(database_name),
- system_charset_info);
- table->field[2]->store(table_name, strlen(table_name), system_charset_info);
- table->field[3]->store(type, strlen(type), system_charset_info);
- table->field[4]->store(params, strlen(params), system_charset_info);
- if (user_scheduled)
+ table->field[0]->store(job->id(), false);
+
+ tmp = job->database();
+ table->field[1]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->table();
+ table->field[2]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->type();
+ table->field[3]->store(tmp, strlen(tmp), system_charset_info);
+
+ tmp = job->parameters();
+ table->field[4]->store(tmp, strlen(tmp), system_charset_info);
+
+ if (job->user_scheduled())
table->field[5]->store("USER", strlen("USER"), system_charset_info);
else
table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info);
- field_store_time_t(table->field[6], scheduled_time);
- field_store_time_t(table->field[7], started_time);
- if (status[0] != '\0') {
- table->field[8]->store(status, strlen(status), system_charset_info);
+ field_store_time_t(table->field[6], job->scheduled_time());
+ field_store_time_t(table->field[7], job->started_time());
+
+ tmp = job->status();
+ if (tmp && tmp[0] != '\0') {
+ table->field[8]->store(tmp, strlen(tmp), system_charset_info);
table->field[8]->set_notnull();
} else {
table->field[8]->store(NULL, 0, system_charset_info);
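The widened 1024-byte status column and the accessor-based callback surface through the TokuDB background job status table in information_schema; assuming the usual plugin table name:

    SELECT * FROM information_schema.TOKUDB_BACKGROUND_JOB_STATUS;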
diff --git a/storage/tokudb/tokudb_sysvars.cc b/storage/tokudb/tokudb_sysvars.cc
index 7cea749b4fb..b758929c10e 100644
--- a/storage/tokudb/tokudb_sysvars.cc
+++ b/storage/tokudb/tokudb_sysvars.cc
@@ -66,6 +66,7 @@ uint read_status_frequency = 0;
my_bool strip_frm_data = FALSE;
char* tmp_dir = NULL;
uint write_status_frequency = 0;
+my_bool dir_per_db = FALSE;
char* version = (char*) TOKUDB_VERSION_STR;
// file system reserve as a percentage of total disk space
@@ -394,6 +395,18 @@ static MYSQL_SYSVAR_UINT(
~0U,
0);
+static void tokudb_dir_per_db_update(THD* thd,
+ struct st_mysql_sys_var* sys_var,
+ void* var, const void* save) {
+ my_bool *value = (my_bool *) var;
+ *value = *(const my_bool *) save;
+ db_env->set_dir_per_db(db_env, *value);
+}
+
+static MYSQL_SYSVAR_BOOL(dir_per_db, dir_per_db,
+ 0, "TokuDB store ft files in db directories",
+ NULL, tokudb_dir_per_db_update, FALSE);
+
#if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
static MYSQL_SYSVAR_STR(
gdb_path,
@@ -935,6 +948,7 @@ st_mysql_sys_var* system_variables[] = {
MYSQL_SYSVAR(tmp_dir),
MYSQL_SYSVAR(version),
MYSQL_SYSVAR(write_status_frequency),
+ MYSQL_SYSVAR(dir_per_db),
#if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
MYSQL_SYSVAR(gdb_path),
diff --git a/storage/tokudb/tokudb_sysvars.h b/storage/tokudb/tokudb_sysvars.h
index 3bd96f7c68d..7701f211729 100644
--- a/storage/tokudb/tokudb_sysvars.h
+++ b/storage/tokudb/tokudb_sysvars.h
@@ -101,6 +101,7 @@ extern uint read_status_frequency;
extern my_bool strip_frm_data;
extern char* tmp_dir;
extern uint write_status_frequency;
+extern my_bool dir_per_db;
extern char* version;
#if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc
index da4a6d3cdb1..ecea98fccfe 100644
--- a/storage/xtradb/btr/btr0btr.cc
+++ b/storage/xtradb/btr/btr0btr.cc
@@ -78,7 +78,7 @@ btr_corruption_report(
buf_block_get_zip_size(block),
BUF_PAGE_PRINT_NO_CRASH);
}
- buf_page_print(buf_block_get_frame_fast(block), 0, 0);
+ buf_page_print(buf_nonnull_block_get_frame(block), 0, 0);
}
#ifndef UNIV_HOTBACKUP
@@ -804,8 +804,10 @@ btr_height_get(
/* S latches the page */
root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+ ut_ad(root_block); // The index must not be corrupted
- height = btr_page_get_level(buf_block_get_frame_fast(root_block), mtr);
+ height = btr_page_get_level(buf_nonnull_block_get_frame(root_block),
+ mtr);
/* Release the S latch on the root page. */
mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
@@ -1231,7 +1233,7 @@ btr_get_size(
SRV_CORRUPT_TABLE_CHECK(root,
{
mtr_commit(mtr);
- return(0);
+ return(ULINT_UNDEFINED);
});
if (flag == BTR_N_LEAF_PAGES) {
@@ -2756,7 +2758,7 @@ btr_attach_half_pages(
}
/* Get the level of the split pages */
- level = btr_page_get_level(buf_block_get_frame_fast(block), mtr);
+ level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr);
ut_ad(level
== btr_page_get_level(buf_block_get_frame(new_block), mtr));
@@ -4133,8 +4135,10 @@ btr_discard_page(
/* Decide the page which will inherit the locks */
- left_page_no = btr_page_get_prev(buf_block_get_frame_fast(block), mtr);
- right_page_no = btr_page_get_next(buf_block_get_frame_fast(block), mtr);
+ left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block),
+ mtr);
+ right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block),
+ mtr);
if (left_page_no != FIL_NULL) {
merge_block = btr_block_get(space, zip_size, left_page_no,
diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc
index 05af024a882..214d050d562 100644
--- a/storage/xtradb/btr/btr0cur.cc
+++ b/storage/xtradb/btr/btr0cur.cc
@@ -1751,7 +1751,7 @@ btr_cur_pessimistic_insert(
}
if (!page_rec_is_infimum(btr_cur_get_rec(cursor))
|| btr_page_get_prev(
- buf_block_get_frame(
+ buf_nonnull_block_get_frame(
btr_cur_get_block(cursor)), mtr)
== FIL_NULL) {
/* split and inserted need to call
@@ -2220,7 +2220,7 @@ func_exit:
if (page_zip
&& !(flags & BTR_KEEP_IBUF_BITMAP)
&& !dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
+ && page_is_leaf(buf_nonnull_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_zip(block, mtr);
}
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index 0d5a478ca67..21b10196a25 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -4539,7 +4539,9 @@ corrupt:
recv_recover_page(TRUE, (buf_block_t*) bpage);
}
- if (uncompressed && !recv_no_ibuf_operations) {
+ if (uncompressed && !recv_no_ibuf_operations
+ && fil_page_get_type(frame) == FIL_PAGE_INDEX
+ && page_is_leaf(frame)) {
buf_block_t* block;
ibool update_ibuf_bitmap;
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
index f4d1c637e3e..3c12d6da73f 100644
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ b/storage/xtradb/buf/buf0dblwr.cc
@@ -521,7 +521,7 @@ buf_dblwr_process()
if (buf_page_is_corrupted(true, read_buf, zip_size)) {
fprintf(stderr,
- "InnoDB: Warning: database page"
+ "InnoDB: Database page"
" corruption or a failed\n"
"InnoDB: file read of"
" space %lu page %lu.\n"
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index 3554405e0b9..601e79d8923 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -309,6 +309,8 @@ buf_flush_init_flush_rbt(void)
buf_flush_list_mutex_enter(buf_pool);
+ ut_ad(buf_pool->flush_rbt == NULL);
+
/* Create red black tree for speedy insertions in flush list. */
buf_pool->flush_rbt = rbt_create(
sizeof(buf_page_t*), buf_flush_block_cmp);
@@ -2565,6 +2567,11 @@ page_cleaner_sleep_if_needed(
ulint next_loop_time) /*!< in: time when next loop iteration
should start */
{
+	/* Do not sleep if we are cleaning the buffer pool during shutdown
+	with everything else already finished */
+ if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)
+ return;
+
ulint cur_time = ut_time_ms();
if (next_loop_time > cur_time) {
diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc
index 94a3af2852b..c0bb0298bea 100644
--- a/storage/xtradb/dict/dict0boot.cc
+++ b/storage/xtradb/dict/dict0boot.cc
@@ -272,6 +272,10 @@ dict_boot(void)
ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2);
ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4);
ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6);
+ ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT == 3);
+ ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT == 5);
+ ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT_COLS == 3);
+ ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS == 5);
mtr_start(&mtr);
diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc
index b34fb7e2626..db0ca638de4 100644
--- a/storage/xtradb/dict/dict0crea.cc
+++ b/storage/xtradb/dict/dict0crea.cc
@@ -38,6 +38,7 @@ Created 1/8/1996 Heikki Tuuri
#include "que0que.h"
#include "row0ins.h"
#include "row0mysql.h"
+#include "row0sel.h"
#include "pars0pars.h"
#include "trx0roll.h"
#include "usr0sess.h"
@@ -1931,6 +1932,135 @@ dict_create_or_check_sys_tablespace(void)
return(err);
}
+/** Creates the zip_dict system table inside InnoDB
+at server bootstrap or server start if it is not found or is
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_zip_dict(void)
+{
+ trx_t* trx;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_zip_dict_err;
+ dberr_t sys_zip_dict_cols_err;
+
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
+
+ /* Note: The master thread has not been started at this point. */
+
+ sys_zip_dict_err = dict_check_if_system_table_exists(
+ "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2);
+ sys_zip_dict_cols_err = dict_check_if_system_table_exists(
+ "SYS_ZIP_DICT_COLS", DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1,
+ 1);
+
+ if (sys_zip_dict_err == DB_SUCCESS &&
+ sys_zip_dict_cols_err == DB_SUCCESS)
+ return (DB_SUCCESS);
+
+ trx = trx_allocate_for_mysql();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "creating zip_dict and zip_dict_cols sys tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Check which incomplete table definition to drop. */
+
+ if (sys_zip_dict_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_ZIP_DICT table.");
+ row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE);
+ }
+ if (sys_zip_dict_cols_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_ZIP_DICT_COLS table.");
+ row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Creating zip_dict and zip_dict_cols system tables.");
+
+ /* We always want SYSTEM tables to be created inside the system
+ tablespace. */
+ srv_file_per_table_backup = srv_file_per_table;
+ srv_file_per_table = 0;
+
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_SYS_ZIP_DICT_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE SYS_ZIP_DICT(\n"
+ " ID INT UNSIGNED NOT NULL,\n"
+ " NAME CHAR("
+ STRINGIFY_ARG(ZIP_DICT_MAX_NAME_LENGTH)
+ ") NOT NULL,\n"
+ " DATA BLOB NOT NULL\n"
+ ");\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_ID"
+ " ON SYS_ZIP_DICT (ID);\n"
+ "CREATE UNIQUE INDEX SYS_ZIP_DICT_NAME"
+ " ON SYS_ZIP_DICT (NAME);\n"
+ "CREATE TABLE SYS_ZIP_DICT_COLS(\n"
+ " TABLE_ID INT UNSIGNED NOT NULL,\n"
+ " COLUMN_POS INT UNSIGNED NOT NULL,\n"
+ " DICT_ID INT UNSIGNED NOT NULL\n"
+ ");\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_COLS_COMPOSITE"
+ " ON SYS_ZIP_DICT_COLS (TABLE_ID, COLUMN_POS);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creation of SYS_ZIP_DICT and SYS_ZIP_DICT_COLS"
+ "has failed with error %lu. Tablespace is full. "
+ "Dropping incompletely created tables.",
+ (ulong) err);
+
+ ut_a(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+ row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE);
+ row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE);
+
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_free_for_mysql(trx);
+
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "zip_dict and zip_dict_cols system tables created.");
+ }
+
+ /* Note: The master thread has not been started at this point. */
+ /* Confirm and move to the non-LRU part of the table LRU list. */
+
+ sys_zip_dict_err = dict_check_if_system_table_exists(
+ "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2);
+ ut_a(sys_zip_dict_err == DB_SUCCESS);
+ sys_zip_dict_cols_err = dict_check_if_system_table_exists(
+ "SYS_ZIP_DICT_COLS",
+ DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, 1);
+ ut_a(sys_zip_dict_cols_err == DB_SUCCESS);
+
+ return(err);
+}
+
/********************************************************************//**
Add a single tablespace definition to the data dictionary tables in the
database.
@@ -1984,3 +2114,456 @@ dict_create_add_tablespace_to_dictionary(
return(error);
}
+
+/** Add a single compression dictionary definition to the SYS_ZIP_DICT
+InnoDB system table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict(
+ const char* name, /*!< in: dict name */
+ ulint name_len, /*!< in: dict name length */
+ const char* data, /*!< in: dict data */
+ ulint data_len, /*!< in: dict data length */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(name);
+ ut_ad(data);
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_literal(info, "name", name, name_len,
+ DATA_VARCHAR, DATA_ENGLISH);
+ pars_info_add_literal(info, "data", data, data_len,
+ DATA_BLOB, DATA_BINARY_TYPE | DATA_NOT_NULL);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ " max_id INT;\n"
+ "DECLARE CURSOR cur IS\n"
+ " SELECT ID FROM SYS_ZIP_DICT\n"
+ " ORDER BY ID DESC;\n"
+ "BEGIN\n"
+ " max_id := 0;\n"
+ " OPEN cur;\n"
+ " FETCH cur INTO max_id;\n"
+ " IF (cur % NOTFOUND) THEN\n"
+ " max_id := 0;\n"
+ " END IF;\n"
+ " CLOSE cur;\n"
+ " INSERT INTO SYS_ZIP_DICT VALUES"
+ " (max_id + 1, :name, :data);\n"
+ "END;\n",
+ FALSE, trx);
+
+ return error;
+}
+
+/** Fetch callback, just stores extracted zip_dict id in the external
+variable.
+@return TRUE if all OK */
+static
+ibool
+dict_create_extract_int_aux(
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: int32 id */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == sizeof(ib_uint32_t));
+
+ memcpy(user_arg, dfield_get_data(dfield), sizeof(ib_uint32_t));
+
+ return(TRUE);
+}
+
+/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS
+InnoDB system table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict_reference(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint dict_id, /*!< in: dict id */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "table_id", table_id);
+ pars_info_add_int4_literal(info, "column_pos", column_pos);
+ pars_info_add_int4_literal(info, "dict_id", dict_id);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ " INSERT INTO SYS_ZIP_DICT_COLS VALUES"
+ " (:table_id, :column_pos, :dict_id);\n"
+ "END;\n",
+ FALSE, trx);
+ return error;
+}
+
+/** Get a single compression dictionary id for the given
+(table id, column pos) pair.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_reference(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint* dict_id, /*!< out: dict id */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(dict_id);
+
+ pars_info_t* info = pars_info_create();
+
+ ib_uint32_t dict_id_buf;
+	mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf),
+ ULINT32_UNDEFINED);
+
+ pars_info_add_int4_literal(info, "table_id", table_id);
+ pars_info_add_int4_literal(info, "column_pos", column_pos);
+ pars_info_bind_function(
+ info, "my_func", dict_create_extract_int_aux, &dict_id_buf);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR cur IS\n"
+ " SELECT DICT_ID FROM SYS_ZIP_DICT_COLS\n"
+ " WHERE TABLE_ID = :table_id AND\n"
+ " COLUMN_POS = :column_pos;\n"
+ "BEGIN\n"
+ " OPEN cur;\n"
+ " FETCH cur INTO my_func();\n"
+ " CLOSE cur;\n"
+ "END;\n",
+ FALSE, trx);
+ if (error == DB_SUCCESS) {
+ ib_uint32_t local_dict_id = mach_read_from_4(
+ reinterpret_cast<const byte*>(&dict_id_buf));
+ if (local_dict_id == ULINT32_UNDEFINED)
+ error = DB_RECORD_NOT_FOUND;
+ else
+ *dict_id = local_dict_id;
+ }
+ return error;
+}
+
+/** Get compression dictionary id for the given name.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_name(
+ const char* dict_name, /*!< in: dict name */
+ ulint dict_name_len, /*!< in: dict name length */
+ ulint* dict_id, /*!< out: dict id */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(dict_name);
+ ut_ad(dict_name_len);
+ ut_ad(dict_id);
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_literal(info, "dict_name", dict_name, dict_name_len,
+ DATA_VARCHAR, DATA_ENGLISH);
+
+ ib_uint32_t dict_id_buf;
+ mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf),
+ ULINT32_UNDEFINED);
+ pars_info_bind_function(
+ info, "my_func", dict_create_extract_int_aux, &dict_id_buf);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR cur IS\n"
+ " SELECT ID FROM SYS_ZIP_DICT\n"
+ " WHERE NAME = :dict_name;\n"
+ "BEGIN\n"
+ " OPEN cur;\n"
+ " FETCH cur INTO my_func();\n"
+ " CLOSE cur;\n"
+ "END;\n",
+ FALSE, trx);
+ if (error == DB_SUCCESS) {
+ ib_uint32_t local_dict_id = mach_read_from_4(
+ reinterpret_cast<const byte*>(&dict_id_buf));
+ if (local_dict_id == ULINT32_UNDEFINED)
+ error = DB_RECORD_NOT_FOUND;
+ else
+ *dict_id = local_dict_id;
+ }
+ return error;
+}
+
+/** Auxiliary enum used to indicate zip dict data extraction result code */
+enum zip_dict_info_aux_code {
+ zip_dict_info_success, /*!< success */
+ zip_dict_info_not_found, /*!< zip dict record not found */
+ zip_dict_info_oom, /*!< out of memory */
+ zip_dict_info_corrupted_name, /*!< corrupted zip dict name */
+ zip_dict_info_corrupted_data /*!< corrupted zip dict data */
+};
+
+/** Auxiliary struct used to return zip dict info along with result code */
+struct zip_dict_info_aux {
+ LEX_STRING name; /*!< zip dict name */
+ LEX_STRING data; /*!< zip dict data */
+ int code; /*!< result code (0 - success) */
+};
+
+/** Fetch callback, just stores extracted zip_dict data in the external
+variable.
+@return always returns TRUE */
+static
+ibool
+dict_create_get_zip_dict_info_by_id_aux(
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to zip_dict_info_aux* */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ zip_dict_info_aux* result =
+ static_cast<zip_dict_info_aux*>(user_arg);
+
+ result->code = zip_dict_info_success;
+ result->name.str = 0;
+ result->name.length = 0;
+ result->data.str = 0;
+ result->data.length = 0;
+
+ /* NAME field */
+ que_node_t* exp = node->select_list;
+ ut_a(exp != 0);
+
+ dfield_t* dfield = que_node_get_val(exp);
+ dtype_t* type = dfield_get_type(dfield);
+ ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
+
+ ulint len = dfield_get_len(dfield);
+ void* data = dfield_get_data(dfield);
+
+
+ if (len == UNIV_SQL_NULL) {
+ result->code = zip_dict_info_corrupted_name;
+ }
+ else {
+ result->name.str =
+ static_cast<char*>(my_malloc(len + 1, MYF(0)));
+ if (result->name.str == 0) {
+ result->code = zip_dict_info_oom;
+ }
+ else {
+ memcpy(result->name.str, data, len);
+ result->name.str[len] = '\0';
+ result->name.length = len;
+ }
+ }
+
+ /* DATA field */
+ exp = que_node_get_next(exp);
+ ut_a(exp != 0);
+
+ dfield = que_node_get_val(exp);
+ type = dfield_get_type(dfield);
+ ut_a(dtype_get_mtype(type) == DATA_BLOB);
+
+ len = dfield_get_len(dfield);
+ data = dfield_get_data(dfield);
+
+ if (len == UNIV_SQL_NULL) {
+ result->code = zip_dict_info_corrupted_data;
+ }
+ else {
+ result->data.str =
+ static_cast<char*>(my_malloc(
+ len == 0 ? 1 : len, MYF(0)));
+ if (result->data.str == 0) {
+ result->code = zip_dict_info_oom;
+ }
+ else {
+ memcpy(result->data.str, data, len);
+ result->data.length = len;
+ }
+ }
+
+ ut_ad(que_node_get_next(exp) == 0);
+
+ if (result->code != zip_dict_info_success) {
+		if (result->name.str != 0) {
+ mem_free(result->name.str);
+ result->name.str = 0;
+ result->name.length = 0;
+ }
+		if (result->data.str != 0) {
+ mem_free(result->data.str);
+ result->data.str = 0;
+ result->data.length = 0;
+ }
+ }
+
+ return TRUE;
+}
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for name and data on success.
+Must be freed with mem_free().
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_info_by_id(
+ ulint dict_id, /*!< in: dict id */
+ char** name, /*!< out: dict name */
+ ulint* name_len, /*!< out: dict name length*/
+ char** data, /*!< out: dict data */
+ ulint* data_len, /*!< out: dict data length*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(name);
+ ut_ad(data);
+
+ zip_dict_info_aux rec;
+ rec.code = zip_dict_info_not_found;
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "id", dict_id);
+ pars_info_bind_function(
+ info, "my_func", dict_create_get_zip_dict_info_by_id_aux,
+ &rec);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR cur IS\n"
+ " SELECT NAME, DATA FROM SYS_ZIP_DICT\n"
+ " WHERE ID = :id;\n"
+ "BEGIN\n"
+ " OPEN cur;\n"
+ " FETCH cur INTO my_func();\n"
+ " CLOSE cur;\n"
+ "END;\n",
+ FALSE, trx);
+ if (error == DB_SUCCESS) {
+ switch (rec.code) {
+ case zip_dict_info_success:
+ *name = rec.name.str;
+ *name_len = rec.name.length;
+ *data = rec.data.str;
+ *data_len = rec.data.length;
+ break;
+ case zip_dict_info_not_found:
+ error = DB_RECORD_NOT_FOUND;
+ break;
+ case zip_dict_info_oom:
+ error = DB_OUT_OF_MEMORY;
+ break;
+ case zip_dict_info_corrupted_name:
+ case zip_dict_info_corrupted_data:
+ error = DB_INVALID_NULL;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ return error;
+}
+
+/** Remove a single compression dictionary from the data dictionary
+tables in the database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict(
+ const char* name, /*!< in: dict name */
+ ulint name_len, /*!< in: dict name length */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(name);
+
+ pars_info_t* info = pars_info_create();
+
+ ib_uint32_t dict_id_buf;
+ mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf),
+ ULINT32_UNDEFINED);
+ ib_uint32_t counter_buf;
+ mach_write_to_4(reinterpret_cast<byte*>(&counter_buf),
+ ULINT32_UNDEFINED);
+
+ pars_info_add_literal(info, "name", name, name_len,
+ DATA_VARCHAR, DATA_ENGLISH);
+ pars_info_bind_int4_literal(info, "dict_id", &dict_id_buf);
+ pars_info_bind_function(info, "find_dict_func",
+ dict_create_extract_int_aux, &dict_id_buf);
+ pars_info_bind_function(info, "count_func",
+ dict_create_extract_int_aux, &counter_buf);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "DECLARE FUNCTION find_dict_func;\n"
+ "DECLARE FUNCTION count_func;\n"
+ "DECLARE CURSOR dict_cur IS\n"
+ " SELECT ID FROM SYS_ZIP_DICT\n"
+ " WHERE NAME = :name\n"
+ " FOR UPDATE;\n"
+ "DECLARE CURSOR ref_cur IS\n"
+ " SELECT 1 FROM SYS_ZIP_DICT_COLS\n"
+ " WHERE DICT_ID = :dict_id;\n"
+ "BEGIN\n"
+ " OPEN dict_cur;\n"
+ " FETCH dict_cur INTO find_dict_func();\n"
+ " IF NOT (SQL % NOTFOUND) THEN\n"
+ " OPEN ref_cur;\n"
+ " FETCH ref_cur INTO count_func();\n"
+ " IF SQL % NOTFOUND THEN\n"
+ " DELETE FROM SYS_ZIP_DICT WHERE CURRENT OF dict_cur;\n"
+ " END IF;\n"
+ " CLOSE ref_cur;\n"
+ " END IF;\n"
+ " CLOSE dict_cur;\n"
+ "END;\n",
+ FALSE, trx);
+ if (error == DB_SUCCESS) {
+ ib_uint32_t local_dict_id = mach_read_from_4(
+ reinterpret_cast<const byte*>(&dict_id_buf));
+ if (local_dict_id == ULINT32_UNDEFINED) {
+ error = DB_RECORD_NOT_FOUND;
+ }
+ else {
+ ib_uint32_t local_counter = mach_read_from_4(
+ reinterpret_cast<const byte*>(&counter_buf));
+ if (local_counter != ULINT32_UNDEFINED)
+ error = DB_ROW_IS_REFERENCED;
+ }
+ }
+ return error;
+}
+
+/** Remove all compression dictionary references for the given table ID from
+the data dictionary tables in the database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict_references_for_table(
+ ulint table_id, /*!< in: table id */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "table_id", table_id);
+
+ dberr_t error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ " DELETE FROM SYS_ZIP_DICT_COLS\n"
+ " WHERE TABLE_ID = :table_id;\n"
+ "END;\n",
+ FALSE, trx);
+ return error;
+}
diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc
index 1b4fa916973..9e2aa8328d1 100644
--- a/storage/xtradb/dict/dict0dict.cc
+++ b/storage/xtradb/dict/dict0dict.cc
@@ -7310,3 +7310,161 @@ dict_tf_to_row_format_string(
return(0);
}
#endif /* !UNIV_HOTBACKUP */
+
+/** Insert a record into SYS_ZIP_DICT.
+@retval DB_SUCCESS if OK
+@retval dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_create_zip_dict(
+ const char* name, /*!< in: zip_dict name */
+ ulint name_len, /*!< in: zip_dict name length*/
+ const char* data, /*!< in: zip_dict data */
+ ulint data_len) /*!< in: zip_dict data length */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+ ut_ad(name);
+ ut_ad(data);
+
+ rw_lock_x_lock(&dict_operation_lock);
+ dict_mutex_enter_for_mysql();
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "insert zip_dict";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_if_not_started(trx);
+
+ err = dict_create_add_zip_dict(name, name_len, data, data_len, trx);
+
+ if (err == DB_SUCCESS) {
+ trx_commit_for_mysql(trx);
+ }
+ else {
+ trx->op_info = "rollback of internal trx on zip_dict table";
+ trx_rollback_to_savepoint(trx, NULL);
+ ut_a(trx->error_state == DB_SUCCESS);
+ }
+ trx->op_info = "";
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ dict_mutex_exit_for_mysql();
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return err;
+}
+/** Get single compression dictionary id for the given
+(table id, column pos) pair.
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_id_by_key(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint* dict_id) /*!< out: zip_dict id */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+ rw_lock_s_lock(&dict_operation_lock);
+ dict_mutex_enter_for_mysql();
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "get zip dict id by composite key";
+ trx->dict_operation_lock_mode = RW_S_LATCH;
+ trx_start_if_not_started(trx);
+
+ err = dict_create_get_zip_dict_id_by_reference(table_id, column_pos,
+ dict_id, trx);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ dict_mutex_exit_for_mysql();
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ return err;
+}
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for *name and *data on success.
+Must be freed with mem_free().
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_info_by_id(
+	ulint	dict_id,	/*!< in: dict id */
+ char** name, /*!< out: dictionary name */
+ ulint* name_len, /*!< out: dictionary name length*/
+ char** data, /*!< out: dictionary data */
+ ulint* data_len) /*!< out: dictionary data length*/
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+ rw_lock_s_lock(&dict_operation_lock);
+ dict_mutex_enter_for_mysql();
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "get zip dict name and data by id";
+ trx->dict_operation_lock_mode = RW_S_LATCH;
+ trx_start_if_not_started(trx);
+
+ err = dict_create_get_zip_dict_info_by_id(dict_id, name, name_len,
+ data, data_len, trx);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ dict_mutex_exit_for_mysql();
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ return err;
+}
+/** Delete a record in SYS_ZIP_DICT with the given name.
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found
+@retval DB_ROW_IS_REFERENCED if in use */
+UNIV_INTERN
+dberr_t
+dict_drop_zip_dict(
+ const char* name, /*!< in: zip_dict name */
+ ulint name_len) /*!< in: zip_dict name length*/
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+ ut_ad(name);
+
+ rw_lock_x_lock(&dict_operation_lock);
+ dict_mutex_enter_for_mysql();
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "delete zip_dict";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_if_not_started(trx);
+
+ err = dict_create_remove_zip_dict(name, name_len, trx);
+
+ if (err == DB_SUCCESS) {
+ trx_commit_for_mysql(trx);
+ }
+ else {
+ trx->op_info = "rollback of internal trx on zip_dict table";
+ trx_rollback_to_savepoint(trx, NULL);
+ ut_a(trx->error_state == DB_SUCCESS);
+ }
+ trx->op_info = "";
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ dict_mutex_exit_for_mysql();
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ return err;
+}
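These four wrappers are the storage-engine half of the compressed-columns feature; at the SQL layer (Percona Server syntax, quoted from memory rather than from this patch) they back statements along the lines of:

    CREATE COMPRESSION_DICTIONARY numbers ('0123456789');

    CREATE TABLE t1 (
      id INT PRIMARY KEY,
      payload BLOB COLUMN_FORMAT COMPRESSED
              WITH COMPRESSION_DICTIONARY numbers
    ) ENGINE=InnoDB;

    DROP COMPRESSION_DICTIONARY numbers;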
diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc
index 988351dbca5..ca7de72c9b9 100644
--- a/storage/xtradb/dict/dict0load.cc
+++ b/storage/xtradb/dict/dict0load.cc
@@ -56,7 +56,9 @@ static const char* SYSTEM_TABLE_NAME[] = {
"SYS_FOREIGN",
"SYS_FOREIGN_COLS",
"SYS_TABLESPACES",
- "SYS_DATAFILES"
+ "SYS_DATAFILES",
+ "SYS_ZIP_DICT",
+ "SYS_ZIP_DICT_COLS"
};
/* If this flag is TRUE, then we will load the cluster index's (and tables')
@@ -728,6 +730,161 @@ err_len:
return(NULL);
}
+/** This function parses a SYS_ZIP_DICT record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict(
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ ulint zip_size, /*!< in: nonzero=compressed BLOB page size */
+ const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */
+ ulint* id, /*!< out: dict id */
+ const char** name, /*!< out: dict name */
+ const char** data, /*!< out: dict data */
+ ulint* data_len) /*!< out: dict data length */
+{
+ ulint len;
+ const byte* field;
+
+ /* Initialize the output values */
+ *id = ULINT_UNDEFINED;
+ *name = NULL;
+ *data = NULL;
+ *data_len = 0;
+
+ if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+ return("delete-marked record in SYS_ZIP_DICT");
+ }
+
+ if (UNIV_UNLIKELY(
+ rec_get_n_fields_old(rec)!= DICT_NUM_FIELDS__SYS_ZIP_DICT)) {
+ return("wrong number of columns in SYS_ZIP_DICT record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT__ID, &len);
+ if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+ goto err_len;
+ }
+ *id = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID, &len);
+ if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR, &len);
+ if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT__NAME, &len);
+ if (UNIV_UNLIKELY(len == 0 || len == UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+ *name = mem_heap_strdupl(heap, (char*) field, len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT__DATA, &len);
+ if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+
+ if (rec_get_1byte_offs_flag(rec) == 0 &&
+ rec_2_is_field_extern(rec, DICT_FLD__SYS_ZIP_DICT__DATA)) {
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ if (UNIV_UNLIKELY
+ (!memcmp(field + len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE))) {
+ goto err_len;
+ }
+ *data = reinterpret_cast<char*>(
+ btr_copy_externally_stored_field(data_len, field,
+ zip_size, len, heap, 0));
+ }
+ else {
+ *data_len = len;
+ *data = static_cast<char*>(mem_heap_dup(heap, field, len));
+ }
+
+ return(NULL);
+
+err_len:
+ return("incorrect column length in SYS_ZIP_DICT");
+}
+
+/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict_cols(
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */
+ ulint* table_id, /*!< out: table id */
+ ulint* column_pos, /*!< out: column position */
+ ulint* dict_id) /*!< out: dict id */
+{
+ ulint len;
+ const byte* field;
+
+ /* Initialize the output values */
+ *table_id = ULINT_UNDEFINED;
+ *column_pos = ULINT_UNDEFINED;
+ *dict_id = ULINT_UNDEFINED;
+
+ if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+ return("delete-marked record in SYS_ZIP_DICT_COLS");
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) !=
+ DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS)) {
+ return("wrong number of columns in SYS_ZIP_DICT_COLS"
+ " record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID, &len);
+ if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+err_len:
+ return("incorrect column length in SYS_ZIP_DICT_COLS");
+ }
+ *table_id = mach_read_from_4(field);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS, &len);
+ if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+ goto err_len;
+ }
+ *column_pos = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID, &len);
+ if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR, &len);
+ if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID, &len);
+ if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+ goto err_len;
+ }
+ *dict_id = mach_read_from_4(field);
+
+ return(NULL);
+}
/********************************************************************//**
Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS.
@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */
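The parse order in the two functions above pins down the physical record layout of the new system tables (field indices inferred from this code; the DICT_FLD__* constants themselves are defined elsewhere), consistent with the DICT_NUM_FIELDS__* == 5 assertions added to dict_boot() earlier in this patch:

    SYS_ZIP_DICT       (5 fields): ID, DB_TRX_ID, DB_ROLL_PTR, NAME, DATA
    SYS_ZIP_DICT_COLS  (5 fields): TABLE_ID, COLUMN_POS, DB_TRX_ID,
                                   DB_ROLL_PTR, DICT_ID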
diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc
index b073398f8ec..a4aa43651f8 100644
--- a/storage/xtradb/dict/dict0stats.cc
+++ b/storage/xtradb/dict/dict0stats.cc
@@ -673,7 +673,10 @@ void
dict_stats_copy(
/*============*/
dict_table_t* dst, /*!< in/out: destination table */
- const dict_table_t* src) /*!< in: source table */
+ const dict_table_t* src, /*!< in: source table */
+ bool reset_ignored_indexes) /*!< in: if true, set ignored indexes
+ to have the same statistics as if
+ the table was empty */
{
dst->stats_last_recalc = src->stats_last_recalc;
dst->stat_n_rows = src->stat_n_rows;
@@ -692,7 +695,16 @@ dict_stats_copy(
&& (src_idx = dict_table_get_next_index(src_idx)))) {
if (dict_stats_should_ignore_index(dst_idx)) {
- continue;
+ if (reset_ignored_indexes) {
+ /* Reset index statistics for all ignored indexes,
+ unless they are FT indexes (these have no statistics)*/
+ if (dst_idx->type & DICT_FTS) {
+ continue;
+ }
+ dict_stats_empty_index(dst_idx);
+ } else {
+ continue;
+ }
}
ut_ad(!dict_index_is_univ(dst_idx));
@@ -782,7 +794,7 @@ dict_stats_snapshot_create(
t = dict_stats_table_clone_create(table);
- dict_stats_copy(t, table);
+ dict_stats_copy(t, table, false);
t->stat_persistent = table->stat_persistent;
t->stats_auto_recalc = table->stats_auto_recalc;
@@ -3240,13 +3252,10 @@ dict_stats_update(
dict_table_stats_lock(table, RW_X_LATCH);
- /* Initialize all stats to dummy values before
- copying because dict_stats_table_clone_create() does
- skip corrupted indexes so our dummy object 't' may
- have less indexes than the real object 'table'. */
- dict_stats_empty_table(table);
-
- dict_stats_copy(table, t);
+ /* Pass reset_ignored_indexes=true as parameter
+	to dict_stats_copy. This will cause statistics
+	for corrupted indexes to be set to empty values. */
+ dict_stats_copy(table, t, true);
dict_stats_assert_initialized(table);
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 21a3cd75a44..a7b0377d2a4 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -325,6 +325,8 @@ fil_space_get_by_id(
ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
space->id == id);
+ /* The system tablespace must always be found */
+ ut_ad(space || id != 0 || srv_is_being_started);
return(space);
}
@@ -1680,6 +1682,9 @@ fil_close_all_files(void)
{
fil_space_t* space;
+ // Must check both flags as it's possible for this to be called during
+ // server startup with srv_track_changed_pages == true but
+ // srv_redo_log_thread_started == false
if (srv_track_changed_pages && srv_redo_log_thread_started)
os_event_wait(srv_redo_log_tracked_event);
@@ -1719,6 +1724,9 @@ fil_close_log_files(
{
fil_space_t* space;
+ // Must check both flags as it's possible for this to be called during
+ // server startup with srv_track_changed_pages == true but
+ // srv_redo_log_thread_started == false
if (srv_track_changed_pages && srv_redo_log_thread_started)
os_event_wait(srv_redo_log_tracked_event);
diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc
index 80c48b616a9..4c54afae8cd 100644
--- a/storage/xtradb/fts/fts0fts.cc
+++ b/storage/xtradb/fts/fts0fts.cc
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
/** variable to record innodb_fts_internal_tbl_name for information
schema table INNODB_FTS_INSERTED etc. */
UNIV_INTERN char* fts_internal_tbl_name = NULL;
+UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
/** InnoDB default stopword list:
There are different versions of stopwords, the stop words listed
@@ -265,13 +266,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
+@param[in]	has_dict	whether the dict operation lock is held
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait);
+ bool wait,
+ bool has_dict);
/****************************************************************//**
Release all resources help by the words rb tree e.g., the node ilist. */
@@ -3567,7 +3570,7 @@ fts_add_doc_by_id(
DBUG_EXECUTE_IF(
"fts_instrument_sync_debug",
- fts_sync(cache->sync, true, true);
+ fts_sync(cache->sync, true, true, false);
);
DEBUG_SYNC_C("fts_instrument_sync_request");
@@ -4379,13 +4382,11 @@ fts_sync_index(
}
/** Check if index cache has been synced completely
-@param[in,out] sync sync state
@param[in,out] index_cache index cache
@return true if index is synced, otherwise false. */
static
bool
fts_sync_index_check(
- fts_sync_t* sync,
fts_index_cache_t* index_cache)
{
const ib_rbt_node_t* rbt_node;
@@ -4408,14 +4409,36 @@ fts_sync_index_check(
return(true);
}
-/*********************************************************************//**
-Commit the SYNC, change state of processed doc ids etc.
+/** Reset synced flag in index cache when rollback
+@param[in,out] index_cache index cache */
+static
+void
+fts_sync_index_reset(
+ fts_index_cache_t* index_cache)
+{
+ const ib_rbt_node_t* rbt_node;
+
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
+
+ fts_node->synced = false;
+ }
+}
+
+/** Commit the SYNC, change state of processed doc ids etc.
+@param[in,out] sync sync state
@return DB_SUCCESS if all OK */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_commit(
-/*============*/
- fts_sync_t* sync) /*!< in: sync state */
+ fts_sync_t* sync)
{
dberr_t error;
trx_t* trx = sync->trx;
@@ -4468,6 +4491,8 @@ fts_sync_commit(
(double) n_nodes/ (double) elapsed_time);
}
+ /* Avoid assertion in trx_free(). */
+ trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
return(error);
@@ -4490,6 +4515,10 @@ fts_sync_rollback(
index_cache = static_cast<fts_index_cache_t*>(
ib_vector_get(cache->indexes, i));
+ /* Reset synced flag so nodes will not be skipped
+ in the next sync, see fts_sync_write_words(). */
+ fts_sync_index_reset(index_cache);
+
for (j = 0; fts_index_selector[j].value; ++j) {
if (index_cache->ins_graph[j] != NULL) {
@@ -4515,6 +4544,9 @@ fts_sync_rollback(
rw_lock_x_unlock(&cache->lock);
fts_sql_rollback(trx);
+
+ /* Avoid assertion in trx_free(). */
+ trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
}
@@ -4523,13 +4555,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
+@param[in]	has_dict	whether the dict operation lock is held
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
fts_sync_t* sync,
bool unlock_cache,
- bool wait)
+ bool wait,
+ bool has_dict)
{
ulint i;
dberr_t error = DB_SUCCESS;
@@ -4558,6 +4592,12 @@ fts_sync(
DEBUG_SYNC_C("fts_sync_begin");
fts_sync_begin(sync);
+	/* When syncing in the background, we hold the dict operation lock
+ to prevent DDL like DROP INDEX, etc. */
+ if (has_dict) {
+ sync->trx->dict_operation_lock_mode = RW_S_LATCH;
+ }
+
begin_sync:
if (cache->total_size > fts_max_cache_size) {
/* Avoid the case: sync never finish when
@@ -4598,7 +4638,7 @@ begin_sync:
ib_vector_get(cache->indexes, i));
if (index_cache->index->to_be_dropped
- || fts_sync_index_check(sync, index_cache)) {
+ || fts_sync_index_check(index_cache)) {
continue;
}
@@ -4613,6 +4653,7 @@ end_sync:
}
rw_lock_x_lock(&cache->lock);
+ sync->interrupted = false;
sync->in_progress = false;
os_event_set(sync->event);
rw_lock_x_unlock(&cache->lock);
@@ -4636,20 +4677,23 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
@param[in] unlock_cache whether unlock cache when write node
@param[in] wait whether wait for existing sync to finish
+@param[in]	has_dict	whether the dict operation lock is held
@return DB_SUCCESS on success, error code on failure. */
UNIV_INTERN
dberr_t
fts_sync_table(
dict_table_t* table,
bool unlock_cache,
- bool wait)
+ bool wait,
+ bool has_dict)
{
dberr_t err = DB_SUCCESS;
ut_ad(table->fts);
if (!dict_table_is_discarded(table) && table->fts->cache) {
- err = fts_sync(table->fts->cache->sync, unlock_cache, wait);
+ err = fts_sync(table->fts->cache->sync,
+ unlock_cache, wait, has_dict);
}
return(err);
@@ -6527,6 +6571,36 @@ fts_check_corrupt_index(
return(0);
}
+/** Get parent table name if it's an FTS aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len)
+{
+ fts_aux_table_t aux_table;
+ char* parent_table_name = NULL;
+
+ if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
+ dict_table_t* parent_table;
+
+ parent_table = dict_table_open_on_id(
+ aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+ if (parent_table != NULL) {
+ parent_table_name = mem_strdupl(
+ parent_table->name,
+ strlen(parent_table->name));
+
+ dict_table_close(parent_table, TRUE, FALSE);
+ }
+ }
+
+ return(parent_table_name);
+}
+
/** Check the validity of the parent table.
@param[in] aux_table auxiliary table
@return true if it is a valid table or false if it is not */
diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc
index d9f2532578e..ea937c20752 100644
--- a/storage/xtradb/fts/fts0opt.cc
+++ b/storage/xtradb/fts/fts0opt.cc
@@ -2986,7 +2986,7 @@ fts_optimize_sync_table(
if (table) {
if (dict_table_has_fts_index(table) && table->fts->cache) {
- fts_sync_table(table, true, false);
+ fts_sync_table(table, true, false, true);
}
dict_table_close(table, FALSE, FALSE);
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index ceb489c8f57..66d2ae4f9de 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -107,6 +107,14 @@ this program; if not, write to the Free Software Foundation, Inc.,
#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
+#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS
+#define COLUMN_FORMAT_TYPE_COMPRESSED 0xBADF00D
+#define SQLCOM_CREATE_COMPRESSION_DICTIONARY 0xDECAF
+#define SQLCOM_DROP_COMPRESSION_DICTIONARY 0xC0FFEE
+#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST 0xDEADFACE
+const static LEX_CSTRING null_lex_cstr={0,0};
+#endif
+
#ifdef MYSQL_DYNAMIC_PLUGIN
#define tc_size 400
#define tdc_size 400
@@ -818,6 +826,19 @@ innobase_is_fake_change(
THD* thd) __attribute__((unused)); /*!< in: MySQL thread handle of the user for
whom the transaction is being committed */
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param path Path to the table
+@param[out] f_key_list The list of foreign keys
+
+@return error code or zero for success */
+static
+int
+innobase_get_parent_fk_list(
+ THD* thd,
+ const char* path,
+ List<FOREIGN_KEY_INFO>* f_key_list) __attribute__((unused));
/******************************************************************//**
Maps a MySQL trx isolation level code to the InnoDB isolation level code
@@ -1493,6 +1514,30 @@ normalize_table_name_low(
ibool set_lower_case); /* in: TRUE if we want to set
name to lower case */
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/** Creates a new compression dictionary. */
+static
+handler_create_zip_dict_result
+innobase_create_zip_dict(
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ const char* name, /*!< in: zip dictionary name */
+ ulint* name_len,
+ /*!< in/out: zip dictionary name length */
+ const char* data, /*!< in: zip dictionary data */
+ ulint* data_len);
+ /*!< in/out: zip dictionary data length */
+
+/** Drops an existing compression dictionary. */
+static
+handler_drop_zip_dict_result
+innobase_drop_zip_dict(
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ const char* name, /*!< in: zip dictionary name */
+ ulint* name_len);
+ /*!< in/out: zip dictionary name length */
+#endif
/*************************************************************//**
Checks if buffer pool is big enough to enable backoff algorithm.
InnoDB empty free list algorithm backoff requires free pages
@@ -3607,6 +3652,10 @@ innobase_init(
innobase_hton->wsrep_fake_trx_id=wsrep_fake_trx_id;
#endif /* WITH_WSREP */
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ innobase_hton->create_zip_dict = innobase_create_zip_dict;
+ innobase_hton->drop_zip_dict = innobase_drop_zip_dict;
+#endif
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
#ifndef DBUG_OFF
@@ -4300,6 +4349,90 @@ innobase_purge_changed_page_bitmaps(
return (my_bool)log_online_purge_changed_page_bitmaps(lsn);
}
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/** Creates a new compression dictionary. */
+static
+handler_create_zip_dict_result
+innobase_create_zip_dict(
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ const char* name, /*!< in: zip dictionary name */
+ ulint* name_len,
+ /*!< in/out: zip dictionary name length */
+ const char* data, /*!< in: zip dictionary data */
+ ulint* data_len)
+ /*!< in/out: zip dictionary data length */
+{
+ handler_create_zip_dict_result result =
+ HA_CREATE_ZIP_DICT_UNKNOWN_ERROR;
+
+ DBUG_ENTER("innobase_create_zip_dict");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ if (UNIV_UNLIKELY(high_level_read_only)) {
+ DBUG_RETURN(HA_CREATE_ZIP_DICT_READ_ONLY);
+ }
+
+ if (UNIV_UNLIKELY(*name_len > ZIP_DICT_MAX_NAME_LENGTH)) {
+ *name_len = ZIP_DICT_MAX_NAME_LENGTH;
+ DBUG_RETURN(HA_CREATE_ZIP_DICT_NAME_TOO_LONG);
+ }
+
+ if (UNIV_UNLIKELY(*data_len > ZIP_DICT_MAX_DATA_LENGTH)) {
+ *data_len = ZIP_DICT_MAX_DATA_LENGTH;
+ DBUG_RETURN(HA_CREATE_ZIP_DICT_DATA_TOO_LONG);
+ }
+
+ switch (dict_create_zip_dict(name, *name_len, data, *data_len)) {
+ case DB_SUCCESS:
+ result = HA_CREATE_ZIP_DICT_OK;
+ break;
+ case DB_DUPLICATE_KEY:
+ result = HA_CREATE_ZIP_DICT_ALREADY_EXISTS;
+ break;
+ default:
+ ut_ad(0);
+ result = HA_CREATE_ZIP_DICT_UNKNOWN_ERROR;
+ }
+ DBUG_RETURN(result);
+}
+
+/** Drops an existing compression dictionary. */
+static
+handler_drop_zip_dict_result
+innobase_drop_zip_dict(
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ const char* name, /*!< in: zip dictionary name */
+ ulint* name_len)
+ /*!< in/out: zip dictionary name length */
+{
+ handler_drop_zip_dict_result result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR;
+
+ DBUG_ENTER("innobase_drop_zip_dict");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ if (UNIV_UNLIKELY(high_level_read_only)) {
+ DBUG_RETURN(HA_DROP_ZIP_DICT_READ_ONLY);
+ }
+
+ switch (dict_drop_zip_dict(name, *name_len)) {
+ case DB_SUCCESS:
+ result = HA_DROP_ZIP_DICT_OK;
+ break;
+ case DB_RECORD_NOT_FOUND:
+ result = HA_DROP_ZIP_DICT_DOES_NOT_EXIST;
+ break;
+ case DB_ROW_IS_REFERENCED:
+ result = HA_DROP_ZIP_DICT_IS_REFERENCED;
+ break;
+ default:
+ ut_ad(0);
+ result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR;
+ }
+ DBUG_RETURN(result);
+}
+#endif
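A hedged caller sketch for the two hooks above (the function name and buffers are illustrative, not from this patch); innobase_init() earlier in this diff wires them into the handlerton, so the SQL layer reaches them through hton->create_zip_dict / hton->drop_zip_dict:

/* Illustrative only: drive the hook and surface the result.
   On the *_TOO_LONG results the length argument has been rewritten
   to the maximum, which the caller can put into its error message. */
static int create_dict_sketch(handlerton* hton, THD* thd)
{
        const char*     name = "numbers";
        ulint           name_len = 7;           /* strlen(name) */
        const char*     data = "one_two_three";
        ulint           data_len = 13;          /* strlen(data) */

        switch (hton->create_zip_dict(hton, thd, name, &name_len,
                                      data, &data_len)) {
        case HA_CREATE_ZIP_DICT_OK:
                return(0);
        case HA_CREATE_ZIP_DICT_NAME_TOO_LONG:
                /* name_len == ZIP_DICT_MAX_NAME_LENGTH here */
                return(1);
        case HA_CREATE_ZIP_DICT_DATA_TOO_LONG:
                /* data_len == ZIP_DICT_MAX_DATA_LENGTH here */
                return(1);
        default:
                return(1);
        }
}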
/*****************************************************************//**
Check whether this is a fake change transaction.
@return TRUE if a fake change transaction */
@@ -5933,6 +6066,88 @@ func_exit:
DBUG_RETURN(ret);
}
+/** This function checks if all the compression dictionaries referenced
+in table->fields exist in the SYS_ZIP_DICT InnoDB system table.
+@return true if all referenced dictionaries exist */
+UNIV_INTERN
+bool
+innobase_check_zip_dicts(
+ const TABLE* table, /*!< in: table in MySQL data
+ dictionary */
+ ulint* dict_ids, /*!< out: identified zip dict ids
+ (at least n_fields long) */
+ trx_t* trx, /*!< in: transaction */
+ const char** err_dict_name) /*!< out: the name of the
+ zip_dict which does not exist. */
+{
+ DBUG_ENTER("innobase_check_zip_dicts");
+
+ bool res = true;
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ dberr_t err = DB_SUCCESS;
+ const size_t n_fields = table->s->fields;
+
+ Field* field_ptr;
+ for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields;
+ ++field_idx)
+ {
+ field_ptr = table->field[field_idx];
+ if (field_ptr->has_associated_compression_dictionary()) {
+ err = dict_create_get_zip_dict_id_by_name(
+ field_ptr->zip_dict_name.str,
+ field_ptr->zip_dict_name.length,
+ &dict_ids[field_idx],
+ trx);
+ ut_a(err == DB_SUCCESS || err == DB_RECORD_NOT_FOUND);
+ }
+ else {
+ dict_ids[field_idx] = ULINT_UNDEFINED;
+ }
+
+ }
+
+ if (err != DB_SUCCESS) {
+ res = false;
+ *err_dict_name = field_ptr->zip_dict_name.str;
+ }
+
+#endif
+ DBUG_RETURN(res);
+}
+
+/** This function creates compression dictionary references in
+the SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info
+in table->fields and provided zip dict ids. */
+UNIV_INTERN
+void
+innobase_create_zip_dict_references(
+ const TABLE* table, /*!< in: table in MySQL data
+ dictionary */
+ table_id_t ib_table_id, /*!< in: table ID in InnoDB data
+ dictionary */
+ ulint* zip_dict_ids, /*!< in: zip dict ids
+ (at least n_fields long) */
+ trx_t* trx) /*!< in: transaction */
+{
+ DBUG_ENTER("innobase_create_zip_dict_references");
+
+ dberr_t err = DB_SUCCESS;
+ const size_t n_fields = table->s->fields;
+
+ for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields;
+ ++field_idx)
+ {
+ if (zip_dict_ids[field_idx] != ULINT_UNDEFINED) {
+ err = dict_create_add_zip_dict_reference(ib_table_id,
+ table->field[field_idx]->field_index,
+ zip_dict_ids[field_idx], trx);
+ ut_a(err == DB_SUCCESS);
+ }
+ }
+
+ DBUG_VOID_RETURN;
+}
+
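The two helpers above are meant to run as a pair inside one dictionary transaction: the check pass fills zip_dict_ids with one slot per field (ULINT_UNDEFINED for columns with no dictionary), and the reference pass persists only the defined slots into SYS_ZIP_DICT_COLS. A condensed sketch of the pairing, assuming heap, form, trx and table_id from a caller such as ha_innobase::create() below:

/* For a 3-column table where only column 1 is COMPRESSED with a
   dictionary whose SYS_ZIP_DICT id is 7, the check pass leaves
   zip_dict_ids = { ULINT_UNDEFINED, 7, ULINT_UNDEFINED }. */
ulint*          ids = static_cast<ulint*>(
        mem_heap_alloc(heap, form->s->fields * sizeof(ulint)));
const char*     missing = 0;

if (!innobase_check_zip_dicts(form, ids, trx, &missing)) {
        my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST,
                 MYF(0), missing);
} else {
        innobase_create_zip_dict_references(form, table_id, ids, trx);
}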
/*******************************************************************//**
This function uses index translation table to quickly locate the
requested index structure.
@@ -7164,6 +7379,7 @@ wsrep_store_key_val_for_row(
format) */
uint buff_len,/*!< in: buffer length */
const uchar* record,
+ row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
ibool* key_is_null)/*!< out: full key was null */
{
KEY* key_info = table->key_info + keynr;
@@ -7320,8 +7536,17 @@ wsrep_store_key_val_for_row(
blob_data = row_mysql_read_blob_ref(&blob_len,
(byte*) (record
- + (ulint)get_field_offset(table, field)),
- (ulint) field->pack_length());
+ + (ulint) get_field_offset(table, field)),
+ (ulint) field->pack_length(),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED,
+ reinterpret_cast<const byte*>(
+ field->zip_dict_data.str),
+ field->zip_dict_data.length, prebuilt);
+#else
+ 0, 0, 0, prebuilt);
+#endif
true_len = blob_len;
@@ -7616,7 +7841,16 @@ ha_innobase::store_key_val_for_row(
blob_data = row_mysql_read_blob_ref(&blob_len,
(byte*) (record
+ (ulint) get_field_offset(table, field)),
- (ulint) field->pack_length());
+ (ulint) field->pack_length(),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED,
+ reinterpret_cast<const byte*>(
+ field->zip_dict_data.str),
+ field->zip_dict_data.length, prebuilt);
+#else
+ 0, 0, 0, prebuilt);
+#endif
true_len = blob_len;
@@ -7872,6 +8106,14 @@ build_template_field(
templ->mbminlen = dict_col_get_mbminlen(col);
templ->mbmaxlen = dict_col_get_mbmaxlen(col);
templ->is_unsigned = col->prtype & DATA_UNSIGNED;
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ templ->compressed = (field->column_format()
+ == COLUMN_FORMAT_TYPE_COMPRESSED);
+ templ->zip_dict_data = field->zip_dict_data;
+#else
+ templ->compressed = 0;
+ templ->zip_dict_data = null_lex_cstr;
+#endif
if (!dict_index_is_clust(index)
&& templ->rec_field_no == ULINT_UNDEFINED) {
@@ -8190,6 +8432,7 @@ dberr_t
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
{
+ DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
dberr_t error = DB_SUCCESS;
ut_ad(!srv_read_only_mode);
@@ -8229,6 +8472,8 @@ ha_innobase::innobase_lock_autoinc(void)
/* Fall through to old style locking. */
case AUTOINC_OLD_STYLE_LOCKING:
+ DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
+ ut_ad(0););
error = row_lock_table_autoinc_for_mysql(prebuilt);
if (error == DB_SUCCESS) {
@@ -8242,7 +8487,7 @@ ha_innobase::innobase_lock_autoinc(void)
ut_error;
}
- return(error);
+ DBUG_RETURN(error);
}
/********************************************************************//**
@@ -8776,8 +9021,11 @@ calc_row_difference(
switch (col_type) {
case DATA_BLOB:
- o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
- n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
+ /* Do not compress BLOB columns while comparing */
+ o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len,
+ false, 0, 0, prebuilt);
+ n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len,
+ false, 0, 0, prebuilt);
break;
@@ -8847,7 +9095,17 @@ calc_row_difference(
TRUE,
new_mysql_row_col,
col_pack_len,
- dict_table_is_comp(prebuilt->table));
+ dict_table_is_comp(prebuilt->table),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED,
+ reinterpret_cast<const byte*>(
+ field->zip_dict_data.str),
+ field->zip_dict_data.length,
+#else
+ 0, 0, 0,
+#endif
+ prebuilt);
dfield_copy(&ufield->new_val, &dfield);
} else {
dfield_set_null(&ufield->new_val);
@@ -9018,7 +9276,8 @@ wsrep_calc_row_hash(
switch (col_type) {
case DATA_BLOB:
- ptr = row_mysql_read_blob_ref(&len, ptr, len);
+ ptr = row_mysql_read_blob_ref(&len, ptr, len,
+ false, 0, 0, prebuilt);
break;
@@ -10831,7 +11090,7 @@ ha_innobase::wsrep_append_keys(
len = wsrep_store_key_val_for_row(
thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record0, &is_null);
+ record0, prebuilt, &is_null);
if (!is_null) {
rcode = wsrep_append_key(
@@ -10885,7 +11144,7 @@ ha_innobase::wsrep_append_keys(
len = wsrep_store_key_val_for_row(
thd, table, i, key0,
WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record0, &is_null);
+ record0, prebuilt, &is_null);
if (!is_null) {
rcode = wsrep_append_key(
thd, trx, table_share, table,
@@ -10904,7 +11163,7 @@ ha_innobase::wsrep_append_keys(
len = wsrep_store_key_val_for_row(
thd, table, i, key1,
WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record1, &is_null);
+ record1, prebuilt, &is_null);
if (!is_null && memcmp(key0, key1, len)) {
rcode = wsrep_append_key(
thd, trx, table_share,
@@ -11079,6 +11338,7 @@ create_table_def(
ulint unsigned_type;
ulint binary_type;
ulint long_true_varchar;
+ ulint compressed;
ulint charset_no;
ulint i;
ulint doc_id_col = 0;
@@ -11228,6 +11488,13 @@ create_table_def(
}
}
+ /* Check if the field has the COMPRESSED attribute */
+ compressed = 0;
+ if (field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED) {
+ compressed = DATA_COMPRESSED;
+ }
+
/* First check whether the column to be added has a
system reserved name. */
if (dict_col_name_is_reserved(field->field_name)){
@@ -11248,7 +11515,8 @@ err_col:
dtype_form_prtype(
(ulint) field->type()
| nulls_allowed | unsigned_type
- | binary_type | long_true_varchar,
+ | binary_type | long_true_varchar
+ | compressed,
charset_no),
col_len);
}
@@ -12078,6 +12346,9 @@ ha_innobase::create(
const char* stmt;
size_t stmt_len;
+ mem_heap_t* heap = 0;
+ ulint* zip_dict_ids = 0;
+
DBUG_ENTER("ha_innobase::create");
DBUG_ASSERT(thd != NULL);
@@ -12168,6 +12439,19 @@ ha_innobase::create(
row_mysql_lock_data_dictionary(trx);
+ heap = mem_heap_create(form->s->fields * sizeof(ulint));
+ zip_dict_ids = static_cast<ulint*>(
+ mem_heap_alloc(heap, form->s->fields * sizeof(ulint)));
+
+ const char* err_zip_dict_name = 0;
+ if (!innobase_check_zip_dicts(form, zip_dict_ids,
+ trx, &err_zip_dict_name)) {
+ error = -1;
+ my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST,
+ MYF(0), err_zip_dict_name);
+ goto cleanup;
+ }
+
error = create_table_def(trx, form, norm_name, temp_path,
remote_path, flags, flags2);
if (error) {
@@ -12275,6 +12559,22 @@ ha_innobase::create(
dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
}
+ /*
+ Adding compression dictionary <-> compressed table column links
+ to the SYS_ZIP_DICT_COLS table.
+ */
+ ut_a(zip_dict_ids != 0);
+ {
+ dict_table_t* local_table = dict_table_open_on_name(
+ norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+
+ ut_a(local_table);
+ table_id_t table_id = local_table->id;
+ dict_table_close(local_table, TRUE, FALSE);
+ innobase_create_zip_dict_references(form,
+ table_id, zip_dict_ids, trx);
+ }
+
stmt = innobase_get_stmt(thd, &stmt_len);
if (stmt) {
@@ -12391,6 +12691,9 @@ ha_innobase::create(
trx_free_for_mysql(trx);
+ if (heap != 0)
+ mem_heap_free(heap);
+
DBUG_RETURN(0);
cleanup:
@@ -12400,6 +12703,9 @@ cleanup:
trx_free_for_mysql(trx);
+ if (heap != 0)
+ mem_heap_free(heap);
+
DBUG_RETURN(error);
}
@@ -13486,6 +13792,14 @@ ha_innobase::info_low(
if (dict_stats_is_persistent_enabled(ib_table)) {
if (is_analyze) {
+
+ /* If this table is already queued for
+ background analyze, remove it from the
+ queue as we are about to do the same */
+ dict_mutex_enter_for_mysql();
+ dict_stats_recalc_pool_del(ib_table);
+ dict_mutex_exit_for_mysql();
+
opt = DICT_STATS_RECALC_PERSISTENT;
} else {
/* This is e.g. 'SHOW INDEXES', fetch
@@ -13915,7 +14229,7 @@ ha_innobase::optimize(
if (innodb_optimize_fulltext_only) {
if (prebuilt->table->fts && prebuilt->table->fts->cache
&& !dict_table_is_discarded(prebuilt->table)) {
- fts_sync_table(prebuilt->table, false, true);
+ fts_sync_table(prebuilt->table, false, true, false);
fts_optimize_table(prebuilt->table);
}
return(HA_ADMIN_OK);
@@ -14119,7 +14433,14 @@ ha_innobase::check(
prebuilt->select_lock_type = LOCK_NONE;
- if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+ bool check_result
+ = row_check_index_for_mysql(prebuilt, index, &n_rows);
+ DBUG_EXECUTE_IF(
+ "dict_set_index_corrupted",
+ if (!(index->type & DICT_CLUSTERED)) {
+ check_result = false;
+ });
+ if (!check_result) {
innobase_format_name(
index_name, sizeof index_name,
index->name, TRUE);
@@ -14446,6 +14767,75 @@ get_foreign_key_info(
return(pf_key_info);
}
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param table The table
+@param[out] f_key_list The list of foreign keys */
+static
+void
+fill_foreign_key_list(THD* thd,
+ const dict_table_t* table,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ for (dict_foreign_set::iterator it = table->referenced_set.begin();
+ it != table->referenced_set.end(); ++it) {
+
+ dict_foreign_t* foreign = *it;
+
+ FOREIGN_KEY_INFO* pf_key_info
+ = get_foreign_key_info(thd, foreign);
+ if (pf_key_info) {
+ f_key_list->push_back(pf_key_info);
+ }
+ }
+}
+
+/** Get the list of foreign keys referencing a specified
+table.
+@param thd The thread handle
+@param path Path to the table
+@param[out] f_key_list The list of foreign keys
+
+@return error code or zero for success */
+static
+int
+innobase_get_parent_fk_list(
+ THD* thd,
+ const char* path,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+{
+ ut_a(strlen(path) <= FN_REFLEN);
+ char norm_name[FN_REFLEN + 1];
+ normalize_table_name(norm_name, path);
+
+ trx_t* parent_trx = check_trx_exists(thd);
+ parent_trx->op_info = "getting list of referencing foreign keys";
+ trx_search_latch_release_if_reserved(parent_trx);
+
+ mutex_enter(&dict_sys->mutex);
+
+ dict_table_t* table
+ = dict_table_open_on_name(norm_name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
+ if (!table) {
+ mutex_exit(&dict_sys->mutex);
+ return(HA_ERR_NO_SUCH_TABLE);
+ }
+
+ fill_foreign_key_list(thd, table, f_key_list);
+
+ dict_table_close(table, TRUE, FALSE);
+
+ mutex_exit(&dict_sys->mutex);
+ parent_trx->op_info = "";
+ return(0);
+}
+
/*******************************************************************//**
Gets the list of foreign keys in this table.
@return always 0, that is, always succeeds */
@@ -14498,9 +14888,6 @@ ha_innobase::get_parent_foreign_key_list(
THD* thd, /*!< in: user thread handle */
List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
{
- FOREIGN_KEY_INFO* pf_key_info;
- dict_foreign_t* foreign;
-
ut_a(prebuilt != NULL);
update_thd(ha_thd());
@@ -14509,20 +14896,7 @@ ha_innobase::get_parent_foreign_key_list(
trx_search_latch_release_if_reserved(prebuilt->trx);
mutex_enter(&(dict_sys->mutex));
-
- for (dict_foreign_set::iterator it
- = prebuilt->table->referenced_set.begin();
- it != prebuilt->table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- pf_key_info = get_foreign_key_info(thd, foreign);
- if (pf_key_info) {
- f_key_list->push_back(pf_key_info);
- }
- }
-
+ fill_foreign_key_list(thd, prebuilt->table, f_key_list);
mutex_exit(&(dict_sys->mutex));
prebuilt->trx->op_info = "";
@@ -14612,6 +14986,11 @@ ha_innobase::extra(
if (prebuilt->blob_heap) {
row_mysql_prebuilt_free_blob_heap(prebuilt);
}
+
+ if (prebuilt->compress_heap) {
+ row_mysql_prebuilt_free_compress_heap(prebuilt);
+ }
+
break;
case HA_EXTRA_RESET_STATE:
reset_template();
@@ -14663,6 +15042,10 @@ ha_innobase::reset()
row_mysql_prebuilt_free_blob_heap(prebuilt);
}
+ if (prebuilt->compress_heap) {
+ row_mysql_prebuilt_free_compress_heap(prebuilt);
+ }
+
reset_template();
ds_mrr.dsmrr_close();
@@ -14869,7 +15252,11 @@ ha_innobase::external_lock(
&& lock_type == F_WRLCK)
|| thd_sql_command(thd) == SQLCOM_CREATE_INDEX
|| thd_sql_command(thd) == SQLCOM_DROP_INDEX
- || thd_sql_command(thd) == SQLCOM_DELETE)) {
+ || thd_sql_command(thd) == SQLCOM_DELETE
+ || thd_sql_command(thd) ==
+ SQLCOM_CREATE_COMPRESSION_DICTIONARY
+ || thd_sql_command(thd) ==
+ SQLCOM_DROP_COMPRESSION_DICTIONARY)) {
if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE)
{
@@ -15637,7 +16024,9 @@ ha_innobase::store_lock(
&& lock_type <= TL_WRITE))
|| sql_command == SQLCOM_CREATE_INDEX
|| sql_command == SQLCOM_DROP_INDEX
- || sql_command == SQLCOM_DELETE)) {
+ || sql_command == SQLCOM_DELETE
+ || sql_command == SQLCOM_CREATE_COMPRESSION_DICTIONARY
+ || sql_command == SQLCOM_DROP_COMPRESSION_DICTIONARY)) {
ib_senderrf(trx->mysql_thd,
IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
@@ -16594,6 +16983,84 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_YES);
}
+/** This function reads zip dict-related info from SYS_ZIP_DICT
+and SYS_ZIP_DICT_COLS for all columns marked with
+COLUMN_FORMAT_TYPE_COMPRESSED flag and updates
+zip_dict_name / zip_dict_data for those which have associated
+compression dictionaries.
+*/
+UNIV_INTERN
+void
+ha_innobase::update_field_defs_with_zip_dict_info()
+{
+ DBUG_ENTER("update_field_defs_with_zip_dict_info");
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ char norm_name[FN_REFLEN];
+ normalize_table_name(norm_name, table_share->normalized_path.str);
+
+ dict_table_t* ib_table = dict_table_open_on_name(
+ norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
+
+ /* if dict_table_open_on_name() returns NULL, then it means that
+ TABLE_SHARE is populated for a table being created and we can
+ skip filling zip dict info here */
+ if (ib_table == 0)
+ DBUG_VOID_RETURN;
+
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ table_id_t ib_table_id = ib_table->id;
+ dict_table_close(ib_table, FALSE, FALSE);
+ Field* field;
+ for (uint i = 0; i < table_share->fields; ++i) {
+ field = table_share->field[i];
+ if (field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED) {
+ bool reference_found = false;
+ ulint dict_id = 0;
+ switch (dict_get_dictionary_id_by_key(ib_table_id, i,
+ &dict_id)) {
+ case DB_SUCCESS:
+ reference_found = true;
+ break;
+ case DB_RECORD_NOT_FOUND:
+ reference_found = false;
+ break;
+ default:
+ ut_error;
+ }
+ if (reference_found) {
+ char* local_name = 0;
+ ulint local_name_len = 0;
+ char* local_data = 0;
+ ulint local_data_len = 0;
+ if (dict_get_dictionary_info_by_id(dict_id,
+ &local_name, &local_name_len,
+ &local_data, &local_data_len) !=
+ DB_SUCCESS) {
+ ut_error;
+ }
+ else {
+ field->zip_dict_name.str =
+ local_name;
+ field->zip_dict_name.length =
+ local_name_len;
+ field->zip_dict_data.str =
+ local_data;
+ field->zip_dict_data.length =
+ local_data_len;
+ }
+ }
+ else {
+ field->zip_dict_name = null_lex_cstr;
+ field->zip_dict_data = null_lex_cstr;
+ }
+ }
+ }
+#endif
+ DBUG_VOID_RETURN;
+}
+
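One ownership detail above: dict_get_dictionary_info_by_id() (declared in dict0dict.h later in this patch) allocates the name and data buffers, and per its header comment they must eventually be released with mem_free(); the Field just stores the raw pointers. The release site is outside this diff; a hypothetical teardown would look roughly like:

/* Hypothetical cleanup sketch; the actual release happens when the
   share is torn down, outside this diff. */
if (field->zip_dict_name.str != NULL) {
        mem_free(const_cast<char*>(field->zip_dict_name.str));
        field->zip_dict_name = null_lex_cstr;
}
if (field->zip_dict_data.str != NULL) {
        mem_free(const_cast<char*>(field->zip_dict_data.str));
        field->zip_dict_data = null_lex_cstr;
}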
/****************************************************************//**
Update the system variable innodb_io_capacity_max using the "saved"
value. This function is registered as a callback with MySQL. */
@@ -17155,7 +17622,12 @@ innodb_internal_table_update(
my_free(old);
}
- fts_internal_tbl_name = *(char**) var_ptr;
+ fts_internal_tbl_name2 = *(char**) var_ptr;
+ if (fts_internal_tbl_name2 == NULL) {
+ fts_internal_tbl_name = const_cast<char*>("default");
+ } else {
+ fts_internal_tbl_name = fts_internal_tbl_name2;
+ }
}
/****************************************************************//**
@@ -18287,7 +18759,6 @@ innodb_track_changed_pages_validate(
for update function */
struct st_mysql_value* value) /*!< in: incoming bool */
{
- static bool enabled_on_startup = false;
long long intbuf = 0;
if (value->val_int(value, &intbuf)) {
@@ -18295,8 +18766,7 @@ innodb_track_changed_pages_validate(
return 1;
}
- if (srv_track_changed_pages || enabled_on_startup) {
- enabled_on_startup = true;
+ if (srv_redo_log_thread_started) {
*reinterpret_cast<ulong*>(save)
= static_cast<ulong>(intbuf);
return 0;
@@ -19834,7 +20304,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
"Whether to disable OS system file cache for sort I/O",
NULL, NULL, FALSE);
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
PLUGIN_VAR_NOCMDARG,
"FTS internal auxiliary table to be checked",
innodb_internal_table_validate,
@@ -20314,7 +20784,7 @@ static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
"Warn corruptions of user tables as 'corrupt table' instead of not crashing itself, "
"when used with file_per_table. "
"All file io for the datafile after detected as corrupt are disabled, "
- "except for the deletion. Possible options are 'assert', 'warn' & 'salvage'",
+ "except for the deletion.",
NULL, NULL, 0, &corrupt_table_action_typelib);
static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
@@ -20329,6 +20799,21 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
"Print stacktrace on long semaphore wait (off by default supported only on linux)",
NULL, NULL, FALSE);
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+static MYSQL_SYSVAR_UINT(compressed_columns_zip_level,
+ srv_compressed_columns_zip_level,
+ PLUGIN_VAR_RQCMDARG,
+ "Compression level used for compressed columns. 0 is no compression"
+ ", 1 is fastest and 9 is best compression. Default is 6.",
+ NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+
+static MYSQL_SYSVAR_ULONG(compressed_columns_threshold,
+ srv_compressed_columns_threshold,
+ PLUGIN_VAR_RQCMDARG,
+ "Compress column data if its length exceeds this value. Default is 96",
+ NULL, NULL, 96, 1, ~0UL, 0);
+#endif
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_block_size),
MYSQL_SYSVAR(additional_mem_pool_size),
@@ -20535,6 +21020,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(use_stacktrace),
MYSQL_SYSVAR(simulate_comp_failures),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ MYSQL_SYSVAR(compressed_columns_zip_level),
+ MYSQL_SYSVAR(compressed_columns_threshold),
+#endif
NULL
};
@@ -20557,6 +21046,10 @@ maria_declare_plugin(xtradb)
i_s_xtradb_read_view,
i_s_xtradb_internal_hash_tables,
i_s_xtradb_rseg,
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+i_s_xtradb_zip_dict,
+i_s_xtradb_zip_dict_cols,
+#endif
i_s_innodb_trx,
i_s_innodb_locks,
i_s_innodb_lock_waits,
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 37d787ad14d..cb7dd6b9cf4 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -291,8 +291,17 @@ class ha_innobase: public handler
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
+
bool check_if_supported_virtual_columns(void) { return TRUE; }
+ /** This function reads zip dict-related info from SYS_ZIP_DICT
+ and SYS_ZIP_DICT_COLS for all columns marked with
+ COLUMN_FORMAT_TYPE_COMPRESSED flag and updates
+ zip_dict_name / zip_dict_data for those which have associated
+ compression dictionaries.
+ */
+ virtual void update_field_defs_with_zip_dict_info();
+
private:
/** Builds a 'template' to the prebuilt struct.
@@ -723,3 +732,31 @@ ib_push_frm_error(
TABLE* table, /*!< in: MySQL table */
ulint n_keys, /*!< in: InnoDB #keys */
bool push_warning); /*!< in: print warning ? */
+
+/** This function checks if all the compression dictionaries referenced
+in table->fields exist in the SYS_ZIP_DICT InnoDB system table.
+@return true if all referenced dictionaries exist */
+UNIV_INTERN
+bool
+innobase_check_zip_dicts(
+ const TABLE* table, /*!< in: table in MySQL data
+ dictionary */
+ ulint* dict_ids, /*!< out: identified zip dict ids
+ (at least n_fields long) */
+ trx_t* trx, /*!< in: transaction */
+ const char** err_dict_name); /*!< out: the name of the
+ zip_dict which does not exist. */
+
+/** This function creates compression dictionary references in
+the SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info
+in table->fields and provided zip dict ids. */
+UNIV_INTERN
+void
+innobase_create_zip_dict_references(
+ const TABLE* table, /*!< in: table in MySQL data
+ dictionary */
+ table_id_t ib_table_id, /*!< in: table ID in InnoDB data
+ dictionary */
+ ulint* zip_dict_ids, /*!< in: zip dict ids
+ (at least n_fields long) */
+ trx_t* trx); /*!< in: transaction */
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index 646ba986dcc..8299b4dafe1 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -21,6 +21,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
Smart ALTER TABLE
*******************************************************/
+#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS
+#define COLUMN_FORMAT_TYPE_COMPRESSED 0xBADF00D
+#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST 0xDEADFACE
+#endif
+
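The fallback defines above let the rest of this file avoid per-call-site #ifdefs: 0xBADF00D is outside the range Field::column_format() can return, so in builds without compressed-column support every comparison against COLUMN_FORMAT_TYPE_COMPRESSED is statically false and the compressed-column branches compile to dead code. For example:

/* With HAVE_PERCONA_COMPRESSED_COLUMNS undefined: */
if (field->column_format() == COLUMN_FORMAT_TYPE_COMPRESSED) {
        /* never taken: column_format() yields small enum values,
           never 0xBADF00D */
}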
#include <my_global.h>
#include <unireg.h>
#include <mysqld_error.h>
@@ -213,7 +218,10 @@ innobase_need_rebuild(
const Alter_inplace_info* ha_alter_info,
const TABLE* altered_table)
{
- if (ha_alter_info->handler_flags
+ Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+ ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+ if (alter_inplace_flags
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !(ha_alter_info->create_info->used_fields
& (HA_CREATE_USED_ROW_FORMAT
@@ -1181,6 +1189,15 @@ innobase_col_to_mysql(
field->reset();
if (field->type() == MYSQL_TYPE_VARCHAR) {
+ if (field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED) {
+ /* Skip compressed varchar column when
+ reporting an erroneous row
+ during index creation or table rebuild. */
+ field->set_null();
+ break;
+ }
+
/* This is a >= 5.0.3 type true VARCHAR. Store the
length of the data to the first byte or the first
two bytes of dest. */
@@ -2466,7 +2483,8 @@ innobase_build_col_map_add(
mem_heap_t* heap,
dfield_t* dfield,
const Field* field,
- ulint comp)
+ ulint comp,
+ row_prebuilt_t* prebuilt)
{
if (field->is_real_null()) {
dfield_set_null(dfield);
@@ -2478,7 +2496,14 @@ innobase_build_col_map_add(
byte* buf = static_cast<byte*>(mem_heap_alloc(heap, size));
row_mysql_store_col_in_innobase_format(
- dfield, buf, TRUE, field->ptr, size, comp);
+ dfield, buf, TRUE, field->ptr, size, comp,
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+ field->column_format() == COLUMN_FORMAT_TYPE_COMPRESSED,
+ reinterpret_cast<const byte*>(field->zip_dict_data.str),
+ field->zip_dict_data.length, prebuilt);
+#else
+ 0, 0, 0, prebuilt);
+#endif
}
/** Construct the translation table for reordering, dropping or
@@ -2503,7 +2528,8 @@ innobase_build_col_map(
const dict_table_t* new_table,
const dict_table_t* old_table,
dtuple_t* add_cols,
- mem_heap_t* heap)
+ mem_heap_t* heap,
+ row_prebuilt_t* prebuilt)
{
uint old_i, old_innobase_i;
DBUG_ENTER("innobase_build_col_map");
@@ -2554,7 +2580,7 @@ innobase_build_col_map(
innobase_build_col_map_add(
heap, dtuple_get_nth_field(add_cols, i),
altered_table->field[sql_idx],
- dict_table_is_comp(new_table));
+ dict_table_is_comp(new_table), prebuilt);
found_col:
i++;
sql_idx++;
@@ -2718,7 +2744,8 @@ prepare_inplace_alter_table_dict(
ulint flags2,
ulint fts_doc_id_col,
bool add_fts_doc_id,
- bool add_fts_doc_id_idx)
+ bool add_fts_doc_id_idx,
+ row_prebuilt_t* prebuilt)
{
bool dict_locked = false;
ulint* add_key_nums; /* MySQL key numbers */
@@ -2730,6 +2757,7 @@ prepare_inplace_alter_table_dict(
ulint num_fts_index;
ha_innobase_inplace_ctx*ctx;
uint sql_idx;
+ ulint* zip_dict_ids = 0;
DBUG_ENTER("prepare_inplace_alter_table_dict");
@@ -2866,6 +2894,18 @@ prepare_inplace_alter_table_dict(
ulint n_cols;
dtuple_t* add_cols;
+ zip_dict_ids = static_cast<ulint*>(
+ mem_heap_alloc(ctx->heap,
+ altered_table->s->fields * sizeof(ulint)));
+
+ const char* err_zip_dict_name = 0;
+ if (!innobase_check_zip_dicts(altered_table, zip_dict_ids,
+ ctx->trx, &err_zip_dict_name)) {
+ my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST,
+ MYF(0), err_zip_dict_name);
+ goto new_clustered_failed;
+ }
+
if (innobase_check_foreigns(
ha_alter_info, altered_table, old_table,
user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
@@ -2972,6 +3012,12 @@ prepare_inplace_alter_table_dict(
}
}
+ if (field->column_format() ==
+ COLUMN_FORMAT_TYPE_COMPRESSED) {
+ field_type |= DATA_COMPRESSED;
+ }
+
+
if (dict_col_name_is_reserved(field->field_name)) {
dict_mem_table_free(ctx->new_table);
my_error(ER_WRONG_COLUMN_NAME, MYF(0),
@@ -3051,7 +3097,7 @@ prepare_inplace_alter_table_dict(
ctx->col_map = innobase_build_col_map(
ha_alter_info, altered_table, old_table,
ctx->new_table, user_table,
- add_cols, ctx->heap);
+ add_cols, ctx->heap, prebuilt);
ctx->add_cols = add_cols;
} else {
DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table));
@@ -3229,6 +3275,15 @@ op_ok:
DBUG_ASSERT(error == DB_SUCCESS);
+ /*
+ Adding compression dictionary <-> compressed table column links
+ to the SYS_ZIP_DICT_COLS table.
+ */
+ if (zip_dict_ids != 0) {
+ innobase_create_zip_dict_references(altered_table,
+ ctx->trx->table_id, zip_dict_ids, ctx->trx);
+ }
+
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
MySQL crashes while creating a new primary key inside
@@ -3947,7 +4002,7 @@ err_exit:
}
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
- || (ha_alter_info->handler_flags
+ || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table))) {
@@ -4081,7 +4136,7 @@ found_col:
table_share->table_name.str,
flags, flags2,
fts_doc_col_no, add_fts_doc_id,
- add_fts_doc_id_idx));
+ add_fts_doc_id_idx, prebuilt));
}
/** Alter the table structure in-place with operations
@@ -4121,7 +4176,7 @@ ok_exit:
DBUG_RETURN(false);
}
- if (ha_alter_info->handler_flags
+ if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
== Alter_inplace_info::CHANGE_CREATE_OPTION
&& !innobase_need_rebuild(ha_alter_info, table)) {
goto ok_exit;
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 02d2a2100a4..59cad1c2e7a 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2958,15 +2958,26 @@ i_s_fts_deleted_generic_fill(
DBUG_RETURN(0);
}
- deleted = fts_doc_ids_create();
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ DBUG_RETURN(0);
+ } else if (!dict_table_has_fts_index(user_table)) {
+ dict_table_close(user_table, FALSE, FALSE);
+
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
+ deleted = fts_doc_ids_create();
+
trx = trx_allocate_for_background();
trx->op_info = "Select for FTS DELETE TABLE";
@@ -2994,6 +3005,8 @@ i_s_fts_deleted_generic_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3365,6 +3378,12 @@ i_s_fts_index_cache_fill(
DBUG_RETURN(0);
}
+ if (user_table->fts == NULL || user_table->fts->cache == NULL) {
+ dict_table_close(user_table, FALSE, FALSE);
+
+ DBUG_RETURN(0);
+ }
+
cache = user_table->fts->cache;
ut_a(cache);
@@ -3798,10 +3817,15 @@ i_s_fts_index_table_fill(
DBUG_RETURN(0);
}
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
+
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3814,6 +3838,8 @@ i_s_fts_index_table_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -3946,16 +3972,25 @@ i_s_fts_config_fill(
DBUG_RETURN(0);
}
+ DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
fields = table->field;
+ /* Prevent DDL from dropping fts aux tables. */
+ rw_lock_s_lock(&dict_operation_lock);
+
user_table = dict_table_open_on_name(
fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
} else if (!dict_table_has_fts_index(user_table)) {
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
@@ -4011,6 +4046,8 @@ i_s_fts_config_fill(
dict_table_close(user_table, FALSE, FALSE);
+ rw_lock_s_unlock(&dict_operation_lock);
+
DBUG_RETURN(0);
}
diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc
index 7078ab752c2..eb6637dad03 100644
--- a/storage/xtradb/handler/xtradb_i_s.cc
+++ b/storage/xtradb/handler/xtradb_i_s.cc
@@ -32,9 +32,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <read0i_s.h>
#include <trx0i_s.h>
#include "srv0start.h" /* for srv_was_started */
+#include <btr0pcur.h> /* btr_pcur_t */
#include <btr0sea.h> /* btr_search_sys */
#include <log0recv.h> /* recv_sys */
#include <fil0fil.h>
+#include <dict0crea.h> /* for ZIP_DICT_MAX_* constants */
/* for XTRADB_RSEG table */
#include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
@@ -131,6 +133,30 @@ field_store_string(
return(ret);
}
+static int field_store_blob(Field*, const char*, uint) __attribute__((unused));
+/** Auxiliary function to store (char*, len) value in MYSQL_TYPE_BLOB
+field.
+@return 0 on success */
+static
+int
+field_store_blob(
+ Field* field, /*!< in/out: target field for storage */
+ const char* data, /*!< in: pointer to data, or NULL */
+ uint data_len) /*!< in: data length */
+{
+ int ret;
+
+ if (data != NULL) {
+ ret = field->store(data, data_len, system_charset_info);
+ field->set_notnull();
+ } else {
+ ret = 0; /* success */
+ field->set_null();
+ }
+
+ return(ret);
+}
+
static
int
i_s_common_deinit(
@@ -603,3 +629,331 @@ UNIV_INTERN struct st_mysql_plugin i_s_xtradb_rseg =
STRUCT_FLD(version_info, INNODB_VERSION_STR),
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
};
+
+
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/************************************************************************/
+enum zip_dict_field_type
+{
+ zip_dict_field_id,
+ zip_dict_field_name,
+ zip_dict_field_zip_dict
+};
+
+static ST_FIELD_INFO xtradb_sys_zip_dict_fields_info[] =
+{
+ { STRUCT_FLD(field_name, "id"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ { STRUCT_FLD(field_name, "name"),
+ STRUCT_FLD(field_length, ZIP_DICT_MAX_NAME_LENGTH),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ { STRUCT_FLD(field_name, "zip_dict"),
+ STRUCT_FLD(field_length, ZIP_DICT_MAX_DATA_LENGTH),
+ STRUCT_FLD(field_type, MYSQL_TYPE_BLOB),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ END_OF_ST_FIELD_INFO
+};
+
+/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT with information
+collected by scanning SYS_ZIP_DICT table.
+@return 0 on success */
+static
+int
+xtradb_i_s_dict_fill_sys_zip_dict(
+ THD* thd, /*!< in: thread */
+ ulint id, /*!< in: dict ID */
+ const char* name, /*!< in: dict name */
+ const char* data, /*!< in: dict data */
+ ulint data_len, /*!< in: dict data length */
+ TABLE* table_to_fill) /*!< in/out: fill this table */
+{
+ DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict");
+
+ Field** fields = table_to_fill->field;
+
+ OK(field_store_ulint(fields[zip_dict_field_id], id));
+ OK(field_store_string(fields[zip_dict_field_name], name));
+ OK(field_store_blob(fields[zip_dict_field_zip_dict], data,
+ data_len));
+
+ OK(schema_table_store_record(thd, table_to_fill));
+
+ DBUG_RETURN(0);
+}
+
+/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT table.
+Loop through each record in SYS_ZIP_DICT, and extract the column
+information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT table.
+@return 0 on success */
+static
+int
+xtradb_i_s_sys_zip_dict_fill_table(
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* ) /*!< in: condition (not used) */
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mem_heap_t* heap;
+ mtr_t mtr;
+
+ DBUG_ENTER("xtradb_i_s_sys_zip_dict_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+ /* deny access to users without the SUPER_ACL privilege */
+ if (check_global_access(thd, SUPER_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ heap = mem_heap_create(1000);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT);
+ ulint zip_size = dict_table_zip_size(pcur.btr_cur.index->table);
+
+ while (rec) {
+ const char* err_msg;
+ ulint id;
+ const char* name;
+ const char* data;
+ ulint data_len;
+
+ /* Extract necessary information from a SYS_ZIP_DICT row */
+ err_msg = dict_process_sys_zip_dict(
+ heap, zip_size, rec, &id, &name, &data, &data_len);
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+
+ if (!err_msg) {
+ xtradb_i_s_dict_fill_sys_zip_dict(
+ thd, id, name, data, data_len,
+ tables->table);
+ } else {
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
+ }
+
+ mem_heap_empty(heap);
+
+ /* Get the next record */
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ mem_heap_free(heap);
+
+ DBUG_RETURN(0);
+}
+
+static int i_s_xtradb_zip_dict_init(void* p)
+{
+ DBUG_ENTER("i_s_xtradb_zip_dict_init");
+
+ ST_SCHEMA_TABLE* schema = static_cast<ST_SCHEMA_TABLE*>(p);
+
+ schema->fields_info = xtradb_sys_zip_dict_fields_info;
+ schema->fill_table = xtradb_i_s_sys_zip_dict_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict =
+{
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+ STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(name, "XTRADB_ZIP_DICT"),
+ STRUCT_FLD(author, PLUGIN_AUTHOR),
+ STRUCT_FLD(descr, "InnoDB compression dictionaries information"),
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+ STRUCT_FLD(init, i_s_xtradb_zip_dict_init),
+ STRUCT_FLD(deinit, i_s_common_deinit),
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+ STRUCT_FLD(status_vars, NULL),
+ STRUCT_FLD(system_vars, NULL),
+ STRUCT_FLD(__reserved1, NULL),
+ STRUCT_FLD(flags, 0UL),
+};
+
+enum zip_dict_cols_field_type
+{
+ zip_dict_cols_field_table_id,
+ zip_dict_cols_field_column_pos,
+ zip_dict_cols_field_dict_id
+};
+
+static ST_FIELD_INFO xtradb_sys_zip_dict_cols_fields_info[] =
+{
+ { STRUCT_FLD(field_name, "table_id"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ { STRUCT_FLD(field_name, "column_pos"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ { STRUCT_FLD(field_name, "dict_id"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+ END_OF_ST_FIELD_INFO
+};
+
+/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS with information
+collected by scanning SYS_ZIP_DICT_COLS table.
+@return 0 on success */
+static
+int
+xtradb_i_s_dict_fill_sys_zip_dict_cols(
+ THD* thd, /*!< in: thread */
+ ulint table_id, /*!< in: table ID */
+ ulint column_pos, /*!< in: column position */
+ ulint dict_id, /*!< in: dict ID */
+ TABLE* table_to_fill) /*!< in/out: fill this table */
+{
+ DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict_cols");
+
+ Field** fields = table_to_fill->field;
+
+ OK(field_store_ulint(fields[zip_dict_cols_field_table_id],
+ table_id));
+ OK(field_store_ulint(fields[zip_dict_cols_field_column_pos],
+ column_pos));
+ OK(field_store_ulint(fields[zip_dict_cols_field_dict_id],
+ dict_id));
+
+ OK(schema_table_store_record(thd, table_to_fill));
+
+ DBUG_RETURN(0);
+}
+
+/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table.
+Loop through each record in SYS_ZIP_DICT_COLS, and extract the column
+information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table.
+@return 0 on success */
+static
+int
+xtradb_i_s_sys_zip_dict_cols_fill_table(
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* ) /*!< in: condition (not used) */
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mem_heap_t* heap;
+ mtr_t mtr;
+
+ DBUG_ENTER("xtradb_i_s_sys_zip_dict_cols_fill_table");
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+ /* deny access to users without the SUPER_ACL privilege */
+ if (check_global_access(thd, SUPER_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ heap = mem_heap_create(1000);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT_COLS);
+
+ while (rec) {
+ const char* err_msg;
+ ulint table_id;
+ ulint column_pos;
+ ulint dict_id;
+
+ /* Extract necessary information from a SYS_ZIP_DICT_COLS
+ row */
+ err_msg = dict_process_sys_zip_dict_cols(
+ heap, rec, &table_id, &column_pos, &dict_id);
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+
+ if (!err_msg) {
+ xtradb_i_s_dict_fill_sys_zip_dict_cols(
+ thd, table_id, column_pos, dict_id,
+ tables->table);
+ } else {
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
+ }
+
+ mem_heap_empty(heap);
+
+ /* Get the next record */
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ mem_heap_free(heap);
+
+ DBUG_RETURN(0);
+}
+
+static int i_s_xtradb_zip_dict_cols_init(void* p)
+{
+ DBUG_ENTER("i_s_xtradb_zip_dict_cols_init");
+
+ ST_SCHEMA_TABLE* schema = static_cast<ST_SCHEMA_TABLE*>(p);
+
+ schema->fields_info = xtradb_sys_zip_dict_cols_fields_info;
+ schema->fill_table = xtradb_i_s_sys_zip_dict_cols_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict_cols =
+{
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+ STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(name, "XTRADB_ZIP_DICT_COLS"),
+ STRUCT_FLD(author, PLUGIN_AUTHOR),
+ STRUCT_FLD(descr, "InnoDB compressed columns information"),
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+ STRUCT_FLD(init, i_s_xtradb_zip_dict_cols_init),
+ STRUCT_FLD(deinit, i_s_common_deinit),
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+ STRUCT_FLD(status_vars, NULL),
+ STRUCT_FLD(system_vars, NULL),
+ STRUCT_FLD(__reserved1, NULL),
+ STRUCT_FLD(flags, 0UL),
+};
+#endif
diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h
index 2f7552c565a..905d84587af 100644
--- a/storage/xtradb/handler/xtradb_i_s.h
+++ b/storage/xtradb/handler/xtradb_i_s.h
@@ -22,5 +22,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
extern struct st_mysql_plugin i_s_xtradb_read_view;
extern struct st_mysql_plugin i_s_xtradb_internal_hash_tables;
extern struct st_mysql_plugin i_s_xtradb_rseg;
+extern struct st_mysql_plugin i_s_xtradb_zip_dict;
+extern struct st_mysql_plugin i_s_xtradb_zip_dict_cols;
#endif /* XTRADB_I_S_H */
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc
index d62b869df4f..a9f039d3f0f 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.cc
+++ b/storage/xtradb/ibuf/ibuf0ibuf.cc
@@ -938,7 +938,7 @@ ibuf_set_free_bits_low(
ulint space;
ulint page_no;
- if (!page_is_leaf(buf_block_get_frame(block))) {
+ if (!page_is_leaf(buf_nonnull_block_get_frame(block))) {
return;
}
@@ -1113,7 +1113,7 @@ ibuf_update_free_bits_zip(
page_no = buf_block_get_page_no(block);
zip_size = buf_block_get_zip_size(block);
- ut_a(page_is_leaf(buf_block_get_frame(block)));
+ ut_a(page_is_leaf(buf_nonnull_block_get_frame(block)));
ut_a(zip_size);
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index 7e1083a103c..8110fbc4808 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1069,10 +1069,20 @@ buf_block_get_frame(
/*================*/
const buf_block_t* block) /*!< in: pointer to the control block */
MY_ATTRIBUTE((pure));
-# define buf_block_get_frame_fast(block) buf_block_get_frame(block)
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ MY_ATTRIBUTE((pure));
+
#else /* UNIV_DEBUG */
# define buf_block_get_frame(block) (block ? (block)->frame : 0)
-# define buf_block_get_frame_fast(block) (block)->frame
+# define buf_nonnull_block_get_frame(block) ((block)->frame)
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Gets the space id of a block.
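The split above separates the NULL-tolerant accessor from a fast path for callers that have already validated the block. A contrast sketch (block is assumed non-NULL on the second call):

buf_frame_t*    f1 = buf_block_get_frame(block);
                /* tolerates block == NULL under corrupt-table
                   handling and may return 0 */
buf_frame_t*    f2 = buf_nonnull_block_get_frame(block);
                /* caller guarantees block != NULL, so the
                   SRV_CORRUPT_TABLE_CHECK is skipped */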
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index b40285ae3f0..8a21f44a2ee 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -690,6 +690,19 @@ buf_block_get_frame(
{
SRV_CORRUPT_TABLE_CHECK(block, return(0););
+ return(buf_nonnull_block_get_frame(block));
+}
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+/*========================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
switch (buf_block_get_state(block)) {
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
@@ -711,6 +724,7 @@ buf_block_get_frame(
ok:
return((buf_frame_t*) block->frame);
}
+
#endif /* UNIV_DEBUG */
/*********************************************************************//**
diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h
index 111664b0b52..f269c266efb 100644
--- a/storage/xtradb/include/data0type.h
+++ b/storage/xtradb/include/data0type.h
@@ -170,6 +170,9 @@ be less than 256 */
type when the column is true VARCHAR where
MySQL uses 2 bytes to store the data len;
for shorter VARCHARs MySQL uses only 1 byte */
+#define DATA_COMPRESSED 16384 /* this is ORed to the precise data
+ type when the column has COLUMN_FORMAT =
+ COMPRESSED attribute */
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
@@ -500,6 +503,17 @@ dtype_print(
/*========*/
const dtype_t* type); /*!< in: type */
+/**
+Calculates the number of extra bytes needed for compression header
+depending on precise column type.
+@retval 0 if prtype does not include the DATA_COMPRESSED flag
+@retval ZIP_COLUMN_HEADER_LENGTH if prtype includes the DATA_COMPRESSED flag
+*/
+UNIV_INLINE
+ulint
+prtype_get_compression_extra(
+ ulint prtype); /*!< in: precise type */
+
/* Structure for an SQL data type.
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
index d489bef89a8..29dc480a19c 100644
--- a/storage/xtradb/include/data0type.ic
+++ b/storage/xtradb/include/data0type.ic
@@ -26,6 +26,7 @@ Created 1/16/1996 Heikki Tuuri
#include <string.h> /* strlen() */
#include "mach0data.h"
+#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */
#ifndef UNIV_HOTBACKUP
# include "ha_prototypes.h"
@@ -709,3 +710,18 @@ dtype_get_sql_null_size(
0, 0));
#endif /* !UNIV_HOTBACKUP */
}
+
+/**
+Calculates the number of extra bytes needed for compression header
+depending on precise column type.
+@retval 0 if prtype does not include the DATA_COMPRESSED flag
+@retval ZIP_COLUMN_HEADER_LENGTH if prtype includes the DATA_COMPRESSED flag
+*/
+UNIV_INLINE
+ulint
+prtype_get_compression_extra(
+ ulint prtype) /*!< in: precise type */
+{
+ return (prtype & DATA_COMPRESSED) != 0 ?
+ ZIP_COLUMN_HEADER_LENGTH : 0;
+}
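A worked check of the new inline, combining DATA_COMPRESSED from data0type.h with ZIP_COLUMN_HEADER_LENGTH (2 bytes) from rem0types.h; a minimal sketch for a debug build:

ulint   prtype_plain = DATA_VARCHAR;
ulint   prtype_zip   = DATA_VARCHAR | DATA_COMPRESSED;

ut_ad(prtype_get_compression_extra(prtype_plain) == 0);
ut_ad(prtype_get_compression_extra(prtype_zip)
      == ZIP_COLUMN_HEADER_LENGTH);     /* 2 extra header bytes */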
diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h
index 477e1150f43..d5bee886cbf 100644
--- a/storage/xtradb/include/dict0boot.h
+++ b/storage/xtradb/include/dict0boot.h
@@ -324,6 +324,38 @@ enum dict_fld_sys_datafiles_enum {
DICT_FLD__SYS_DATAFILES__PATH = 3,
DICT_NUM_FIELDS__SYS_DATAFILES = 4
};
+/* The columns in SYS_ZIP_DICT */
+enum dict_col_sys_zip_dict_enum {
+ DICT_COL__SYS_ZIP_DICT__ID = 0,
+ DICT_COL__SYS_ZIP_DICT__NAME = 1,
+ DICT_COL__SYS_ZIP_DICT__DATA = 2,
+ DICT_NUM_COLS__SYS_ZIP_DICT = 3
+};
+/* The field numbers in the SYS_ZIP_DICT clustered index */
+enum dict_fld_sys_zip_dict_enum {
+ DICT_FLD__SYS_ZIP_DICT__ID = 0,
+ DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID = 1,
+ DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_ZIP_DICT__NAME = 3,
+ DICT_FLD__SYS_ZIP_DICT__DATA = 4,
+ DICT_NUM_FIELDS__SYS_ZIP_DICT = 5
+};
+/* The columns in SYS_ZIP_DICT_COLS */
+enum dict_col_sys_zip_dict_cols_enum {
+ DICT_COL__SYS_ZIP_DICT_COLS__TABLE_ID = 0,
+ DICT_COL__SYS_ZIP_DICT_COLS__COLUMN_POS = 1,
+ DICT_COL__SYS_ZIP_DICT_COLS__DICT_ID = 2,
+ DICT_NUM_COLS__SYS_ZIP_DICT_COLS = 3
+};
+/* The field numbers in the SYS_ZIP_DICT_COLS clustered index */
+enum dict_fld_sys_zip_dict_cols_enum {
+ DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID = 0,
+ DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS = 1,
+ DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID = 2,
+ DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR = 3,
+ DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID = 4,
+ DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS = 5
+};
/* A number of the columns above occur in multiple tables. These are the
lengths of those fields. */
diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h
index 6fd37b90799..686f56ad58c 100644
--- a/storage/xtradb/include/dict0crea.h
+++ b/storage/xtradb/include/dict0crea.h
@@ -163,6 +163,19 @@ UNIV_INTERN
dberr_t
dict_create_or_check_sys_tablespace(void);
/*=====================================*/
+
+#define ZIP_DICT_MAX_NAME_LENGTH 64
+/* Max window size (2^15) minus 262 */
+#define ZIP_DICT_MAX_DATA_LENGTH 32506
+
+/** Creates the zip_dict system table inside InnoDB
+at server bootstrap or server start if it is not found or is
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_zip_dict(void);
+
/********************************************************************//**
Add a single tablespace definition to the data dictionary tables in the
database.
@@ -178,6 +191,84 @@ dict_create_add_tablespace_to_dictionary(
trx_t* trx, /*!< in: transaction */
bool commit); /*!< in: if true then commit the
transaction */
+
+/** Add a single compression dictionary definition to the SYS_ZIP_DICT
+InnoDB system table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict(
+ const char* name, /*!< in: dict name */
+ ulint name_len, /*!< in: dict name length */
+ const char* data, /*!< in: dict data */
+ ulint data_len, /*!< in: dict data length */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS
+InnoDB system table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict_reference(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint dict_id, /*!< in: dict id */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Get a single compression dictionary id for the given
+(table id, column pos) pair.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_reference(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint* dict_id, /*!< out: dict id */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Get compression dictionary id for the given name.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_name(
+ const char* dict_name, /*!< in: dict name */
+ ulint dict_name_len, /*!< in: dict name length */
+ ulint* dict_id, /*!< out: dict id */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for name and data on success.
+Must be freed with mem_free().
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_info_by_id(
+ ulint dict_id, /*!< in: dict id */
+ char** name, /*!< out: dict name */
+ ulint* name_len, /*!< out: dict name length */
+ char** data, /*!< out: dict data */
+ ulint* data_len, /*!< out: dict data length */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Remove a single compression dictionary from the data dictionary
+tables in the database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict(
+ const char* name, /*!< in: dict name */
+ ulint name_len, /*!< in: dict name length */
+ trx_t* trx); /*!< in/out: transaction */
+
+/** Remove all compression dictionary references for the given table ID from
+the data dictionary tables in the database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict_references_for_table(
+ ulint table_id, /*!< in: table id */
+ trx_t* trx); /*!< in/out: transaction */
+
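Taken together, the declarations above cover the whole dictionary lifecycle. A hedged end-to-end sketch; a real caller runs this inside a dictionary transaction and checks every returned dberr_t, and payload, payload_len, trx, table_id and column_pos are assumed from context:

/* create the dictionary */
dict_create_add_zip_dict("d1", 2, payload, payload_len, trx);

/* attach it to a column */
ulint   dict_id;
dict_create_get_zip_dict_id_by_name("d1", 2, &dict_id, trx);
dict_create_add_zip_dict_reference(table_id, column_pos, dict_id, trx);

/* detach on DROP TABLE, then drop the dictionary itself */
dict_create_remove_zip_dict_references_for_table(table_id, trx);
dict_create_remove_zip_dict("d1", 2, trx);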
/********************************************************************//**
Add a foreign key definition to the data dictionary tables.
@return error code or DB_SUCCESS */
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index d8a6bc927b6..1b6110dd010 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -1871,6 +1871,52 @@ dict_table_set_corrupt_by_space(
ulint space_id,
ibool need_mutex);
+/** Insert a record into SYS_ZIP_DICT.
+@retval DB_SUCCESS if OK
+@retval other dberr_t code if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_create_zip_dict(
+ const char* name, /*!< in: zip_dict name */
+ ulint name_len, /*!< in: zip_dict name length*/
+ const char* data, /*!< in: zip_dict data */
+ ulint data_len); /*!< in: zip_dict data length */
+
+/** Get a single compression dictionary id for the given
+(table id, column pos) pair.
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_id_by_key(
+ ulint table_id, /*!< in: table id */
+ ulint column_pos, /*!< in: column position */
+ ulint* dict_id); /*!< out: zip_dict id */
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory in *name and *data on success.
+Must be freed with mem_free().
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_info_by_id(
+ ulint dict_id, /*!< in: dictionary id */
+ char** name, /*!< out: dictionary name */
+ ulint* name_len, /*!< out: dictionary name length*/
+ char** data, /*!< out: dictionary data */
+ ulint* data_len); /*!< out: dictionary data length*/
+
+/** Delete a record in SYS_ZIP_DICT with the given name.
+@retval DB_SUCCESS if OK
+@retval DB_RECORD_NOT_FOUND if not found
+@retval DB_ROW_IS_REFERENCED if in use */
+UNIV_INTERN
+dberr_t
+dict_drop_zip_dict(
+ const char* name, /*!< in: zip_dict name */
+ ulint name_len); /*!< in: zip_dict name length*/
+
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h
index dcbc3de8e94..85e3e565637 100644
--- a/storage/xtradb/include/dict0load.h
+++ b/storage/xtradb/include/dict0load.h
@@ -44,6 +44,8 @@ enum dict_system_id_t {
SYS_FOREIGN_COLS,
SYS_TABLESPACES,
SYS_DATAFILES,
+ SYS_ZIP_DICT,
+ SYS_ZIP_DICT_COLS,
/* This must be last item. Defines the number of system tables. */
SYS_NUM_SYSTEM_TABLES
@@ -386,6 +388,33 @@ dict_process_sys_datafiles(
const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
ulint* space, /*!< out: pace id */
const char** path); /*!< out: datafile path */
+
+/** This function parses a SYS_ZIP_DICT record, extracts necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict(
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ ulint zip_size, /*!< in: nonzero=compressed BLOB page size */
+ const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */
+ ulint* id, /*!< out: dict id */
+ const char** name, /*!< out: dict name */
+ const char** data, /*!< out: dict data */
+ ulint* data_len); /*!< out: dict data length */
+
+/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict_cols(
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */
+ ulint* table_id, /*!< out: table id */
+ ulint* column_pos, /*!< out: column position */
+ ulint* dict_id); /*!< out: dict id */
+
/********************************************************************//**
Get the filepath for a spaceid from SYS_DATAFILES. This function provides
a temporary heap which is used for the table lookup, but not for the path.
diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h
index 68d4d333245..3e2f359bbeb 100644
--- a/storage/xtradb/include/fts0fts.h
+++ b/storage/xtradb/include/fts0fts.h
@@ -375,6 +375,7 @@ extern bool fts_need_sync;
/** Variable specifying the table that has Fulltext index to display its
content through information schema table */
extern char* fts_internal_tbl_name;
+extern char* fts_internal_tbl_name2;
#define fts_que_graph_free(graph) \
do { \
@@ -823,6 +824,15 @@ void
fts_drop_orphaned_tables(void);
/*==========================*/
+/** Get the parent table name if it is an FTS aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+ const char* aux_table_name,
+ ulint aux_table_len);
+
/******************************************************************//**
Since we do a horizontal split on the index table, we need to drop
all the split tables.
@@ -840,13 +850,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
@param[in] unlock_cache whether unlock cache when write node
@param[in] wait whether wait for existing sync to finish
+@param[in] has_dict whether has dict operation lock
@return DB_SUCCESS on success, error code on failure. */
UNIV_INTERN
dberr_t
fts_sync_table(
dict_table_t* table,
bool unlock_cache,
- bool wait);
+ bool wait,
+ bool has_dict);
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h
index 815faf97319..671b9b7dc3f 100644
--- a/storage/xtradb/include/os0thread.h
+++ b/storage/xtradb/include/os0thread.h
@@ -131,14 +131,27 @@ os_thread_create_func(
os_thread_id_t* thread_id); /*!< out: id of the created
thread, or NULL */
+/**
+Waits until the specified thread completes, then joins it. The thread's
+exit value is ignored.
+
+@param thread thread to join */
+UNIV_INTERN
+void
+os_thread_join(
+ os_thread_t thread);
+
/*****************************************************************//**
Exits the current thread. */
UNIV_INTERN
void
os_thread_exit(
/*===========*/
- void* exit_value) /*!< in: exit value; in Windows this void*
+ void* exit_value, /*!< in: exit value; in Windows this void*
is cast as a DWORD */
+ bool detach = true) /*!< in: if true, the thread will be detached
+ right before exiting. If false, another thread
+ is responsible for joining this thread. */
UNIV_COLD MY_ATTRIBUTE((noreturn));
/*****************************************************************//**
Returns the thread identifier of current thread.
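
The detach-versus-join split introduced here mirrors the standard
pthread pattern. A minimal self-contained sketch (plain pthreads, not
the InnoDB wrappers) of what os_thread_join() and a non-detaching
os_thread_exit() amount to on POSIX:

    /* Self-contained pthread sketch of the join-vs-detach pattern;
       compile with -lpthread. Illustrative only. */
    #include <pthread.h>
    #include <cstdio>

    static void*
    worker(void*)
    {
        std::puts("worker done");
        /* detach == false case: no pthread_detach() here; the
        spawner is responsible for joining this thread */
        pthread_exit(NULL);
    }

    int
    main()
    {
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        /* what os_thread_join() does: wait for completion and reap
        the thread; its exit value is ignored */
        pthread_join(t, NULL);
        return(0);
    }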
diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h
index f8133f77466..5da96066f88 100644
--- a/storage/xtradb/include/rem0types.h
+++ b/storage/xtradb/include/rem0types.h
@@ -71,4 +71,7 @@ enum rec_format_enum {
};
typedef enum rec_format_enum rec_format_t;
+/** Compressed field header size in bytes */
+#define ZIP_COLUMN_HEADER_LENGTH 2
+
#endif
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 8e219d3e856..70da84640e5 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -41,6 +41,9 @@ struct SysIndexCallback;
extern ibool row_rollback_on_timeout;
+extern uint srv_compressed_columns_zip_level;
+extern ulong srv_compressed_columns_threshold;
+
struct row_prebuilt_t;
/*******************************************************************//**
@@ -51,6 +54,49 @@ row_mysql_prebuilt_free_blob_heap(
/*==============================*/
row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a
ha_innobase:: table handle */
+
+/** Frees the compress heap in prebuilt when no longer needed. */
+UNIV_INTERN
+void
+row_mysql_prebuilt_free_compress_heap(
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a
+ ha_innobase:: table handle */
+
+/** Uncompress blob/text/varchar column using zlib
+@return pointer to the uncompressed data */
+const byte*
+row_decompress_column(
+ const byte* data, /*!< in: data in innodb(compressed) format */
+ ulint *len, /*!< in: data length; out: length of
+ decompressed data*/
+ const byte* dict_data,
+ /*!< in: optional dictionary data used for
+ decompression */
+ ulint dict_data_len,
+ /*!< in: optional dictionary data length */
+ row_prebuilt_t* prebuilt);
+ /*!< in: use prebuilt->compress_heap only
+ here*/
+
+/** Compress blob/text/varchar column using zlib
+@return pointer to the compressed data */
+byte*
+row_compress_column(
+ const byte* data, /*!< in: data in mysql(uncompressed)
+ format */
+ ulint *len, /*!< in: data length; out: length of
+ compressed data*/
+ ulint lenlen, /*!< in: bytes used to store the length of
+ data */
+ const byte* dict_data,
+ /*!< in: optional dictionary data used for
+ compression */
+ ulint dict_data_len,
+ /*!< in: optional dictionary data length */
+ row_prebuilt_t* prebuilt);
+ /*!< in: use prebuilt->compress_heap only
+ here*/
+
/*******************************************************************//**
Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
format.
@@ -89,10 +135,21 @@ row_mysql_store_blob_ref(
to 4 bytes */
const void* data, /*!< in: BLOB data; if the value to store
is SQL NULL this should be NULL pointer */
- ulint len); /*!< in: BLOB length; if the value to store
+ ulint len, /*!< in: BLOB length; if the value to store
is SQL NULL this should be 0; remember
also to set the NULL bit in the MySQL record
header! */
+ bool need_decompression,
+ /*!< in: if the data needs to be decompressed */
+ const byte* dict_data,
+ /*!< in: optional compression dictionary
+ data */
+ ulint dict_data_len,
+ /*!< in: optional compression dictionary data
+ length */
+ row_prebuilt_t* prebuilt);
+ /*!< in: use prebuilt->compress_heap only
+ here */
/*******************************************************************//**
Reads a reference to a BLOB in the MySQL format.
@return pointer to BLOB data */
@@ -103,8 +160,17 @@ row_mysql_read_blob_ref(
ulint* len, /*!< out: BLOB length */
const byte* ref, /*!< in: BLOB reference in the
MySQL format */
- ulint col_len); /*!< in: BLOB reference length
+ ulint col_len, /*!< in: BLOB reference length
(not BLOB length) */
+ bool need_compression,
+ /*!< in: if the data needs to be
+ compressed */
+ const byte* dict_data, /*!< in: optional compression
+ dictionary data */
+ ulint dict_data_len, /*!< in: optional compression
+ dictionary data length */
+ row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap
+ only here */
/**************************************************************//**
Pad a column with spaces. */
UNIV_INTERN
@@ -152,7 +218,16 @@ row_mysql_store_col_in_innobase_format(
necessarily the length of the actual
payload data; if the column is a true
VARCHAR then this is irrelevant */
- ulint comp); /*!< in: nonzero=compact format */
+ ulint comp, /*!< in: nonzero=compact format */
+ bool need_compression,
+ /*!< in: if the data needs to be
+ compressed */
+ const byte* dict_data, /*!< in: optional compression
+ dictionary data */
+ ulint dict_data_len, /*!< in: optional compression
+ dictionary data length */
+ row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap
+ only here */
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
@return true if it was a lock wait and we should continue running the
@@ -646,6 +721,8 @@ struct mysql_row_templ_t {
ulint is_unsigned; /*!< if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */
+ bool compressed; /*!< if column format is compressed */
+ LEX_CSTRING zip_dict_data; /*!< associated compression dictionary */
};
#define MYSQL_FETCH_CACHE_SIZE 8
@@ -843,6 +920,8 @@ struct row_prebuilt_t {
in fetch_cache */
mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied
to this heap */
+ mem_heap_t* compress_heap; /*!< memory heap used to compress
+ and decompress BLOB columns */
mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
bool in_fts_query; /*!< Whether we are in a FTS query */
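
Since row_compress_column()/row_decompress_column() are implemented in
terms of zlib preset dictionaries (see row0mysql.cc below), a
self-contained round trip against raw zlib may clarify the mechanism.
All names here are local to the sketch; only the zlib API is assumed:

    /* Self-contained zlib preset-dictionary round trip; link with -lz.
       Sketch only: mirrors the mechanism, not the InnoDB code paths. */
    #include <zlib.h>
    #include <cstdio>
    #include <cstring>

    int
    main()
    {
        const Bytef dict[] = "customer_name=";
        const Bytef src[] = "customer_name=Alice";
        Bytef comp[128];
        Bytef out[128];

        /* compress with the zlib wrapper on (positive window bits),
        matching the 'wrap' header bit described in this patch */
        z_stream c;
        std::memset(&c, 0, sizeof(c));
        if (deflateInit2(&c, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                         MAX_WBITS, MAX_MEM_LEVEL,
                         Z_DEFAULT_STRATEGY) != Z_OK
            || deflateSetDictionary(&c, dict, sizeof(dict) - 1) != Z_OK) {
            return(1);
        }
        c.next_in = const_cast<Bytef*>(src);
        c.avail_in = sizeof(src) - 1;
        c.next_out = comp;
        c.avail_out = sizeof(comp);
        if (deflate(&c, Z_FINISH) != Z_STREAM_END) {
            return(1);
        }
        uLong comp_len = c.total_out;
        deflateEnd(&c);

        /* decompress: inflate() asks for the dictionary, we supply it */
        z_stream d;
        std::memset(&d, 0, sizeof(d));
        if (inflateInit2(&d, MAX_WBITS) != Z_OK) {
            return(1);
        }
        d.next_in = comp;
        d.avail_in = (uInt) comp_len;
        d.next_out = out;
        d.avail_out = sizeof(out);
        int err = inflate(&d, Z_FINISH);
        if (err == Z_NEED_DICT) {
            inflateSetDictionary(&d, dict, sizeof(dict) - 1);
            err = inflate(&d, Z_FINISH);
        }
        std::printf("err=%d out=%.*s\n", err, (int) d.total_out, out);
        inflateEnd(&d);
        return(err == Z_STREAM_END ? 0 : 1);
    }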
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index b6507b136bc..8ea92f69368 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -181,8 +181,10 @@ extern os_event_t srv_checkpoint_completed_event;
log tracking iteration */
extern os_event_t srv_redo_log_tracked_event;
-/** srv_redo_log_follow_thread spawn flag */
-extern bool srv_redo_log_thread_started;
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
+extern bool srv_redo_log_thread_started;
/* If the last data file is auto-extended, we add this many pages to it
at a time */
@@ -278,6 +280,10 @@ extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
+
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
extern my_bool srv_track_changed_pages;
extern ulonglong srv_max_bitmap_file_size;
@@ -507,6 +513,9 @@ extern ibool srv_priority_boost;
extern ulint srv_truncated_status_writes;
extern ulint srv_available_undo_logs;
+extern ulint srv_column_compressed;
+extern ulint srv_column_decompressed;
+
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
@@ -1105,6 +1114,8 @@ struct export_var_t{
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- purged view's min trx_id */
#endif /* UNIV_DEBUG */
+ ulint innodb_column_compressed; /*!< srv_column_compressed */
+ ulint innodb_column_decompressed; /*!< srv_column_decompressed */
};
/** Thread slot in the thread table. */
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 3646978ac15..fb103e2c9b8 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -44,10 +44,10 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 31
+#define INNODB_VERSION_BUGFIX 32
#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 77.0
+#define PERCONA_INNODB_VERSION 79.0
#endif
/* Enable UNIV_LOG_ARCHIVE in XtraDB */
diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc
index 9be93c3f5ac..15183b9c1cb 100644
--- a/storage/xtradb/log/log0log.cc
+++ b/storage/xtradb/log/log0log.cc
@@ -1005,6 +1005,7 @@ log_init(void)
log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = log_sys->lsn;
+ log_sys->next_checkpoint_lsn = log_sys->lsn;
log_sys->n_pending_checkpoint_writes = 0;
@@ -1928,6 +1929,7 @@ log_complete_checkpoint(void)
log_sys->next_checkpoint_no++;
+ ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn);
log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
@@ -2015,11 +2017,17 @@ log_group_checkpoint(
ulint i;
ut_ad(!srv_read_only_mode);
+ ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE);
ut_ad(mutex_own(&(log_sys->mutex)));
ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
buf = group->checkpoint_buf;
+#ifdef UNIV_DEBUG
+ lsn_t old_next_checkpoint_lsn
+ = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
+ ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn);
+#endif /* UNIV_DEBUG */
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
@@ -2294,6 +2302,7 @@ log_checkpoint(
return(FALSE);
}
+ ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn);
log_sys->next_checkpoint_lsn = oldest_lsn;
#ifdef UNIV_DEBUG
@@ -3612,13 +3621,15 @@ loop:
before proceeding further. */
srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
count = 0;
- while (buf_page_cleaner_is_active) {
- ++count;
- os_thread_sleep(100000);
- if (srv_print_verbose_log && count > 600) {
+ while (buf_page_cleaner_is_active || buf_lru_manager_is_active) {
+ if (srv_print_verbose_log && count == 0) {
ib_logf(IB_LOG_LEVEL_INFO,
"Waiting for page_cleaner to "
"finish flushing of buffer pool");
+ }
+ ++count;
+ os_thread_sleep(100000);
+ if (count > 600) {
count = 0;
}
}
@@ -3694,7 +3705,7 @@ loop:
/* Wake the log tracking thread which will then immediatelly
quit because of srv_shutdown_state value */
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
os_event_reset(srv_redo_log_tracked_event);
os_event_set(srv_checkpoint_completed_event);
}
@@ -3773,7 +3784,7 @@ loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Signal the log following thread to quit */
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
os_event_reset(srv_redo_log_tracked_event);
os_event_set(srv_checkpoint_completed_event);
}
@@ -3786,6 +3797,7 @@ loop:
ut_a(freed);
ut_a(lsn == log_sys->lsn);
+ ut_ad(lsn == log_sys->last_checkpoint_lsn);
if (lsn < srv_start_lsn) {
ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc
index 65176548532..209ca743628 100644
--- a/storage/xtradb/log/log0online.cc
+++ b/storage/xtradb/log/log0online.cc
@@ -433,6 +433,7 @@ log_online_track_missing_on_startup(
current server startup */
{
ut_ad(last_tracked_lsn != tracking_start_lsn);
+ ut_ad(srv_track_changed_pages);
ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF
", but the last checkpoint LSN is " LSN_PF ". This might be "
@@ -615,6 +616,8 @@ log_online_read_init(void)
compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);
+ ut_ad(srv_track_changed_pages);
+
log_bmp_sys = static_cast<log_bitmap_struct *>
(ut_malloc(sizeof(*log_bmp_sys)));
log_bmp_sys->read_buf_ptr = static_cast<byte *>
@@ -1089,10 +1092,15 @@ log_online_write_bitmap_page(
{
ibool success;
+ ut_ad(srv_track_changed_pages);
ut_ad(mutex_own(&log_bmp_sys->mutex));
/* Simulate a write error */
- DBUG_EXECUTE_IF("bitmap_page_write_error", return FALSE;);
+ DBUG_EXECUTE_IF("bitmap_page_write_error",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "simulating bitmap write error in "
+ "log_online_write_bitmap_page");
+ return FALSE;);
success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
block, log_bmp_sys->out.offset,
@@ -1182,7 +1190,9 @@ log_online_write_bitmap(void)
rbt_next(log_bmp_sys->modified_pages, bmp_tree_node);
DBUG_EXECUTE_IF("bitmap_page_2_write_error",
- DBUG_SET("+d,bitmap_page_write_error"););
+ ut_ad(bmp_tree_node); /* 2nd page must exist */
+ DBUG_SET("+d,bitmap_page_write_error");
+ DBUG_SET("-d,bitmap_page_2_write_error"););
}
rbt_reset(log_bmp_sys->modified_pages);
@@ -1203,15 +1213,11 @@ log_online_follow_redo_log(void)
log_group_t* group;
ibool result;
- mutex_enter(&log_bmp_sys->mutex);
-
- if (!srv_track_changed_pages) {
- mutex_exit(&log_bmp_sys->mutex);
- return FALSE;
- }
-
+ ut_ad(srv_track_changed_pages);
ut_ad(!srv_read_only_mode);
+ mutex_enter(&log_bmp_sys->mutex);
+
/* Grab the LSN of the last checkpoint, we will parse up to it */
mutex_enter(&(log_sys->mutex));
log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn;
@@ -1554,9 +1560,12 @@ log_online_diagnose_bitmap_eof(
/* It's a "Warning" here because it's not a fatal error
for the whole server */
ib_logf(IB_LOG_LEVEL_WARN,
- "changed page bitmap file \'%s\' does not "
- "contain a complete run at the end.",
- bitmap_file->name);
+ "changed page bitmap file \'%s\', size "
+ UINT64PF " bytes, does not "
+ "contain a complete run at the next read "
+ "offset " UINT64PF,
+ bitmap_file->name, bitmap_file->size,
+ bitmap_file->offset);
return FALSE;
}
}
@@ -1788,20 +1797,20 @@ log_online_purge_changed_page_bitmaps(
lsn = LSN_MAX;
}
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
/* User requests might happen with both enabled and disabled
tracking */
mutex_enter(&log_bmp_sys->mutex);
}
if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) {
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
mutex_exit(&log_bmp_sys->mutex);
}
return TRUE;
}
- if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) {
+ if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) {
/* If we have to delete the current output file, close it
first. */
os_file_close(log_bmp_sys->out.file);
@@ -1834,7 +1843,7 @@ log_online_purge_changed_page_bitmaps(
}
}
- if (srv_track_changed_pages) {
+ if (srv_redo_log_thread_started) {
if (lsn > log_bmp_sys->end_lsn) {
lsn_t new_file_lsn;
if (lsn == LSN_MAX) {
@@ -1845,9 +1854,7 @@ log_online_purge_changed_page_bitmaps(
new_file_lsn = log_bmp_sys->end_lsn;
}
if (!log_online_rotate_bitmap_file(new_file_lsn)) {
- /* If file create failed, signal the log
- tracking thread to quit next time it wakes
- up. */
+ /* If file create failed, stop log tracking */
srv_track_changed_pages = FALSE;
}
}
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
index 2e9e8b6e75c..9e42fb5cc1c 100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@ -386,12 +386,6 @@ recv_sys_init(
}
#ifndef UNIV_HOTBACKUP
- /* Initialize red-black tree for fast insertions into the
- flush_list during recovery process.
- As this initialization is done while holding the buffer pool
- mutex we perform it before acquiring recv_sys->mutex. */
- buf_flush_init_flush_rbt();
-
mutex_enter(&(recv_sys->mutex));
recv_sys->heap = mem_heap_create_typed(256,
@@ -481,9 +475,6 @@ recv_sys_debug_free(void)
recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex));
-
- /* Free up the flush_rbt. */
- buf_flush_free_flush_rbt();
}
# endif /* UNIV_LOG_DEBUG */
@@ -3125,6 +3116,11 @@ recv_recovery_from_checkpoint_start_func(
byte* log_hdr_buf_base = reinterpret_cast<byte *>
(alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
dberr_t err;
+
+ /* Initialize red-black tree for fast insertions into the
+ flush_list during recovery process. */
+ buf_flush_init_flush_rbt();
+
ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
log_hdr_buf = static_cast<byte *>
@@ -3544,6 +3540,9 @@ recv_recovery_from_checkpoint_finish(void)
#ifndef UNIV_LOG_DEBUG
recv_sys_debug_free();
#endif
+ /* Free up the flush_rbt. */
+ buf_flush_free_flush_rbt();
+
/* Roll back any recovered data dictionary transactions, so
that the data dictionary tables will be free of any locks.
The data dictionary latch should guarantee that there is at
diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc
index df68aab8a18..206434dc5ab 100644
--- a/storage/xtradb/mach/mach0data.cc
+++ b/storage/xtradb/mach/mach0data.cc
@@ -56,7 +56,18 @@ mach_parse_compressed(
*val = flag;
return(ptr + 1);
- } else if (flag < 0xC0UL) {
+ }
+
+ /* Workaround GCC bug
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673:
+ the compiler moves mach_read_from_4 right to the beginning of the
+ function, causing an out-of-bounds read if we are reading a short
+ integer close to the end of the buffer. */
+#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__)
+ asm volatile("": : :"memory");
+#endif
+
+ if (flag < 0xC0UL) {
if (end_ptr < ptr + 2) {
return(NULL);
}
diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc
index aabdd06d76b..af826027efc 100644
--- a/storage/xtradb/os/os0thread.cc
+++ b/storage/xtradb/os/os0thread.cc
@@ -210,14 +210,42 @@ os_thread_create_func(
#endif
}
+/**
+Waits until the specified thread completes, then joins it. The thread's
+exit value is ignored.
+
+@param thread thread to join */
+UNIV_INTERN
+void
+os_thread_join(
+ os_thread_t thread)
+{
+ /* This function is currently only used to work around the glibc
+ bug described in http://bugs.mysql.com/bug.php?id=82886
+
+ On Windows, no workaround is necessary: all threads are
+ "detached" upon thread exit (the handle is closed), so we do
+ nothing.
+ */
+#ifndef _WIN32
+ int ret MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL);
+
+ /* Waiting on already-quit threads is allowed */
+ ut_ad(ret == 0 || ret == ESRCH);
+#endif
+}
+
/*****************************************************************//**
Exits the current thread. */
UNIV_INTERN
void
os_thread_exit(
/*===========*/
- void* exit_value) /*!< in: exit value; in Windows this void*
+ void* exit_value, /*!< in: exit value; in Windows this void*
is cast as a DWORD */
+ bool detach) /*!< in: if true, the thread will be detached
+ right before exiting. If false, another thread
+ is responsible for joining this thread. */
{
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Thread exits, id %lu\n",
@@ -233,7 +261,8 @@ os_thread_exit(
#ifdef __WIN__
ExitThread((DWORD) exit_value);
#else
- pthread_detach(pthread_self());
+ if (detach)
+ pthread_detach(pthread_self());
pthread_exit(exit_value);
#endif
}
diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc
index 75e0bba343d..f8eb8076547 100644
--- a/storage/xtradb/rem/rem0rec.cc
+++ b/storage/xtradb/rem/rem0rec.cc
@@ -323,7 +323,8 @@ rec_init_offsets_comp_ordinary(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
+ if (UNIV_UNLIKELY(col->len > 255 -
+ prtype_get_compression_extra(col->prtype))
|| UNIV_UNLIKELY(col->mtype
== DATA_BLOB)) {
if (len & 0x80) {
@@ -844,8 +845,12 @@ rec_get_converted_size_comp_prefix_low(
continue;
}
- ut_ad(len <= col->len || col->mtype == DATA_BLOB
- || (col->len == 0 && col->mtype == DATA_VARCHAR));
+ ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
+ ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
+ || col->mtype == DATA_VARMYSQL)
+ && (col->len == 0
+ || len <= col->len +
+ prtype_get_compression_extra(col->prtype))));
fixed_len = field->fixed_len;
if (temp && fixed_len
@@ -877,7 +882,9 @@ rec_get_converted_size_comp_prefix_low(
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
} else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
+ || (col->len < 256 -
+ prtype_get_compression_extra(col->prtype)
+ && col->mtype != DATA_BLOB)) {
extra_size++;
} else {
/* For variable-length columns, we look up the
@@ -1272,12 +1279,16 @@ rec_convert_dtuple_to_rec_comp(
*lens-- = (byte) (len >> 8) | 0xc0;
*lens-- = (byte) len;
} else {
- ut_ad(len <= dtype_get_len(type)
+ ut_ad(len <= dtype_get_len(type) +
+ prtype_get_compression_extra(
+ dtype_get_prtype(type))
|| dtype_get_mtype(type) == DATA_BLOB
|| !strcmp(index->name,
FTS_INDEX_TABLE_IND_NAME));
if (len < 128
- || (dtype_get_len(type) < 256
+ || (dtype_get_len(type) < 256 -
+ prtype_get_compression_extra(
+ dtype_get_prtype(type))
&& dtype_get_mtype(type) != DATA_BLOB)) {
*lens-- = (byte) len;
diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc
index 51cc53ff260..cb47d605623 100644
--- a/storage/xtradb/row/row0ftsort.cc
+++ b/storage/xtradb/row/row0ftsort.cc
@@ -961,7 +961,7 @@ fts_parallel_merge(
CloseHandle(psort_info->thread_hdl);
#endif /*__WIN__ */
- os_thread_exit(NULL);
+ os_thread_exit(NULL, false);
OS_THREAD_DUMMY_RETURN;
}
diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc
index b84e3113ea8..3c5d5773aee 100644
--- a/storage/xtradb/row/row0log.cc
+++ b/storage/xtradb/row/row0log.cc
@@ -613,7 +613,7 @@ row_log_table_delete(
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
- mrec_size = 4 + old_pk_size;
+ mrec_size = 6 + old_pk_size;
/* Log enough prefix of the BLOB unless both the
old and new table are in COMPACT or REDUNDANT format,
@@ -643,8 +643,8 @@ row_log_table_delete(
*b++ = static_cast<byte>(old_pk_extra_size);
/* Log the size of external prefix we saved */
- mach_write_to_2(b, ext_size);
- b += 2;
+ mach_write_to_4(b, ext_size);
+ b += 4;
rec_convert_dtuple_to_temp(
b + old_pk_extra_size, new_index,
@@ -2268,14 +2268,14 @@ row_log_table_apply_op(
break;
case ROW_T_DELETE:
- /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
- if (mrec + 4 >= mrec_end) {
+ /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+ if (mrec + 6 >= mrec_end) {
return(NULL);
}
extra_size = *mrec++;
- ext_size = mach_read_from_2(mrec);
- mrec += 2;
+ ext_size = mach_read_from_4(mrec);
+ mrec += 4;
ut_ad(mrec < mrec_end);
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
index 83f6ccb90c3..e397053949e 100644
--- a/storage/xtradb/row/row0merge.cc
+++ b/storage/xtradb/row/row0merge.cc
@@ -525,7 +525,12 @@ row_merge_buf_add(
dfield_set_len(field, len);
}
- ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+ ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
+ ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
+ || col->mtype == DATA_VARMYSQL)
+ && (col->len == 0
+ || len <= col->len +
+ prtype_get_compression_extra(col->prtype))));
fixed_len = ifield->fixed_len;
if (fixed_len && !dict_table_is_comp(index->table)
@@ -554,7 +559,9 @@ row_merge_buf_add(
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
+ || (col->len < 256 -
+ prtype_get_compression_extra(col->prtype)
+ && col->mtype != DATA_BLOB)) {
extra_size++;
} else {
/* For variable-length columns, we look up the
@@ -1995,7 +2002,7 @@ wait_again:
/* Sync fts cache for other fts indexes to keep all
fts indexes consistent in sync_doc_id. */
err = fts_sync_table(const_cast<dict_table_t*>(new_table),
- false, true);
+ false, true, false);
if (err == DB_SUCCESS) {
fts_update_next_doc_id(
@@ -3823,6 +3830,13 @@ wait_again:
" exited when creating FTS"
" index '%s'",
indexes[i]->name);
+ } else {
+ for (j = 0; j < FTS_NUM_AUX_INDEX;
+ j++) {
+
+ os_thread_join(merge_info[j]
+ .thread_hdl);
+ }
}
} else {
/* This cannot report duplicates; an
diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc
index 733c7fef374..46bf523750c 100644
--- a/storage/xtradb/row/row0mysql.cc
+++ b/storage/xtradb/row/row0mysql.cc
@@ -63,11 +63,54 @@ Created 9/17/2000 Heikki Tuuri
#include "row0import.h"
#include "m_string.h"
#include "my_sys.h"
+#include "zlib.h"
#include <algorithm>
/** Provide optional 4.x backwards compatibility for 5.0 and above */
UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
+/**
+Z_NO_COMPRESSION = 0
+Z_BEST_SPEED = 1
+Z_BEST_COMPRESSION = 9
+Z_DEFAULT_COMPRESSION = -1
+Compression level to be used by zlib for compressed-blob columns.
+Settable by user.
+*/
+UNIV_INTERN uint srv_compressed_columns_zip_level = DEFAULT_COMPRESSION_LEVEL;
+/**
+(Z_FILTERED | Z_HUFFMAN_ONLY | Z_RLE | Z_FIXED | Z_DEFAULT_STRATEGY)
+
+The strategy parameter is used to tune the compression algorithm. Use the
+value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only
+(no string match), or Z_RLE to limit match distances to one
+(run-length encoding). Filtered data consists mostly of small values with a
+somewhat random distribution. In this case, the compression algorithm is
+tuned to compress them better.
+The effect of Z_FILTERED is to force more Huffman coding and less string
+matching; it is somewhat intermediate between Z_DEFAULT_STRATEGY and
+Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as Z_HUFFMAN_ONLY,
+but give better compression for PNG image data. The strategy parameter only
+affects the compression ratio but not the correctness of the compressed
+output even if it is not set appropriately. Z_FIXED prevents the use of
+dynamic Huffman codes, allowing for a simpler decoder for special
+applications.
+*/
+const uint srv_compressed_columns_zlib_strategy = Z_DEFAULT_STRATEGY;
+/** Compress the column if the data length exceeds this value. */
+UNIV_INTERN ulong srv_compressed_columns_threshold = 96;
+/**
+Determine if zlib needs to compute adler32 value for the compressed data.
+This variable is similar to page_zip_zlib_wrap, but only used by
+compressed blob columns.
+*/
+const bool srv_compressed_columns_zlib_wrap = true;
+/**
+Determine if zlib will use custom memory allocation functions based on
+InnoDB memory heap routines (mem_heap_t*).
+*/
+const bool srv_compressed_columns_zlib_use_heap = false;
/** Chain node of the list of tables to drop in the background. */
struct row_mysql_drop_t{
char* table_name; /*!< table name */
@@ -171,6 +214,17 @@ row_mysql_prebuilt_free_blob_heap(
prebuilt->blob_heap = NULL;
}
+/** Frees the compress heap in prebuilt when no longer needed. */
+UNIV_INTERN
+void
+row_mysql_prebuilt_free_compress_heap(
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a
+ ha_innobase:: table handle */
+{
+ mem_heap_free(prebuilt->compress_heap);
+ prebuilt->compress_heap = NULL;
+}
+
/*******************************************************************//**
Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
format.
@@ -227,6 +281,425 @@ row_mysql_read_true_varchar(
return(field + 1);
}
+/**
+ Compressed BLOB header format:
+ ---------------------------------------------------------------
+ | reserved | wrap | algorithm | len-len | compressed | unused |
+ | [1] | [1] | [5] | [3] | [1] | [5] |
+ ---------------------------------------------------------------
+ | 0 0 | 1 1 | 2 6 | 7 9 | 10 10 | 11 15 |
+ ---------------------------------------------------------------
+ * 'reserved' bit is planned to be used in future versions of the BLOB
+ header. In this version it must always be
+ 'default_zip_column_reserved_value' (0).
+ * 'wrap' identifies if compression algorithm calculated a checksum
+ (adler32 in case of zlib) and appended it to the compressed data.
+ * 'algorithm' identifies which algorithm was used to compress this BLOB.
+ Currently, the only value 'default_zip_column_algorithm_value' (0) is
+ supported.
+ * 'len-len' field identifies the length of the column length data portion
+ followed by this header (see below).
+ * If 'compressed' bit is set to 1, then this header is immediately followed
+ by 1..8 bytes (depending on the value of 'len-len' bitfield) which
+ determine original (uncompressed) block size. These 'len-len' bytes are
+ followed by compressed representation of the original data.
+ * If 'compressed' bit is set to 0, every other bitfield ('wrap',
+ 'algorithm' and 'len-len') must be ignored. In this case the header is
+ immediately followed by uncompressed (original) data.
+*/
+
+/**
+ Currently the only supported value for the 'reserved' field is
+ false (0).
+*/
+static const bool default_zip_column_reserved_value = false;
+
+/**
+ Currently the only supported value for the 'algorithm' field is 0, which
+ means 'zlib'.
+*/
+static const uint default_zip_column_algorithm_value = 0;
+
+static const size_t zip_column_prefix_max_length =
+ ZIP_COLUMN_HEADER_LENGTH + 8;
+static const size_t zip_column_header_length = ZIP_COLUMN_HEADER_LENGTH;
+
+/* 'reserved', bit 0 */
+static const uint zip_column_reserved = 0;
+/* 0000 0000 0000 0001 */
+static const uint zip_column_reserved_mask = 0x0001;
+
+/* 'wrap', bit 1 */
+static const uint zip_column_wrap = 1;
+/* 0000 0000 0000 0010 */
+static const uint zip_column_wrap_mask = 0x0002;
+
+/* 'algorithm', bit 2,3,4,5,6 */
+static const uint zip_column_algorithm = 2;
+/* 0000 0000 0111 1100 */
+static const uint zip_column_algorithm_mask = 0x007C;
+
+/* 'len-len', bit 7,8,9 */
+static const uint zip_column_data_length = 7;
+/* 0000 0011 1000 0000 */
+static const uint zip_column_data_length_mask = 0x0380;
+
+/* 'compressed', bit 10 */
+static const uint zip_column_compressed = 10;
+/* 0000 0100 0000 0000 */
+static const uint zip_column_compressed_mask = 0x0400;
+
+/** Updates compressed block header with the given components */
+static void
+column_set_compress_header(
+ byte* data,
+ bool compressed,
+ ulint lenlen,
+ uint alg,
+ bool wrap,
+ bool reserved)
+{
+ ulint header = 0;
+ header |= (compressed << zip_column_compressed);
+ header |= (lenlen << zip_column_data_length);
+ header |= (alg << zip_column_algorithm);
+ header |= (wrap << zip_column_wrap);
+ header |= (reserved << zip_column_reserved);
+ mach_write_to_2(data, header);
+}
+
+/** Parse compressed block header into components */
+static void
+column_get_compress_header(
+ const byte* data,
+ bool* compressed,
+ ulint* lenlen,
+ uint* alg,
+ bool* wrap,
+ bool* reserved
+)
+{
+ ulint header = mach_read_from_2(data);
+ *compressed = ((header & zip_column_compressed_mask) >>
+ zip_column_compressed);
+ *lenlen = ((header & zip_column_data_length_mask) >>
+ zip_column_data_length);
+ *alg = ((header & zip_column_algorithm_mask) >>
+ zip_column_algorithm);
+ *wrap = ((header & zip_column_wrap_mask) >>
+ zip_column_wrap);
+ *reserved = ((header & zip_column_reserved_mask) >>
+ zip_column_reserved);
+}
+
+/** Allocate memory for zlib. */
+static
+void*
+column_zip_zalloc(
+ void* opaque, /*!< in/out: memory heap */
+ uInt items, /*!< in: number of items to allocate */
+ uInt size) /*!< in: size of an item in bytes */
+{
+ return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque),
+ items * size));
+}
+
+/** Deallocate memory for zlib. */
+static
+void
+column_zip_free(
+ void* opaque MY_ATTRIBUTE((unused)), /*!< in: memory heap */
+ void* address MY_ATTRIBUTE((unused))) /*!< in: object to free */
+{
+}
+
+/** Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+column_zip_set_alloc(
+ void* stream, /*!< in/out: zlib stream */
+ mem_heap_t* heap) /*!< in: memory heap to use */
+{
+ z_stream* strm = static_cast<z_stream*>(stream);
+
+ if (srv_compressed_columns_zlib_use_heap) {
+ strm->zalloc = column_zip_zalloc;
+ strm->zfree = column_zip_free;
+ strm->opaque = heap;
+ } else {
+ strm->zalloc = (alloc_func)0;
+ strm->zfree = (free_func)0;
+ strm->opaque = (voidpf)0;
+ }
+}
+
+/** Compress blob/text/varchar column using zlib
+@return pointer to the compressed data */
+byte*
+row_compress_column(
+ const byte* data, /*!< in: data in mysql(uncompressed)
+ format */
+ ulint *len, /*!< in: data length; out: length of
+ compressed data*/
+ ulint lenlen, /*!< in: bytes used to store the length of
+ data */
+ const byte* dict_data,
+ /*!< in: optional dictionary data used for
+ compression */
+ ulint dict_data_len,
+ /*!< in: optional dictionary data length */
+ row_prebuilt_t* prebuilt)
+ /*!< in: use prebuilt->compress_heap only
+ here*/
+{
+ int err = 0;
+ ulint comp_len = *len;
+ ulint buf_len = *len + zip_column_prefix_max_length;
+ byte* buf;
+ byte* ptr;
+ z_stream c_stream;
+ bool wrap = srv_compressed_columns_zlib_wrap;
+
+ int window_bits = wrap ? MAX_WBITS : -MAX_WBITS;
+
+ if (!prebuilt->compress_heap) {
+ prebuilt->compress_heap =
+ mem_heap_create(max(UNIV_PAGE_SIZE, buf_len));
+ }
+
+ buf = static_cast<byte*>(mem_heap_zalloc(
+ prebuilt->compress_heap, buf_len));
+
+ if (*len < srv_compressed_columns_threshold ||
+ srv_compressed_columns_zip_level == Z_NO_COMPRESSION)
+ goto do_not_compress;
+
+ ptr = buf + zip_column_header_length + lenlen;
+
+ /* init the deflate object */
+ c_stream.next_in = const_cast<Bytef*>(data);
+ c_stream.avail_in = *len;
+ c_stream.next_out = ptr;
+ c_stream.avail_out = comp_len;
+
+ column_zip_set_alloc(&c_stream, prebuilt->compress_heap);
+
+ err = deflateInit2(&c_stream, srv_compressed_columns_zip_level,
+ Z_DEFLATED, window_bits, MAX_MEM_LEVEL,
+ srv_compressed_columns_zlib_strategy);
+ ut_a(err == Z_OK);
+
+ if (dict_data != 0 && dict_data_len != 0) {
+ err = deflateSetDictionary(&c_stream, dict_data,
+ dict_data_len);
+ ut_a(err == Z_OK);
+ }
+
+ err = deflate(&c_stream, Z_FINISH);
+ if (err != Z_STREAM_END) {
+ deflateEnd(&c_stream);
+ if (err == Z_OK)
+ err = Z_BUF_ERROR;
+ } else {
+ comp_len = c_stream.total_out;
+ err = deflateEnd(&c_stream);
+ }
+
+ switch (err) {
+ case Z_OK:
+ break;
+ case Z_BUF_ERROR:
+ /* the compressed data is larger than the uncompressed data */
+ break;
+ default:
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "failed to compress the column, error: %d\n", err);
+ }
+
+ /* make sure the compressed data size is smaller than
+ uncompressed data */
+ if (err == Z_OK &&
+ *len > (comp_len + zip_column_header_length + lenlen)) {
+ column_set_compress_header(buf, true, lenlen - 1,
+ default_zip_column_algorithm_value, wrap,
+ default_zip_column_reserved_value);
+ ptr = buf + zip_column_header_length;
+ /* store the uncompressed data length */
+ switch (lenlen) {
+ case 1:
+ mach_write_to_1(ptr, *len);
+ break;
+ case 2:
+ mach_write_to_2(ptr, *len);
+ break;
+ case 3:
+ mach_write_to_3(ptr, *len);
+ break;
+ case 4:
+ mach_write_to_4(ptr, *len);
+ break;
+ default:
+ ut_error;
+ }
+
+ *len = comp_len + zip_column_header_length + lenlen;
+ return buf;
+ }
+
+do_not_compress:
+ ptr = buf;
+ column_set_compress_header(ptr, false, 0,
+ default_zip_column_algorithm_value, false,
+ default_zip_column_reserved_value);
+ ptr += zip_column_header_length;
+ memcpy(ptr, data, *len);
+ *len += zip_column_header_length;
+ return buf;
+}
+
+/** Uncompress blob/text/varchar column using zlib
+@return pointer to the uncompressed data */
+const byte*
+row_decompress_column(
+ const byte* data, /*!< in: data in innodb(compressed) format */
+ ulint *len, /*!< in: data length; out: length of
+ decompressed data*/
+ const byte* dict_data,
+ /*!< in: optional dictionary data used for
+ decompression */
+ ulint dict_data_len,
+ /*!< in: optional dictionary data length */
+ row_prebuilt_t* prebuilt)
+ /*!< in: use prebuilt->compress_heap only
+ here*/
+{
+ ulint buf_len = 0;
+ byte* buf;
+ int err = 0;
+ int window_bits = 0;
+ z_stream d_stream;
+ bool is_compressed = false;
+ bool wrap = false;
+ bool reserved = false;
+ ulint lenlen = 0;
+ uint alg = 0;
+
+ ut_ad(*len != ULINT_UNDEFINED);
+ ut_ad(*len >= zip_column_header_length);
+
+ column_get_compress_header(data, &is_compressed, &lenlen, &alg,
+ &wrap, &reserved);
+
+ if (reserved != default_zip_column_reserved_value) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "unsupported compressed BLOB header format\n");
+ }
+
+ if (alg != default_zip_column_algorithm_value) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "unsupported 'algorithm' value in the"
+ " compressed BLOB header\n");
+ }
+
+ ut_a(lenlen < 4);
+
+ data += zip_column_header_length;
+ if (!is_compressed) { /* column not compressed */
+ *len -= zip_column_header_length;
+ return data;
+ }
+
+ lenlen++;
+
+ ulint comp_len = *len - zip_column_header_length - lenlen;
+
+ ulint uncomp_len = 0;
+ switch (lenlen) {
+ case 1:
+ uncomp_len = mach_read_from_1(data);
+ break;
+ case 2:
+ uncomp_len = mach_read_from_2(data);
+ break;
+ case 3:
+ uncomp_len = mach_read_from_3(data);
+ break;
+ case 4:
+ uncomp_len = mach_read_from_4(data);
+ break;
+ default:
+ ut_error;
+ }
+
+ data += lenlen;
+
+ /* data is compressed; decompress it */
+ if (!prebuilt->compress_heap) {
+ prebuilt->compress_heap =
+ mem_heap_create(max(UNIV_PAGE_SIZE, uncomp_len));
+ }
+
+ buf_len = uncomp_len;
+ buf = static_cast<byte*>(mem_heap_zalloc(
+ prebuilt->compress_heap, buf_len));
+
+ /* init d_stream */
+ d_stream.next_in = const_cast<Bytef*>(data);
+ d_stream.avail_in = comp_len;
+ d_stream.next_out = buf;
+ d_stream.avail_out = buf_len;
+
+ column_zip_set_alloc(&d_stream, prebuilt->compress_heap);
+
+ window_bits = wrap ? MAX_WBITS : -MAX_WBITS;
+ err = inflateInit2(&d_stream, window_bits);
+ ut_a(err == Z_OK);
+
+ err = inflate(&d_stream, Z_FINISH);
+ if (err == Z_NEED_DICT) {
+ ut_a(dict_data != 0 && dict_data_len != 0);
+ err = inflateSetDictionary(&d_stream, dict_data,
+ dict_data_len);
+ ut_a(err == Z_OK);
+ err = inflate(&d_stream, Z_FINISH);
+ }
+
+ if (err != Z_STREAM_END) {
+ inflateEnd(&d_stream);
+ if (err == Z_BUF_ERROR && d_stream.avail_in == 0)
+ err = Z_DATA_ERROR;
+ } else {
+ buf_len = d_stream.total_out;
+ err = inflateEnd(&d_stream);
+ }
+
+ switch (err) {
+ case Z_OK:
+ break;
+ case Z_BUF_ERROR:
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "zlib buf error, this shouldn't happen\n");
+ break;
+ default:
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "failed to decompress column, error: %d\n", err);
+ }
+
+ if (err == Z_OK) {
+ if (buf_len != uncomp_len) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "failed to decompress blob column, may"
+ " be corrupted\n");
+ }
+ *len = buf_len;
+ return buf;
+ }
+
+ *len -= (zip_column_header_length + lenlen);
+ return data;
+}
+
+
/*******************************************************************//**
Stores a reference to a BLOB in the MySQL format. */
UNIV_INTERN
@@ -240,10 +713,21 @@ row_mysql_store_blob_ref(
to 4 bytes */
const void* data, /*!< in: BLOB data; if the value to store
is SQL NULL this should be NULL pointer */
- ulint len) /*!< in: BLOB length; if the value to store
+ ulint len, /*!< in: BLOB length; if the value to store
is SQL NULL this should be 0; remember
also to set the NULL bit in the MySQL record
header! */
+ bool need_decompression,
+ /*!< in: if the data needs to be decompressed */
+ const byte* dict_data,
+ /*!< in: optional compression dictionary
+ data */
+ ulint dict_data_len,
+ /*!< in: optional compression dictionary data
+ length */
+ row_prebuilt_t* prebuilt)
+ /*!< in: use prebuilt->compress_heap only
+ here */
{
/* MySQL might assume the field is set to zero except the length and
the pointer fields */
@@ -255,13 +739,28 @@ row_mysql_store_blob_ref(
In 32-bit architectures we only use the first 4 bytes of the pointer
slot. */
- ut_a(col_len - 8 > 1 || len < 256);
- ut_a(col_len - 8 > 2 || len < 256 * 256);
- ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
+ ut_a(col_len - 8 > 1 ||
+ len < 256 +
+ (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
+ ut_a(col_len - 8 > 2 ||
+ len < 256 * 256 +
+ (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
+ ut_a(col_len - 8 > 3 ||
+ len < 256 * 256 * 256 +
+ (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
- mach_write_to_n_little_endian(dest, col_len - 8, len);
+ const byte *ptr = NULL;
- memcpy(dest + col_len - 8, &data, sizeof data);
+ if (need_decompression)
+ ptr = row_decompress_column((const byte*)data, &len,
+ dict_data, dict_data_len, prebuilt);
+
+ if (ptr)
+ memcpy(dest + col_len - 8, &ptr, sizeof ptr);
+ else
+ memcpy(dest + col_len - 8, &data, sizeof data);
+
+ mach_write_to_n_little_endian(dest, col_len - 8, len);
}
/*******************************************************************//**
@@ -274,15 +773,32 @@ row_mysql_read_blob_ref(
ulint* len, /*!< out: BLOB length */
const byte* ref, /*!< in: BLOB reference in the
MySQL format */
- ulint col_len) /*!< in: BLOB reference length
+ ulint col_len, /*!< in: BLOB reference length
(not BLOB length) */
+ bool need_compression,
+ /*!< in: if the data needs to be
+ compressed */
+ const byte* dict_data, /*!< in: optional compression
+ dictionary data */
+ ulint dict_data_len, /*!< in: optional compression
+ dictionary data length */
+ row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap
+ only here */
{
- byte* data;
+ byte* data = NULL;
+ byte* ptr = NULL;
*len = mach_read_from_n_little_endian(ref, col_len - 8);
memcpy(&data, ref + col_len - 8, sizeof data);
+ if (need_compression) {
+ ptr = row_compress_column(data, len, col_len - 8, dict_data,
+ dict_data_len, prebuilt);
+ if (ptr)
+ data = ptr;
+ }
+
return(data);
}
@@ -365,7 +881,16 @@ row_mysql_store_col_in_innobase_format(
necessarily the length of the actual
payload data; if the column is a true
VARCHAR then this is irrelevant */
- ulint comp) /*!< in: nonzero=compact format */
+ ulint comp, /*!< in: nonzero=compact format */
+ bool need_compression,
+ /*!< in: if the data needs to be
+ compressed */
+ const byte* dict_data, /*!< in: optional compression
+ dictionary data */
+ ulint dict_data_len, /*!< in: optional compression
+ dictionary data length */
+ row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap
+ only here */
{
const byte* ptr = mysql_data;
const dtype_t* dtype;
@@ -418,8 +943,14 @@ row_mysql_store_col_in_innobase_format(
lenlen = 2;
}
- ptr = row_mysql_read_true_varchar(&col_len, mysql_data,
- lenlen);
+ const byte* tmp_ptr = row_mysql_read_true_varchar(
+ &col_len, mysql_data, lenlen);
+ if (need_compression)
+ ptr = row_compress_column(tmp_ptr, &col_len,
+ lenlen, dict_data, dict_data_len,
+ prebuilt);
+ else
+ ptr = tmp_ptr;
} else {
/* Remove trailing spaces from old style VARCHAR
columns. */
@@ -501,7 +1032,9 @@ row_mysql_store_col_in_innobase_format(
}
} else if (type == DATA_BLOB && row_format_col) {
- ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+ ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len,
+ need_compression, dict_data, dict_data_len,
+ prebuilt);
}
dfield_set_data(dfield, ptr, col_len);
@@ -559,7 +1092,11 @@ row_mysql_convert_row_to_innobase(
TRUE, /* MySQL row format data */
mysql_rec + templ->mysql_col_offset,
templ->mysql_col_len,
- dict_table_is_comp(prebuilt->table));
+ dict_table_is_comp(prebuilt->table),
+ templ->compressed,
+ reinterpret_cast<const byte*>(
+ templ->zip_dict_data.str),
+ templ->zip_dict_data.length, prebuilt);
next_column:
;
}
@@ -905,6 +1442,10 @@ row_prebuilt_free(
mem_heap_free(prebuilt->blob_heap);
}
+ if (prebuilt->compress_heap) {
+ mem_heap_free(prebuilt->compress_heap);
+ }
+
if (prebuilt->old_vers_heap) {
mem_heap_free(prebuilt->old_vers_heap);
}
@@ -1334,6 +1875,9 @@ row_insert_for_mysql(
return(DB_READ_ONLY);
}
+ if (UNIV_LIKELY_NULL(prebuilt->compress_heap))
+ mem_heap_empty(prebuilt->compress_heap);
+
trx->op_info = "inserting";
row_mysql_delay_if_needed();
@@ -2729,6 +3273,10 @@ loop:
return(n_tables + n_tables_dropped);
}
+ DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+ os_thread_sleep(5000000);
+ );
+
table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
DICT_ERR_IGNORE_NONE);
@@ -2739,6 +3287,16 @@ loop:
goto already_dropped;
}
+ if (!table->to_be_dropped) {
+ /* There is a scenario where the old table is dropped
+ just after being added to the drop list, and a new
+ table with the same name is created; we must not
+ drop that new table in the background. */
+ dict_table_close(table, FALSE, FALSE);
+
+ goto already_dropped;
+ }
+
ut_a(!table->can_be_evicted);
dict_table_close(table, FALSE, FALSE);
@@ -2869,6 +3427,12 @@ row_mysql_table_id_reassign(
pars_info_add_ull_literal(info, "old_id", table->id);
pars_info_add_ull_literal(info, "new_id", *new_id);
+ /* As micro-SQL does not support int4 == int8 comparisons,
+ old and new IDs are added again under different names as
+ int4 values */
+ pars_info_add_int4_literal(info, "old_id_narrow", table->id);
+ pars_info_add_int4_literal(info, "new_id_narrow", *new_id);
+
err = que_eval_sql(
info,
"PROCEDURE RENUMBER_TABLE_PROC () IS\n"
@@ -2879,6 +3443,8 @@ row_mysql_table_id_reassign(
" WHERE TABLE_ID = :old_id;\n"
"UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_ZIP_DICT_COLS SET TABLE_ID = :new_id_narrow\n"
+ " WHERE TABLE_ID = :old_id_narrow;\n"
"END;\n", FALSE, trx);
return(err);
@@ -3645,6 +4211,12 @@ next_rec:
pars_info_add_ull_literal(info, "old_id", table->id);
pars_info_add_ull_literal(info, "new_id", new_id);
+ /* As micro-SQL does not support int4 == int8 comparisons,
+ old and new IDs are added again under different names as
+ int4 values */
+ pars_info_add_int4_literal(info, "old_id_narrow", table->id);
+ pars_info_add_int4_literal(info, "new_id_narrow", new_id);
+
err = que_eval_sql(info,
"PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
"BEGIN\n"
@@ -3656,6 +4228,9 @@ next_rec:
"UPDATE SYS_INDEXES"
" SET TABLE_ID = :new_id, SPACE = :new_space\n"
" WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_ZIP_DICT_COLS\n"
+ " SET TABLE_ID = :new_id_narrow\n"
+ " WHERE TABLE_ID = :old_id_narrow;\n"
"END;\n"
, FALSE, trx);
@@ -4006,6 +4581,13 @@ row_drop_table_for_mysql(
}
}
+
+ DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+ row_add_table_to_background_drop_list(table->name);
+ err = DB_SUCCESS;
+ goto funct_exit;
+ );
+
/* TODO: could we replace the counter n_foreign_key_checks_running
with lock checks on the table? Acquire here an exclusive lock on the
table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
@@ -4276,6 +4858,19 @@ row_drop_table_for_mysql(
filepath = fil_make_ibd_name(tablename, false);
}
+ /* Remove all compression dictionary references for the
+ table */
+ err = dict_create_remove_zip_dict_references_for_table(
+ table->id, trx);
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Error: (%s) not "
+ "able to remove compression dictionary "
+ "references for table %s", ut_strerr(err),
+ tablename);
+
+ goto funct_exit;
+ }
+
if (dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
ut_ad(table->n_ref_count == 0);
@@ -4623,6 +5218,19 @@ loop:
row_mysql_lock_data_dictionary(trx);
while ((table_name = dict_get_first_table_name_in_db(name))) {
+ /* Drop the parent table if it is an FTS aux table, to
+ avoid accessing dropped FTS aux tables in the information
+ schema when the parent table still exists.
+ Note: dropping the parent table drops its FTS aux tables. */
+ char* parent_table_name;
+ parent_table_name = fts_get_parent_table_name(
+ table_name, strlen(table_name));
+
+ if (parent_table_name != NULL) {
+ mem_free(table_name);
+ table_name = parent_table_name;
+ }
+
ut_a(memcmp(table_name, name, namelen) == 0);
table = dict_table_open_on_name(
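
The two-byte compressed-column header documented above
(reserved/wrap/algorithm/len-len/compressed) can be packed and decoded
standalone; a minimal sketch, assuming only the bit layout given in the
comment in row0mysql.cc:

    /* Standalone sketch of the 2-byte compressed-column header layout
       described in row0mysql.cc; nothing from InnoDB is assumed. */
    #include <cstdio>

    int
    main()
    {
        /* compressed=1, len-len=1 (i.e. 2 length bytes follow),
        algorithm=0 (zlib), wrap=1, reserved=0 */
        unsigned h = (1u << 10) | (1u << 7) | (0u << 2)
            | (1u << 1) | 0u;

        /* stored big-endian, as mach_write_to_2() does */
        unsigned char b[2];
        b[0] = (unsigned char) (h >> 8);
        b[1] = (unsigned char) (h & 0xFF);
        std::printf("header bytes: %02x %02x\n", b[0], b[1]);

        /* decode the same fields back, mirroring the *_mask constants */
        unsigned hdr = ((unsigned) b[0] << 8) | b[1];
        std::printf("compressed=%u lenlen=%u alg=%u wrap=%u reserved=%u\n",
                    (hdr >> 10) & 1, (hdr >> 7) & 7,
                    (hdr >> 2) & 31, (hdr >> 1) & 1, hdr & 1);
        return(0);
    }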
diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc
index a42923de8ca..ad1e9e2bf9d 100644
--- a/storage/xtradb/row/row0sel.cc
+++ b/storage/xtradb/row/row0sel.cc
@@ -2456,9 +2456,11 @@ row_sel_convert_mysql_key_to_innobase(
if (UNIV_LIKELY(!is_null)) {
buf = row_mysql_store_col_in_innobase_format(
dfield, buf,
- FALSE, /* MySQL key value format col */
+ /* MySQL key value format col */
+ FALSE,
key_ptr + data_offset, data_len,
- dict_table_is_comp(index->table));
+ dict_table_is_comp(index->table),
+ false, 0, 0, 0);
ut_a(buf <= original_buf + buf_len);
}
@@ -2551,12 +2553,16 @@ row_sel_store_row_id_to_prebuilt(
#ifdef UNIV_DEBUG
/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len)
+# define row_sel_field_store_in_mysql_format( \
+ dest,templ,idx,field,src,len,prebuilt) \
+ row_sel_field_store_in_mysql_format_func \
+ (dest,templ,idx,field,src,len,prebuilt)
#else /* UNIV_DEBUG */
/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func(dest,templ,src,len)
+# define row_sel_field_store_in_mysql_format( \
+ dest,templ,idx,field,src,len,prebuilt) \
+ row_sel_field_store_in_mysql_format_func \
+ (dest,templ,src,len,prebuilt)
#endif /* UNIV_DEBUG */
/**************************************************************//**
@@ -2586,7 +2592,10 @@ row_sel_field_store_in_mysql_format_func(
templ->icp_rec_field_no */
#endif /* UNIV_DEBUG */
const byte* data, /*!< in: data to store */
- ulint len) /*!< in: length of the data */
+ ulint len, /*!< in: length of the data */
+ row_prebuilt_t* prebuilt)
+ /*!< in: use prebuilt->compress_heap
+ only here */
{
byte* ptr;
#ifdef UNIV_DEBUG
@@ -2630,6 +2639,15 @@ row_sel_field_store_in_mysql_format_func(
field_end = dest + templ->mysql_col_len;
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
+ /* If this is a compressed column,
+ decompress it first */
+ if (templ->compressed)
+ data = row_decompress_column(data, &len,
+ reinterpret_cast<const byte*>(
+ templ->zip_dict_data.str),
+ templ->zip_dict_data.length,
+ prebuilt);
+
/* This is a >= 5.0.3 type true VARCHAR. Store the
length of the data to the first byte or the first
two bytes of dest. */
@@ -2680,7 +2698,11 @@ row_sel_field_store_in_mysql_format_func(
already copied to the buffer in row_sel_store_mysql_rec */
row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
- len);
+ len, templ->compressed,
+ reinterpret_cast<const byte*>(
+ templ->zip_dict_data.str),
+ templ->zip_dict_data.length,
+ prebuilt);
break;
case DATA_MYSQL:
@@ -2833,7 +2855,7 @@ row_sel_store_mysql_field_func(
row_sel_field_store_in_mysql_format(
mysql_rec + templ->mysql_col_offset,
- templ, index, field_no, data, len);
+ templ, index, field_no, data, len, prebuilt);
if (heap != prebuilt->blob_heap) {
mem_heap_free(heap);
@@ -2883,7 +2905,7 @@ row_sel_store_mysql_field_func(
row_sel_field_store_in_mysql_format(
mysql_rec + templ->mysql_col_offset,
- templ, index, field_no, data, len);
+ templ, index, field_no, data, len, prebuilt);
}
ut_ad(len != UNIV_SQL_NULL);
@@ -2931,6 +2953,9 @@ row_sel_store_mysql_rec(
prebuilt->blob_heap = NULL;
}
+ if (UNIV_LIKELY_NULL(prebuilt->compress_heap))
+ mem_heap_empty(prebuilt->compress_heap);
+
for (i = 0; i < prebuilt->n_template; i++) {
const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
const ulint field_no
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
index a0dd32c203f..4a709160ea6 100644
--- a/storage/xtradb/srv/srv0mon.cc
+++ b/storage/xtradb/srv/srv0mon.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -1367,7 +1367,10 @@ srv_mon_set_module_control(
module */
set_current_module = FALSE;
} else if (module_id == MONITOR_ALL_COUNTER) {
- continue;
+ if (!(innodb_counter_info[ix].monitor_type
+ & MONITOR_GROUP_MODULE)) {
+ continue;
+ }
} else {
/* Hitting the next module, stop */
break;
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
index 7886d705ae4..c04b446f480 100644
--- a/storage/xtradb/srv/srv0srv.cc
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -195,6 +195,9 @@ UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */
UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024;
@@ -793,6 +796,9 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event;
UNIV_INTERN os_event_t srv_redo_log_tracked_event;
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
UNIV_INTERN bool srv_redo_log_thread_started = false;
/*********************************************************************//**
@@ -2431,13 +2437,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)(
os_event_wait(srv_checkpoint_completed_event);
os_event_reset(srv_checkpoint_completed_event);
-#ifdef UNIV_DEBUG
- if (!srv_track_changed_pages) {
- continue;
- }
-#endif
-
- if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
+ if (srv_track_changed_pages
+ && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
if (!log_online_follow_redo_log()) {
/* TODO: sync with I_S log tracking status? */
ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index dbddb4a7a1c..82beb83192f 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -2780,6 +2780,12 @@ files_checked:
}
}
+ /* Create the SYS_ZIP_DICT system table */
+ err = dict_create_or_check_sys_zip_dict();
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
srv_is_being_started = FALSE;
ut_a(trx_purge_state() == PURGE_STATE_INIT);
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index bd1e74becaa..318c248f742 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1,5 +1,5 @@
/* Copyright (c) 2003, 2013, Oracle and/or its affiliates
- Copyright (c) 2009, 2014, SkySQL Ab.
+ Copyright (c) 2009, 2016, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index bcb07daae03..b4fc6297afd 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2009, 2013, Monty Program Ab
+ Copyright (c) 2009, 2016, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh
index b562354a586..8d2e78edbd8 100644
--- a/support-files/mysql.server.sh
+++ b/support-files/mysql.server.sh
@@ -319,7 +319,7 @@ case "$mode" in
then
# Give extra arguments to mysqld with the my.cnf file. This script
# may be overwritten at next upgrade.
- $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null 2>&1 &
+ $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" &
wait_for_ready; return_value=$?
# Make lock for RedHat / SuSE
diff --git a/tests/async_queries.c b/tests/async_queries.c
index 76e884e6a69..a8889fc8d5a 100644
--- a/tests/async_queries.c
+++ b/tests/async_queries.c
@@ -425,7 +425,7 @@ main(int argc, char *argv[])
event_dispatch();
- free(sds);
+ my_free(sds);
mysql_library_end();
diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt
index 0535a486d57..1682bae6986 100644
--- a/win/packaging/CMakeLists.txt
+++ b/win/packaging/CMakeLists.txt
@@ -24,10 +24,13 @@ ENDIF()
SET(MANUFACTURER "MariaDB Corporation Ab")
-FIND_PATH(WIX_DIR heat.exe
- "$ENV{ProgramFiles}/WiX Toolset v3.9/bin"
- "$ENV{ProgramFiles}/WiX Toolset v3.10/bin"
-)
+SET(WIX_BIN_PATHS)
+FOREACH(WIX_VER 3.9 3.10 3.11)
+ LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin")
+ LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin")
+ENDFOREACH()
+
+FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS})
SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB")
IF(CMAKE_SIZEOF_VOID_P EQUAL 4)
SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3")
diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in
index c2ab648a6db..1f847a39695 100644
--- a/win/packaging/create_msi.cmake.in
+++ b/win/packaging/create_msi.cmake.in
@@ -434,6 +434,7 @@ EXECUTE_PROCESS(
IF(SIGNCODE)
EXECUTE_PROCESS(
COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS}
+ /d ${CPACK_PACKAGE_FILE_NAME}.msi
${CPACK_PACKAGE_FILE_NAME}.msi
)
ENDIF()