summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Gorrod <alexander.gorrod@mongodb.com>2016-07-26 11:10:50 +1000
committerAlex Gorrod <alexander.gorrod@mongodb.com>2016-07-26 11:11:27 +1000
commit848e5f5c0bd836781b379f6ade56f16433bd3853 (patch)
tree6f03ed5cde97aedc762215c14d23ee5305998a2b
parent8b7110bfacd291cdd4a25420f07b70d3bae7a647 (diff)
parentd8fb874fc40989cb9675e56ca80b3b64e6fa2ee3 (diff)
downloadmongo-848e5f5c0bd836781b379f6ade56f16433bd3853.tar.gz
Merge branch 'mongodb-3.4' into mongodb-3.2
Choosing the content of the 3.4 branch, rather than manually resolving merge conflicts.
-rw-r--r--NEWS.MONGODB2523
-rw-r--r--SConstruct41
-rw-r--r--bench/wtperf/config.c212
-rw-r--r--bench/wtperf/runners/evict-btree-1.wtperf11
-rw-r--r--bench/wtperf/runners/evict-btree-stress.wtperf12
-rw-r--r--bench/wtperf/runners/evict-lsm-1.wtperf12
-rw-r--r--bench/wtperf/wtperf.c41
-rw-r--r--bench/wtperf/wtperf.h28
-rw-r--r--bench/wtperf/wtperf_opt.i2
-rw-r--r--build_posix/Make.subdirs2
-rw-r--r--build_posix/aclocal/options.m410
-rw-r--r--build_posix/aclocal/strict.m474
-rw-r--r--build_posix/aclocal/types.m41
-rw-r--r--build_posix/configure.ac.in55
-rwxr-xr-xbuild_posix/makemake22
-rw-r--r--build_win/filelist.win172
-rw-r--r--dist/api_data.py28
-rw-r--r--dist/api_err.py10
-rw-r--r--dist/dist.py15
-rw-r--r--dist/filelist63
-rw-r--r--dist/flags.py10
-rw-r--r--dist/log.py2
-rwxr-xr-xdist/s_c_test_create105
-rw-r--r--dist/s_copyright.list2
-rwxr-xr-xdist/s_define2
-rw-r--r--dist/s_define.list5
-rwxr-xr-xdist/s_docs3
-rwxr-xr-xdist/s_funcs2
-rwxr-xr-xdist/s_label10
-rw-r--r--dist/s_label_loop.py28
-rwxr-xr-xdist/s_longlines6
-rwxr-xr-xdist/s_prototypes64
-rwxr-xr-xdist/s_stat4
-rwxr-xr-xdist/s_string4
-rw-r--r--dist/s_string.ok40
-rwxr-xr-xdist/s_style26
-rwxr-xr-xdist/s_typedef2
-rwxr-xr-xdist/s_whitespace2
-rwxr-xr-xdist/s_win35
-rw-r--r--dist/stat_data.py49
-rw-r--r--examples/c/Makefile.am3
-rw-r--r--examples/c/ex_access.c6
-rw-r--r--examples/c/ex_all.c9
-rw-r--r--examples/c/ex_async.c20
-rw-r--r--examples/c/ex_backup.c8
-rw-r--r--examples/c/ex_call_center.c6
-rw-r--r--examples/c/ex_config.c91
-rw-r--r--examples/c/ex_config_parse.c26
-rw-r--r--examples/c/ex_cursor.c12
-rw-r--r--examples/c/ex_data_source.c15
-rw-r--r--examples/c/ex_encrypt.c10
-rw-r--r--examples/c/ex_event_handler.c17
-rw-r--r--examples/c/ex_extending.c13
-rw-r--r--examples/c/ex_extractor.c2
-rw-r--r--examples/c/ex_file_system.c975
-rw-r--r--examples/c/ex_hello.c20
-rw-r--r--examples/c/ex_log.c9
-rw-r--r--examples/c/ex_pack.c20
-rw-r--r--examples/c/ex_process.c20
-rw-r--r--examples/c/ex_schema.c82
-rw-r--r--examples/c/ex_scope.c17
-rw-r--r--examples/c/ex_stat.c9
-rw-r--r--examples/c/ex_sync.c7
-rw-r--r--examples/c/ex_thread.c4
-rw-r--r--examples/java/com/wiredtiger/examples/ex_all.java12
-rw-r--r--examples/java/com/wiredtiger/examples/ex_schema.java76
-rw-r--r--ext/compressors/zlib/zlib_compress.c10
-rw-r--r--ext/datasources/helium/helium.c4
-rw-r--r--ext/test/kvs_bdb/kvs_bdb.c2
-rw-r--r--lang/java/java_doc.i1
-rw-r--r--lang/java/src/com/wiredtiger/db/PackFormatInputStream.java7
-rw-r--r--lang/java/src/com/wiredtiger/db/PackInputStream.java35
-rw-r--r--lang/java/src/com/wiredtiger/db/PackOutputStream.java2
-rw-r--r--lang/java/wiredtiger.i65
-rw-r--r--src/async/async_worker.c5
-rw-r--r--src/block/block_ckpt.c16
-rw-r--r--src/block/block_compact.c4
-rw-r--r--src/block/block_ext.c89
-rw-r--r--src/block/block_map.c48
-rw-r--r--src/block/block_mgr.c33
-rw-r--r--src/block/block_open.c42
-rw-r--r--src/block/block_read.c61
-rw-r--r--src/block/block_vrfy.c56
-rw-r--r--src/block/block_write.c54
-rw-r--r--src/bloom/bloom.c2
-rw-r--r--src/btree/bt_curnext.c15
-rw-r--r--src/btree/bt_curprev.c18
-rw-r--r--src/btree/bt_cursor.c50
-rw-r--r--src/btree/bt_debug.c119
-rw-r--r--src/btree/bt_delete.c7
-rw-r--r--src/btree/bt_discard.c55
-rw-r--r--src/btree/bt_handle.c40
-rw-r--r--src/btree/bt_huffman.c25
-rw-r--r--src/btree/bt_io.c2
-rw-r--r--src/btree/bt_misc.c16
-rw-r--r--src/btree/bt_page.c23
-rw-r--r--src/btree/bt_read.c6
-rw-r--r--src/btree/bt_rebalance.c5
-rw-r--r--src/btree/bt_ret.c2
-rw-r--r--src/btree/bt_slvg.c45
-rw-r--r--src/btree/bt_split.c189
-rw-r--r--src/btree/bt_stat.c3
-rw-r--r--src/btree/bt_sync.c27
-rw-r--r--src/btree/bt_vrfy.c59
-rw-r--r--src/btree/bt_vrfy_dsk.c9
-rw-r--r--src/btree/col_modify.c20
-rw-r--r--src/btree/col_srch.c30
-rw-r--r--src/btree/row_key.c6
-rw-r--r--src/btree/row_modify.c13
-rw-r--r--src/btree/row_srch.c18
-rw-r--r--src/cache/cache_las.c11
-rw-r--r--src/checksum/checksum.c (renamed from src/support/cksum.c)2
-rw-r--r--src/checksum/power8/LICENSE.TXT (renamed from src/support/power8/LICENSE.TXT)0
-rw-r--r--src/checksum/power8/README.md (renamed from src/support/power8/README.md)0
-rw-r--r--src/checksum/power8/crc32.S (renamed from src/support/power8/crc32.S)2
-rw-r--r--src/checksum/power8/crc32_constants.h (renamed from src/support/power8/crc32_constants.h)0
-rw-r--r--src/checksum/power8/crc32_wrapper.c (renamed from src/support/power8/crc32_wrapper.c)5
-rw-r--r--src/checksum/power8/ppc-opcode.h (renamed from src/support/power8/ppc-opcode.h)0
-rw-r--r--src/config/config_collapse.c4
-rw-r--r--src/config/config_def.c63
-rw-r--r--src/conn/api_strerror.c2
-rw-r--r--src/conn/conn_api.c245
-rw-r--r--src/conn/conn_cache.c30
-rw-r--r--src/conn/conn_cache_pool.c3
-rw-r--r--src/conn/conn_dhandle.c34
-rw-r--r--src/conn/conn_handle.c8
-rw-r--r--src/conn/conn_log.c42
-rw-r--r--src/conn/conn_stat.c32
-rw-r--r--src/cursor/cur_backup.c91
-rw-r--r--src/cursor/cur_bulk.c1
-rw-r--r--src/cursor/cur_dump.c23
-rw-r--r--src/cursor/cur_index.c16
-rw-r--r--src/cursor/cur_join.c1515
-rw-r--r--src/cursor/cur_json.c135
-rw-r--r--src/cursor/cur_std.c5
-rw-r--r--src/cursor/cur_table.c3
-rw-r--r--src/docs/Doxyfile2
-rw-r--r--src/docs/backup.dox44
-rw-r--r--src/docs/cursor-join.dox25
-rw-r--r--src/docs/custom-file-systems.dox47
-rw-r--r--src/docs/error-handling.dox3
-rw-r--r--src/docs/examples.dox6
-rw-r--r--src/docs/in-memory.dox12
-rw-r--r--src/docs/programming.dox2
-rw-r--r--src/docs/spell.ok6
-rw-r--r--src/docs/tune-cache.dox4
-rw-r--r--src/docs/wtperf.dox2
-rw-r--r--src/evict/evict_file.c5
-rw-r--r--src/evict/evict_lru.c789
-rw-r--r--src/include/api.h2
-rw-r--r--src/include/bitstring.i14
-rw-r--r--src/include/block.h10
-rw-r--r--src/include/btmem.h83
-rw-r--r--src/include/btree.h2
-rw-r--r--src/include/btree.i72
-rw-r--r--src/include/btree_cmp.i20
-rw-r--r--src/include/cache.h51
-rw-r--r--src/include/cache.i16
-rw-r--r--src/include/cell.i26
-rw-r--r--src/include/column.i24
-rw-r--r--src/include/config.h71
-rw-r--r--src/include/connection.h45
-rw-r--r--src/include/ctype.i69
-rw-r--r--src/include/cursor.h56
-rw-r--r--src/include/dhandle.h5
-rw-r--r--src/include/extern.h131
-rw-r--r--src/include/extern_posix.h31
-rw-r--r--src/include/extern_win.h32
-rw-r--r--src/include/flags.h30
-rw-r--r--src/include/log.h7
-rw-r--r--src/include/meta.h5
-rw-r--r--src/include/misc.h13
-rw-r--r--src/include/misc.i245
-rw-r--r--src/include/os.h170
-rw-r--r--src/include/os_fhandle.i176
-rw-r--r--src/include/os_fs.i244
-rw-r--r--src/include/os_fstream.i97
-rw-r--r--src/include/packing.i16
-rw-r--r--src/include/session.h2
-rw-r--r--src/include/stat.h45
-rw-r--r--src/include/wiredtiger.in918
-rw-r--r--src/include/wiredtiger_ext.h13
-rw-r--r--src/include/wt_internal.h33
-rw-r--r--src/log/log.c145
-rw-r--r--src/log/log_auto.c2
-rw-r--r--src/lsm/lsm_cursor.c2
-rw-r--r--src/lsm/lsm_merge.c28
-rw-r--r--src/lsm/lsm_meta.c2
-rw-r--r--src/lsm/lsm_tree.c14
-rw-r--r--src/lsm/lsm_work_unit.c2
-rw-r--r--src/meta/meta_table.c12
-rw-r--r--src/meta/meta_track.c6
-rw-r--r--src/meta/meta_turtle.c86
-rw-r--r--src/os_common/filename.c44
-rw-r--r--src/os_common/os_errno.c (renamed from src/os_posix/os_errno.c)43
-rw-r--r--src/os_common/os_fhandle.c163
-rw-r--r--src/os_common/os_fs_inmemory.c685
-rw-r--r--src/os_common/os_fs_stdio.c239
-rw-r--r--src/os_common/os_fstream.c217
-rw-r--r--src/os_common/os_fstream_stdio.c84
-rw-r--r--src/os_common/os_getline.c51
-rw-r--r--src/os_common/os_init.c41
-rw-r--r--src/os_posix/os_dir.c101
-rw-r--r--src/os_posix/os_dlopen.c2
-rw-r--r--src/os_posix/os_fallocate.c160
-rw-r--r--src/os_posix/os_fs.c639
-rw-r--r--src/os_posix/os_map.c121
-rw-r--r--src/os_posix/os_thread.c2
-rw-r--r--src/os_posix/os_time.c4
-rw-r--r--src/os_win/os_dir.c117
-rw-r--r--src/os_win/os_dlopen.c29
-rw-r--r--src/os_win/os_errno.c151
-rw-r--r--src/os_win/os_fs.c687
-rw-r--r--src/os_win/os_getenv.c20
-rw-r--r--src/os_win/os_map.c127
-rw-r--r--src/os_win/os_mtx_cond.c32
-rw-r--r--src/os_win/os_path.c2
-rw-r--r--src/os_win/os_thread.c27
-rw-r--r--src/os_win/os_winerr.c130
-rw-r--r--src/reconcile/rec_write.c170
-rw-r--r--src/schema/schema_create.c4
-rw-r--r--src/schema/schema_list.c2
-rw-r--r--src/schema/schema_open.c6
-rw-r--r--src/schema/schema_project.c6
-rw-r--r--src/schema/schema_rename.c4
-rw-r--r--src/schema/schema_stat.c8
-rw-r--r--src/session/session_api.c60
-rw-r--r--src/support/err.c23
-rw-r--r--src/support/global.c4
-rw-r--r--src/support/hash_city.c2
-rw-r--r--src/support/hazard.c14
-rw-r--r--src/support/hex.c2
-rw-r--r--src/support/huffman.c25
-rw-r--r--src/support/scratch.c60
-rw-r--r--src/support/stat.c98
-rw-r--r--src/txn/txn.c4
-rw-r--r--src/txn/txn_ckpt.c14
-rw-r--r--src/txn/txn_log.c20
-rw-r--r--src/txn/txn_nsnap.c2
-rw-r--r--src/txn/txn_recover.c2
-rw-r--r--src/utilities/util_backup.c2
-rw-r--r--src/utilities/util_dump.c379
-rw-r--r--src/utilities/util_dump.h11
-rw-r--r--src/utilities/util_load.c3
-rw-r--r--src/utilities/util_load_json.c43
-rw-r--r--src/utilities/util_main.c1
-rw-r--r--src/utilities/util_misc.c2
-rw-r--r--src/utilities/util_verify.c20
-rw-r--r--test/bloom/Makefile.am11
-rw-r--r--test/bloom/test_bloom.c17
-rw-r--r--test/checkpoint/Makefile.am9
-rw-r--r--test/checkpoint/test_checkpoint.c6
-rw-r--r--test/checkpoint/test_checkpoint.h12
-rw-r--r--test/csuite/Makefile.am27
-rw-r--r--test/csuite/wt1965_col_efficiency/main.c186
-rw-r--r--test/csuite/wt2246_col_append/main.c158
-rw-r--r--test/csuite/wt2447_join_main_table/main.c189
-rw-r--r--test/csuite/wt2535_insert_race/main.c159
-rw-r--r--test/csuite/wt2592_join_schema/main.c222
-rw-r--r--test/cursor_order/Makefile.am12
-rw-r--r--test/cursor_order/cursor_order.c3
-rw-r--r--test/cursor_order/cursor_order.h2
-rw-r--r--test/cursor_order/cursor_order_ops.c19
-rw-r--r--test/fops/Makefile.am9
-rw-r--r--test/fops/fops.c6
-rw-r--r--test/fops/t.c3
-rw-r--r--test/fops/thread.h18
-rw-r--r--test/format/Makefile.am17
-rw-r--r--test/format/backup.c118
-rw-r--r--test/format/bdb.c45
-rw-r--r--test/format/bulk.c51
-rw-r--r--test/format/compact.c2
-rw-r--r--test/format/config.c182
-rw-r--r--test/format/config.h4
-rw-r--r--test/format/format.h60
-rw-r--r--test/format/lrt.c24
-rw-r--r--test/format/ops.c755
-rw-r--r--test/format/salvage.c8
-rw-r--r--test/format/t.c12
-rw-r--r--test/format/util.c235
-rw-r--r--test/format/wts.c30
-rw-r--r--test/huge/Makefile.am11
-rw-r--r--test/huge/huge.c19
-rw-r--r--test/manydbs/Makefile.am11
-rw-r--r--test/manydbs/manydbs.c44
-rw-r--r--test/mciproject.yml115
-rw-r--r--test/packing/Makefile.am8
-rw-r--r--test/packing/intpack-test.c9
-rw-r--r--test/packing/intpack-test2.c9
-rw-r--r--test/packing/intpack-test3.c15
-rw-r--r--test/packing/packing-test.c12
-rw-r--r--test/readonly/Makefile.am11
-rw-r--r--test/readonly/readonly.c19
-rw-r--r--test/recovery/Makefile.am13
-rw-r--r--test/recovery/random-abort.c133
-rw-r--r--test/recovery/truncated-log.c19
-rw-r--r--test/salvage/Makefile.am11
-rw-r--r--test/salvage/salvage.c8
-rw-r--r--test/suite/run.py18
-rw-r--r--test/suite/suite_subprocess.py31
-rw-r--r--test/suite/test_backup05.py35
-rw-r--r--test/suite/test_config04.py55
-rw-r--r--test/suite/test_dump.py27
-rw-r--r--test/suite/test_join01.py409
-rw-r--r--test/suite/test_join07.py548
-rw-r--r--test/suite/test_join08.py265
-rw-r--r--test/suite/test_jsondump01.py35
-rw-r--r--test/suite/test_jsondump02.py179
-rw-r--r--test/suite/test_reconfig02.py10
-rw-r--r--test/suite/test_stat05.py5
-rw-r--r--test/suite/test_txn04.py2
-rw-r--r--test/suite/test_util02.py29
-rw-r--r--test/suite/test_util07.py9
-rw-r--r--test/suite/test_util12.py5
-rw-r--r--test/suite/test_verify.py12
-rw-r--r--test/suite/wttest.py10
-rw-r--r--test/thread/Makefile.am13
-rw-r--r--test/thread/rw.c12
-rw-r--r--test/thread/t.c3
-rw-r--r--test/thread/thread.h12
-rw-r--r--test/utility/Makefile.am4
-rw-r--r--test/utility/misc.c (renamed from test/utility/test_util.i)137
-rw-r--r--test/utility/parse_opts.c132
-rw-r--r--test/utility/test_util.h125
-rw-r--r--test/utility/thread.c141
-rw-r--r--tools/wtstats/stat_data.py12
326 files changed, 13337 insertions, 9779 deletions
diff --git a/NEWS.MONGODB b/NEWS.MONGODB
deleted file mode 100644
index 831237ba149..00000000000
--- a/NEWS.MONGODB
+++ /dev/null
@@ -1,2523 +0,0 @@
-3.2.0, Date TBA
-------------------
-
-commit 9166bca3d07d6592c1426c2c33bd56b6be0667e0
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Apr 23 05:43:35 2015 +0000
-
- Fix a deadlock related to handle locking.
-
- If one thread does a __wt_session_release_btree of a handle at the same time
- as another thread does a __wt_session_get_btree both wanting exclusive access
- to the file. It was possible for one thread to get the dhandle list lock and
- wait on the handle lock, while another thread held the handle lock waiting for
- the handle list lock.
-
- Temporarily fix by doing a try-lock on the __wt_session_get_btree path, long term
- the solution is to get rid of the __conn_dhandle_open_lock method, and split get
- and lock into two passes.
-
- Refs BF-716
-
-commit 3e254079484ce35a3cb70c48478c69defdb8f012
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Apr 23 05:42:08 2015 +0000
-
- Fix a deadlock related to LSM. There are cases where closing a file with
- an existing checkpoint could self deadlock.
-
- Check in the meta tracking whether we've already visited a checkpoint handle.
-
- Refs WT-716
-
-commit 1e80654b284b47b2dd9c302395ba908bf3a9b898
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 22 13:54:03 2015 -0400
-
- __wt_config_concat() is a special case of __wt_config_merge(), replace
- the three calls to __wt_config_concat() with __wt_config_merge().
-
-commit 1c412df22489dc3c18aa5390164ff68474293daf
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 22 13:29:05 2015 -0400
-
- Instead of doing a configuration line merge to remove configuration
- values we don't include, followed by a call to concatenate additional
- configuration values, do a single merge call including all of the
- configuration values we care about.
-
- This change is slightly more defensive -- if a user tries to change one
- of the configuration values we don't allow, we'll strip it, rather than
- failing the load.
-
- Clean up memory handling a bit so we don't leak memory, someday Coverity
- will thank me.
-
-commit 6093e42b21d7e947d7bd053f6691aa1fed1a7f99
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 22 11:33:21 2015 -0400
-
- Instead of rolling our own configuration removal code, use
- __wt_config_merge(), reference WT-1898.
-
-commit 4322191125284717af1a0c6734b6ea123ca6c50d
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Apr 21 15:45:30 2015 -0400
-
- Run recovery earlier. WT-1897
-
-commit 6155c465a519612e16cec5feb9fcf79fd0121d7f
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Apr 21 13:15:06 2015 +1000
-
- Revert a change to split large in-memory pages.
-
- Refs WT-1890 WT-1896
-
-commit 610f629949726b16f938ded85188bb6a21820f7e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Apr 20 10:40:54 2015 -0400
-
- Create a "clear a single walk" function and call it from __evict_clear_walks
- and __evict_clear_all_walks, that way we can use the WT_WITH_DHANDLE macro,
- and the comment about clearing the eviction reference before releasing the
- page appears everywhere it should.
-
-commit 6ea949933ff9e68d220738d18fa72eb7a91aab65
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Apr 20 16:50:13 2015 +1000
-
- Fix coverity reports in test/format code.
-
- CID 1295092: Integer handling issues (OVERFLOW_BEFORE_WIDEN)
- /test/format/ops.c: 93
- CID 1295091: Null pointer dereferences (REVERSE_INULL)
- /test/format/ops.c: 489
-
-commit 3eceb85ce623dcce9273f7b722bb64f509dbe24d
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Mon Apr 20 16:15:41 2015 +1000
-
- Clear session->dhandle so that future error messages don't dereference freed memory.
-
-commit 23ce8bae4d6507d6b6d599cb73a26a6c856cce98
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Mon Apr 20 15:48:29 2015 +1000
-
- Clear eviction walks in all trees before the eviction server exits.
-
-commit a4bce0e0bc05d528f118b645d0d1915db00cdcf3
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Mon Apr 20 14:11:57 2015 +1000
-
- Move the "cache is empty" check to where the cache is destroyed, not when the eviction server is exiting.
-
-commit c8fdd9c676c4a24bee6328a56cf7fd074cd045e0
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Mon Apr 20 13:25:36 2015 +1000
-
- Shut down the eviction server before closing file handles to avoid a race.
-
- refs WT-1893
-
-commit fb4a089fa71876232478e1181d821f29dedc0bd8
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 19 12:43:47 2015 -0400
-
- Add the version to the configuration stack before reading the base
- configuration file: that way it's always set, but it's always set
- to the value of the base configuration file.
-
-commit 7aee6d5dabb7942aeb685e797b103c47c9337186
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 19 11:56:22 2015 -0400
-
- Add support to the __wt_config_merge() call to remove strings we don't
- want in the final configuration string, then strip out "create=" and
- "encryption=(secretkey=)".
-
-commit 071d545f47ff4e4badcf9b8c066a44dac7fa2f20
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Apr 17 11:39:23 2015 -0400
-
- Instead of explicitly writing the version into the base configuration
- file, append it to the configuration list.
-
-commit ab5443aa13d62c71423c128853735f699935c01e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Apr 17 11:35:13 2015 -0400
-
- There's no reason to loop through each application-specified configuration
- string separately, there's a call to check them all for the same key.
-
- This also prevents a value being listed multiple times. Previously, if
- you set buffer_alignment in the wiredtiger_open() configuration string,
- but also in the WIREDTIGER_CONFIG environment variable, it would appear
- twice in the base configuration file, with this change, it only appears
- once.
-
-commit abb0bb80cc6dce29b8db61c6747c228c2701ae5a
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Fri Apr 17 10:49:41 2015 -0400
-
- Look for any number of non-data-changing log records to determine if we
- can skip recovery. WT-1892
-
-commit e7f4ecd2055bab4d683eae119f8da95fa7acf21f
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Apr 17 04:32:52 2015 +0000
-
- Update API documentation to explain session usage.
-
- This allows users to account for internal WiredTiger session handle
- usage. We already allocate additional handles for those we use
- unconditionally. This allows users to do specific calculations based
- on their session_max setting.
-
-commit ee02428d1fdf1118c482688ec870a9da69bee45a
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Fri Apr 17 12:28:52 2015 +1000
-
- If an LSM search-near operation lands on a deleted item, make a copy of the key before stepping to the next record.
-
- refs WT-1891
-
-commit 54e856d57da291c5f84da6d0d0ab56280d9956dc
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Apr 16 12:22:31 2015 -0400
-
- Remove use of unneeded tmp_fh. WT-1872
-
-commit 6a32905c397e57643b15e5a3038dbcb99a8a8dc8
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Apr 16 06:18:31 2015 +0000
-
- Fix a deadlock in LSM with schema operations.
-
- There is special code in LSM to co-ordinate schema operations on
- tables (drop, rename, etc). The code does a dance dropping and
- acquiring locks, to allow utility operations to drain for the tree
- while waiting for it to close.
-
- We were doing the dance with the schema and dhandle list locks. We
- needed to include the table lock, or parallel cursor opens could block:
-
- The cursor open is waiting for the table lock:
- __wt_spin_lock src/include/mutex.i:175
- __schema_add_table src/schema/schema_list.c:26
- __wt_schema_get_table src/schema/schema_list.c:98
- __wt_curtable_open src/third_party/wiredtiger/src/cursor/cur_table.c:875
- __wt_open_cursor src/session/session_api.c:240
-
- The LSM table drop is waiting for the schema lock:
- __wt_spin_lock src/include/mutex.i:175
- __lsm_tree_close src/lsm/lsm_tree.c:107
- __wt_lsm_tree_drop src/lsm/lsm_tree.c:943
- __wt_schema_drop src/schema/schema_drop.c:174
- __drop_table src/schema/schema_drop.c:124
- __wt_schema_drop src/schema/schema_drop.c:176
- __session_drop src/session/session_api.c:528
-
-commit 790646183cc5dd056bbf95c4563c20c51602a808
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Apr 16 04:11:36 2015 +0000
-
- Fix a bug in LSM where updates with overwrite could be skipped.
-
- References JIRA BF-829
-
- The issue was that we were not looking in all chunks of an LSM
- tree before deciding whether to apply an update (insert or remove).
-
-commit 72ccd267fea9e491fcf3506e85191f71471cf51a
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 15 09:44:13 2015 -0400
-
- A WT_CURSOR.next operation followed by a WT_CURSOR.search has the same
- problem as referenced in #1887: the key returned to the application is
- in WT_CURSOR_BTREE.tmp, and that WT_ITEM is used as temporary storage
- during the search of a row-store leaf page, so the search can overwrite
- the search key while it's still in use.
-
- Change WT_CURSOR.next to return the key in WT_CURSOR_BTREE.search_key.
-
- Rename WT_CURSOR_BTREE.search_key to be WT_CURSOR_BTREE.row_key, it's
- exclusive to row-store, and no longer exclusive to search.
-
-commit 50f8bedf616a4009068516df865374f688c76c70
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Apr 14 12:52:32 2015 -0400
-
- When row-search builds keys during leaf-page search, the last key built
- is left in WT_CURSOR_BTREE.search_key, and if it's an exact match, that
- buffer's contents are returned to the application (or, in the case of
- cursor, the search key can be overwritten during the search, that is,
- the searched-for-key, and the temporary buffer where we're building the
- on-page keys for comparison, are the same.
-
- Use the WT_CURSOR_BTREE.tmp buffer during the row-search instead of
- WT_CURSOR_BTREE.search_key, and set WT_CURSOR_BTREE.search_key to the
- returned key only when we've found an exact match and are returning to
- our caller.
-
- Making WT_CURSOR_BTREE.search_key and WT_CURSOR_BTREE.tmp pointers so
- it's easy to swap back-and-forth makes this change noisy; note the new
- __wt_btcur_open() function to set things up when a Btree cursor is first
- created.
-
- Reference #1887.
-
-commit 4cf1871ca6770b035a3b30aca29fa89dc11bfc18
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Apr 14 06:30:41 2015 +0000
-
- Make test/thread use WT_TEST not cwd, and add some more logging.
-
-commit b9e2e76511a24505014369aaf0e1ec286e9c473d
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Tue Apr 14 16:30:31 2015 +1000
-
- Merge bulk cursor close with regular file cursor close: we were missing a decrement that kept bulk-loaded files pinned.
-
-commit a9e6a51f4ace1da5c73dd54c108d9a764fc5d8a4
-Author: Michael Cahill <michael.cahill@mongodb.com>
-Date: Tue Apr 14 14:37:12 2015 +1000
-
- Fix some 64-to-32-bit conversion warnings.
-
- include/cell.i|652 col 20| error: implicit conversion loses integer precision: 'uint64_t' (aka 'unsigned long long') to 'uint32_t' (aka 'unsigned int')
- copy.v = unpack->v;
-
- include/cell.i|705 col 24| error: implicit conversion loses integer precision: 'uint64_t' (aka 'unsigned long long') to 'uint32_t' (aka 'unsigned int')
- copy.v = unpack->v;
-
- reconcile/rec_write.c|2683 col 17| error: implicit conversion loses integer precision: 'size_t' (aka 'unsigned long') to 'uint32_t' (aka 'unsigned int')
- tmp->size = ((u_int)(28 + (btree)->block_header)) + len;
-
-commit 4528ddaec1d4b3382055e5c1c53fb7b1772133a2
-Author: Don Anderson <dda@ddanderson.com>
-Date: Mon Apr 13 15:05:16 2015 -0400
-
- Generated tables for config subcategories now contain the name of the
- method they are applicable for. Refs #1879.
-
-commit 1ac393ca9adce1f6d5e4bec035e7c49b32e5a722
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Apr 13 13:00:57 2015 -0400
-
- We can't pass the reconfiguration functions the base configuration array
- because repeated calls will cause us to fallback to the default value.
-
- For example:
-
- conn->reconfigure(conn, "file_manager=(close_handle_minimum=37)");
- conn->reconfigure(conn, "file_manager=(close_handle_minimum=38)");
- conn->reconfigure(conn, "file_manager=(close_handle_minimum=39)");
- conn->reconfigure(conn, "eviction=(threads_max=10)");
-
- The reconfigure call for eviction will reset close_handle_minimum back
- to its default value, because it will find the base value, not the
- reconfigured value.
-
- Try and make sure we don't mess this up again, use "cfg" instead of a
- separately constructed local variable.
-
-commit ac37b924ac18f15726d3bd1984c61e89fbdd405e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Apr 13 07:17:18 2015 -0400
-
- Search the strings in reverse order, that way the first hit wins
- and we don't search the base set until there's no other choice.
-
-commit 110164271dc688c90a092c0c95d37feed2cec188
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Apr 13 05:48:19 2015 +0000
-
- Add functionality to drop files at the end of a wtperf run.
-
- Helps figure out how long a drop takes.
-
-commit ed3158e71f0bd2716269a5771fd162b60b9a1cc0
-Author: daveh86 <howsdav@gmail.com>
-Date: Mon Apr 13 12:59:29 2015 +1000
-
- Allow forced eviction of pages already queued for eviction
-
-commit 9c83351f63afc2e032e492e3030df4f3b1cd6883
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Sun Apr 12 19:02:32 2015 -0400
-
- Disable fallocate on Windows since SetEndofFile does not
- ignore truncation requests like POSIX fallocate.
-
-commit 61a7d81ad26db1f2bfb65258d9b8ae4a4ca25b34
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 12 12:44:05 2015 -0400
-
- When using ftruncate as the file-extension call, we must use WT_FH.size
- as the starting point of the extension (not offset), and we have to read
- the size value after acquiring the lock that prevents racing with writers.
-
- Split the extension functionality out into a separate function and
- try to make it a little simpler to understand.
-
- Reference #1871.
-
-commit f26f1c1c59d5cbbc8f5f543215d8fc636e7175d2
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 12 10:16:34 2015 -0400
-
- The OS layer ftruncate() code sets the WT_FH file size, so when
- ftruncate is used to extend the file, we skip over the bytes added to
- the file during block allocation, and verify eventually fails because
- there are unverified blocks in the file.
-
- Reference #1871.
-
-commit c27c201de9c766aea5249d3eeb85b8103ea6cefc
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Apr 11 09:16:11 2015 -0400
-
- Possible approach to avoiding lockout when sweeping files: before
- closing the file, flush the file from the cache.
-
- Add a new cache-operation, WT_SYNC_CLOSE_SWEEP, that walks the cache for
- a handle and discards any clean pages it finds.
-
- Add call to __wt_txn_update_oldest() before checking if a handle can be
- swept.
-
-commit d20f20f1ac324030986b3ee23e1edf96486c92b4
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Apr 10 05:59:04 2015 +0000
-
- Update file_manager=(close_idle_time=,close_scan_interval=) max values.
-
- The old maximum for both was 1000, the new value is 100 thousand. Setting
- such large values is not recommended, but there is no internal limitation
- on them.
-
-commit c36a3308f685d3b85efe9ac6ee0835f0974574b4
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 14:32:40 2015 -0400
-
- Don't ignore sweeping entirely when we reach the open-file-count limit,
- just ignore the in-use files.
-
-commit 46ef2555bbc51ce6453536e72202782be4949855
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 11:06:50 2015 -0400
-
- Don't keep sweeping once we reach the minimum number of handles.
-
-commit 1fdfcc62726d25a7cceeeefff174a8e1bf9f9e67
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 12:39:03 2015 -0400
-
- const: At condition ret == -1, the value of ret must be equal to -1.
- CID 72082 (#1 of 1): Redundant test (DEADCODE)
- dead_error_condition: The condition ret == -1 must be true.
-
-commit d04e3c25d46a5c4426e1c6d4881cd9e250014931
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 12:19:24 2015 -0400
-
- Remove unnecessary atomic operation, fixing CID 69810 along the way.
-
- CID 69810 (#1 of 1): Parse warning (PW.CONVERSION_TO_POINTER_ADDS_BITS)
- 1. conversion_to_pointer_adds_bits: conversion from smaller integer to pointer
-
-commit d585bdab980508e590cf70508f053182c556d6f2
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 12:00:04 2015 -0400
-
- CID 72073 (#1 of 1): Redundant test (DEADCODE)
- dead_error_condition: The condition session != NULL must be true.
-
-commit 21907f9193e30f51a59fcbaddfbc46cb7732d3b7
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 11:52:13 2015 -0400
-
- Remove unnecessary error labels, fixing SERVER-17948 along the way.
-
- Coverity analysis defect 72088: Redundant test:
- File: /src/third_party/wiredtiger/src/log/log.c
- Function __log_decompress
- /src/third_party/wiredtiger/src/log/log.c, line: 363
- At condition "ret != 0", the value of "ret" must be equal to 0.
-
-commit a29f4d2f40eee784950147af848fdbf277328b7f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 9 11:31:11 2015 -0400
-
- SERVER-17954: Coverity analysis defect 72085: Redundant test
- At condition "ret != 0", the value of "ret" cannot be equal to 0.
-
-commit 1298fd6db4f0c1d6133b3e547b2e2db51ec4a708
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 8 14:38:49 2015 -0400
-
- Fix places where we were using the wrong link for traversing hash buckets.
-
-commit 7d0e5fe3dfa39f7ff377a1d4660bc2bc36dc0df8
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Wed Apr 8 10:19:33 2015 -0400
-
- Enable test/fops for Windows, and add to CI
-
- - Added Windows shim for gettimeofday
-
-commit c6270b677499525067d5d729a6fbdce6ad2f533a
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Wed Apr 8 13:28:41 2015 -0400
-
- test/format for Windows
- - Fixed an issue where fallocate was setting fh->size (incorrectly copied from ftruncate implementation)
-
-commit fac74b4665d6dfa3aebecf741c914fd1678fa628
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 8 08:47:06 2015 -0400
-
- Rename file_manager.open_handles to file_manager.close_handle_minimum,
- try and match existing naming for file_manager configuration options.
-
-commit 709cc8d7ac85d31aeae2387b192092910e6cf854
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 8 08:09:23 2015 -0400
-
- Don't sweep unless there's a reason we need to close file handles.
- Add a new configuration option, file_manager.open_handles that sets
- a minimum number of file handles that must be open before sweep runs,
- default is 250. Reference #1856, SERVER-17907.
-
-commit b59dfec2609847bb22bb48f4e7eede8c45312ce9
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 8 07:58:49 2015 -0400
-
- Fix WT_STAT_ATOMIC_INCR, it didn't get upgraded to the new atomic
- macros needed for the Windows port.
-
-commit 9f9fbb19ba19f90e4fc52d7568bd66427edb31e6
-Author: Pat Gunn <pgunn@mongodb.com>
-Date: Tue Apr 7 22:56:55 2015 -0400
-
- Add a Python ex_stat example
-
-commit b79dcdebf0b1987b59fa70e50c8c61e5e0a64e64
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Apr 7 11:52:12 2015 -0400
-
- If the underlying split buffer grows, existing boundary references may
- point into freed memory, switch the boundary "first byte" field from a
- pointer to a buffer offset to avoid the problem, reference #1852.
-
-commit 27b37db17e70ef73432e1fb48c530246fd753670
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Apr 6 07:27:25 2015 -0400
-
- Fix a broken line, #1846.
-
-commit dbb58f0b8f7b26a70f71f6c67dba93c966b162e0
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 5 12:24:31 2015 -0400
-
- Search the currently pinned page first in WT_CURSOR::search-near,
- before descending the tree from the root.
-
-commit 353093e16eb4a955c4f2e2f4245577cd9156955f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Apr 5 08:36:57 2015 -0400
-
- WT_CURSOR::search() near current position.
-
-commit 96022e89162f746d8252db637a0305aed1965cd1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Apr 4 10:09:17 2015 -0400
-
- Inside wiredtiger_open(), we may have allocated scratch memory when using
- the dummy session or the subsequently created real session, and we don't
- want to tie down memory for the rest of the run in either of them.
-
-commit 928409dbb64e222f722c5f6692f1d7638ce9e617
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Apr 3 20:37:45 2015 -0400
-
- Use scratch buffers in wiredtiger_open(), but clear them out when we're
- done (we have no idea what other functions are using scratch buffers when
- called via wiredtiger_open(), so just make them work).
-
-commit d7e2351db8b855af3b5b8860e000528ac99d57a4
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 2 19:44:16 2015 -0400
-
- Windows doesn't have <x86intrin.h>, it does have <intrin.h> which is
- included by <msvc.h>.
-
-commit d3dacbffd2a87ea79ec05fa569bdd5d34f90254c
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 2 13:06:19 2015 -0400
-
- Make --with-spinlock=pthread_logging option compile again.
-
- Remove the WT_SESSION_IMPL argument to the fprintf, vfprintf, fflush and
- fclose functions, there are places we want to use it that don't have
- session handles, and it's not currently needed. Clean up error handling
- in the vfprintf function.
-
-commit fccb479aa9b97fb22d9ec2827c94ba49faa5ab79
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 2 09:58:52 2015 -0400
-
- Gcc 4.1 can't figure out that copy.v can't be used uninitialized.
- Inline a length check in the short-key/value case to avoid it.
-
-commit dca44d7f2762052bf675a5edc0a2f63534c85cf1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Apr 2 08:55:41 2015 -0400
-
- Coverity 44262 (#1 of 1): Parse warning (PW.MISSING_INITIALIZER_ON_CONST)
- 1. missing_initializer_on_const: const variable "__clear" requires an
- initializer.
-
- This isn't a bug, but we only use WT_CLEAR_INLINE() in a single place
- in WiredTiger, and we can probably do better by not clearing the unpack
- structure at all.
-
-commit 0b60cfdfa3912d3048e3c5dbce56db7745c20964
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 1 17:43:31 2015 -0400
-
- Do a binary search of the base configuration options when checking
- configuration at the API layer instead of a linear walk.
-
-commit 078cb46916b55c254abe1a966a2413410a5f6174
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 1 15:57:04 2015 -0400
-
- Use the existence of the <x86intrin.h> #include file to configure the
- x86 vector instructions. Some old versions of gcc don't have it, but I
- believe they're old enough that we don't care.
-
-commit 684fd71475cbc6b15290945af0160fac0313ad6b
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Apr 1 15:44:42 2015 -0400
-
- Reset eol if we continue. Return NOTFOUND if checksum mismatch. #1840
-
-commit 507a3005b3bc4f9cc69153da5acb534702b734b8
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Apr 1 09:40:36 2015 -0400
-
- Don't #include <x86intrin.h>, gcc 4.1.2 doesn't have it, use
- <emmintrin.h> instead.
-
-commit 9c29e0f13268c03038704372c069353c81357791
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Mar 31 16:49:47 2015 -0400
-
- Modify log_scan callback args to send in next LSN. #1837
-
-commit 76cba586685828fc3a8598b48e70c7614818859c
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 31 12:11:04 2015 -0400
-
- Coverity 1199719 (#1 of 1): Dereference after null check (FORWARD_NULL)
- var_deref_op: Dereferencing null pointer "ref".
-
-commit f9edf738c6075601bb2885cd2aba3ea4a0134b5e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 31 12:00:50 2015 -0400
-
- Coverity 1129071 (#1 of 1): Dereference before null check (REVERSE_INULL)
- check_after_deref: Null-checking "conn" suggests that it may be null,
- but it has already been dereferenced on all paths leading to the check.
-
-commit 4317a14ffead9029c6524c3a5013e1d91b2f0a02
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 31 11:47:50 2015 -0400
-
- Coverity 1129018, 1129019, 1129020, 1129021: Side effect in assertion
- (ASSERT_SIDE_EFFECT).
-
-commit a5bb492b41e4121ff69776ba70072585aef405af
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 31 07:42:01 2015 -0400
-
- When looking for the next-larger item (__col_insert_search_gt), stay
- at the same level if the checked record is equal to the searched for
- record (can't happen in the current use of this code, but it's the
- correct thing to do in a skiplist).
-
- When looking for the next-smaller item (__col_insert_search_lt), the
- search still has to be greater-than, reference #1835.
-
-commit f2055cab87688cbb26da659c8038dbb4f032eb30
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 31 07:41:40 2015 -0400
-
- Add a __setitem__ to cursors in the Python API.
-
-commit 674170067c4016455e78d14ec24e3641d047f1c4
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Mar 31 14:26:00 2015 +1100
-
- Add a __setitem__ to cursors in the Python API, remove lots of boilerplate c.set_key ... c.set_value ... c.insert code.
-
-commit 4790f13cd8191d834f40c97a0c00d729f91acd1b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 30 09:52:34 2015 -0400
-
- async_max_queue: collection statistic, aggregate doesn't apply, don't
- clear.
-
- cache_eviction_maximum_page_size: collection statistic, aggregate
- doesn't apply, don't clear.
-
- various txn_checkpoint_XXX statistics: collection statistic, aggregate
- doesn't apply
-
- various Btree maximum size statistics: aggregate the maximum value,
- rather than doing no aggregation at all.
-
- lsm_chunk_count: aggregate the count of LSM chunks.
-
- Add syntax checking code to stat.py to disallow aggregation flags
- for connection-level statistics.
-
- Reference #1742.
-
-commit 0c9f1341e2fdb93d3bd4d3fc58176f6ad169825e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Mar 28 13:15:53 2015 -0400
-
- When we find a record in the slot's update skiplist, but then want to
- jump past the rest of the deleted records, we have to adjust based on
- the starting record of the slot, use the page's repeat array to find
- that starting record.
-
- Another run at the __col_insert_search_gt (the greater-than skiplist
- search), hopefully it's finally correct.
-
-commit b5edc28c2588a0257daf50f43db7b5bf335c7ef5
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 27 14:40:36 2015 -0400
-
- Cleanup, fix, simplify the cursor-insert-greater-than search, add a
- corresponding less-than search and hook it into the WT_CURSOR.prev
- function. This runs better than before, but there are still problems.
-
-commit c7cdb2e1f1960bc9432185df8fb7b507198889ff
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 27 10:38:17 2015 -0400
-
- The gaps in column-store tables can be large enough we spend too much
- time looping through the "deleted" records when writing out the page,
- skip the boring part. Reference #1807.
-
-commit 488d064b45fb194ea2f3b9e2187214fb44b86a5f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 27 10:07:16 2015 -0400
-
- There can be huge gaps in the variable-length column-store name space
- appearing as deleted records. If more than one deleted record, do the
- work of finding the next useful record. Reference #1807.
-
-commit e1ee6432fb5a948ebd5cafe2ba9c2b79411458f5
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Thu Mar 26 11:31:27 2015 -0400
-
- Revert "Make LZ4 default off in scons builds for now."
-
- This reverts commit 67b71e1eaad1a5825f99a415d9851976f11dbfee.
-
-commit 003e6c3598408c7670f65a8720622c38fdaf148d
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Thu Mar 26 11:17:57 2015 -0400
-
- Use Standard C type uint64_t for zlib
-
-commit e12b9e0b005da7364330f4d3409256ded26ba90d
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Mar 26 18:31:15 2015 -0400
-
- Add checks for Windows builds: _M_AMD64 and _MSC_VER.
-
-commit 1b1371c83e3e78feab0921c88a63d960288d58e0
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Mar 26 11:09:28 2015 -0400
-
- __wt_btree_size_overflow uses the page's WT_PAGE_INDEX, wrap the call
- in WT_ENTER_PAGE_INDEX/WT_LEAVE_PAGE_INDEX.
-
-commit e4c9309756f8bcda4bd7b9be5232887cdea377be
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Mar 26 10:57:33 2015 -0400
-
- Instead of asserting split-generation is set in WT_INTL_FOREACH_BEGIN,
- move the test into the WT_INTL_INDEX_COPY macro (which is called by
- WT_INTL_FOREACH_BEGIN), WT_INTL_INDEX_COPY is the code that reads the
- actual WT_PAGE_INDEX value: if a page can split, it's not safe to look
- at anything a WT_PAGE_INDEX references, and we have code that uses a
- WT_PAGE_INDEX reference outside of the WT_INTL_FOREACH_BEGIN macro.
-
- Add two versions of the WT_INTL_INDEX_COPY macro, WT_INTL_INDEX_GET
- which checks the split-generation, and WT_INTL_INDEX_GET_SAFE which
- doesn't.
-
-commit 8bdb29fed9ef4a124e8c1c3c1a6c031f4ecbe130
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Mar 26 09:34:19 2015 -0400
-
- Remove safe version of the WT_INTL_FOREACH_BEGIN macro (it was only used
- in a single non-DIAGNOSTIC code path), wrap child page-index walk during
- a deepen split with WT_ENTER_PAGE_INDEX/WT_LEAVE_PAGE_INDEX, reference
- issue #1799.
-
-commit 87326b721a443936e118e72e58e4f51e14845132
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Mar 25 21:44:02 2015 -0400
-
- Use the size of the vector chunk to decide if we'll execute vector
- instructions, rather than some specific length.
-
-commit fc4eea6e3eb39cdf1b147d6a7b65af4f0d08f5c1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Mar 25 18:20:23 2015 -0400
-
- Instead of using _mm_cmplt_epi8 when we find a mismatch, fall into the
- slow comparison code.
-
-commit a29b65429f2963bb3e3ff8ca4416f16ae8ca2e52
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Mar 25 16:28:34 2015 -0400
-
- Cleanup #includes so the whole tree builds.
-
-commit 35b436dfd47895cc19310095f4f4e5f3b9b86501
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 23 17:51:50 2015 -0400
-
- We don't have to reinitialize userp and treep when falling into the
- slow/remainder loop, they're correctly positioned.
-
-commit a307a61135394d5006bd3257325bffd82ea80f21
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 23 17:14:30 2015 -0400
-
- Don't call _mm_cmplt_epi8() until we don't compare equal.
-
-commit a609c82bfa6fbf80cd5fb853e1d97c16878a0180
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Mon Mar 23 15:25:23 2015 -0400
-
- Set checkpoint LSN to existing log record. #1700
-
-commit 076336e9b244664440777cfeab9fe0d925d4c25e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 23 10:13:37 2015 -0400
-
- Integrate Sasha's vectorized search code.
-
-
-3.0.2, Mar 26 2015
-------------------
-
-commit 17bd2356a5b17893e626749be399fb8fda23db1e
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Fri Mar 20 01:07:59 2015 -0400
-
- Use beginthreadex, and ensure we use the correct C calling conventions in all callbacks
-
-commit 269e847ad64dd12dfcadb58f84f905069e5b8dce
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Mar 25 15:47:46 2015 +1100
-
- Split sweep into two passes: one that walks the handle list without locking and finds candidate handles to close, and a second pass holding the handle list lock that removes dead handles.
-
- refs #1814, #1811, #1808
-
-commit 065a435f6b1d1b3fcb640d59c3109d0c2e24d308
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Mar 24 16:53:28 2015 +1100
-
- Add statistics to track internal/overflow/leaf pages in cache.
-
- It is often useful to know if a cache is full of internal pages. I could have added page count tracking, but byte count tracking seems more useful (enough to justify the extra performance overhead).
-
-commit f4616895f1ac35aaa50fc1a68e0c6dd9e0cf7717
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Mar 23 17:00:15 2015 +1100
-
- If a walk for pages to evict ends on a page requiring forced eviction, don't keep it pinned.
-
- refs SERVER-16662, SERVER-17382
-
-commit 49ddcca712db3a50c390f48b518f0835f28bc9d4
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Mar 21 16:17:51 2015 -0400
-
- Separate out the code to flush a file handle and rename the file into place.
-
-commit a0e88c41781c5b7ea0155fb57a58c91a964af4a3
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 20 19:08:45 2015 -0400
-
- If the underlying FILE was opened for writing, flush and fsync it during close.
-
-commit 8d918f0ef8588056ecf729e72ffdd8bc0a79fd6c
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Mar 20 22:54:13 2015 +1100
-
- Fix a race closing eviction helper threads: close the main eviction thread first, so the number of helper threads can be read safely.
-
- Partial fix for #1698
-
-commit 294b0bce296bebf9790418e5575f59be5bec000c
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Mar 20 17:19:28 2015 +1100
-
- Only align buffers on Linux if direct I/O is configured. Clarify why we round up the allocation size: it usually won't matter because we allocate multiples of the alignment size anyway.
-
-commit ee7456799277e19ff4771de79ac4eafac43aff9b
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Thu Mar 19 16:16:15 2015 -0400
-
- Visual Studio 2015 Preview support
-
-commit 9c60b462a25a36915207dde240579c5aa673f2c7
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Mar 20 00:09:42 2015 +0000
-
- Update the eviction server to not set stuck if it isn't full.
-
- When only looking for pages to force out, the cache isn't really stuck. The trouble is that if we set stuck the eviction server doesn't clear walks which can lead to it always holding a reference to the page we (really) want to evict.
-
- refs #1777
-
-commit f0138353697d8706bd1f26e83b9fff8f4e83af8d
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Mar 19 13:04:17 2015 -0400
-
- Create the base configuration file in a temporary file and rename it into place so a crash can't result in a corrupted base configuration file.
-
- refs #1775, #1776, SERVER-17571
-
-commit 2e15cd6fc322c90c763394c52056bfebd4153aad
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Mar 19 10:25:31 2015 +1100
-
- Avoid EBUSY errors from truncate caused by checkpoints.
-
- refs #1643
-
-commit 3188352d623de85803db3dd6e5b5188822e2db4f
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Mar 18 13:00:53 2015 +1100
-
- If the session-level attempt to lock a handle gets EBUSY, fall back to the slow path. The sweep server relies on there being a retry loop if an exclusive operation such as verify conflicts with a sweep.
-
- refs #1404
-
-commit 5d8641568ac27e21d6671cb646a818708eb0aa28
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Mar 17 15:38:57 2015 -0400
-
- Set the end of the log if we find a zero hole.
-
- refs #1766, SERVER-17569, SERVER-17613
-
-commit f38e325f87d94f178c932328632857361fc3c92c
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Mar 17 04:30:11 2015 +0000
-
- Implement review feedback. The new configuration strings are:
-
- file_manager=(close_idle_time=30,close_scan_interval=10)
-
- Update the sweep test to take advantage of new configurations and
- shorten the runtime.
-
-commit 87dba78b65bc869c426363d86d02134aa8f8f2ac
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Mar 17 14:39:49 2015 +1100
-
- Round up the "in-memory size" for WT_UPDATEs to a multiple of 32. They are a very common case, and for tiny updates, this seems to do a better job of matching tcmalloc's behavior at least.
-
- refs SERVER-17424
-
-commit d277a088385401ff6e6664dd688a2fcab2ac4087
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 16 11:42:56 2015 -0400
-
- Sanity check all splits for at least 100 keys on the page, if there are huge keys and a too-small cache, there's nothing to be done.
-
-commit d4aa136a20c9baf151c3185058ce089679e0de0e
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 16 06:10:12 2015 +0000
-
- Ensure we free memory when statistics cursor open fails.
-
- refs #1760
-
-commit 66622ac33357d53212ddfa362fdf3c6b439bf34b
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 16 05:55:45 2015 +0000
-
- Update the split deepen sanity check. Allow a page that is using more than 1/4 of the cache to be split.
-
- refs #1759
-
-commit f99114f785985d152ba8ddfa735771574e6bff5d
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Mar 16 16:43:27 2015 +1100
-
- Fix the search for a free hazard pointer slot. Previously, it gave up early once all slots were allocated, even if they weren't all in use.
-
-commit 15fe04460c1f057811e0f3e02feb249c710d05ce
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Mar 16 16:40:49 2015 +1100
-
- Track the number of hazard pointers active in the eviction server. Don't start a new walk if we are close to the limit.
-
- refs SERVER-17551
-
-commit e6d8c8a56c4dc83f206d43fcaa21902b35b1e4e6
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 16 05:20:22 2015 +0000
-
- Add a new statistic tracking range of IDs pinned by a transaction.
-
- refs #1746
-
-commit f954ab6b3fb6a51ecf3cf625cd3f5c5f818e0fde
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Mar 14 12:31:00 2015 -0400
-
- Pass an allocated memory size to __wt_page_inmem().
-
-commit 308dc500adcde7b62c8dbb9aa0fb401795445546
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Mar 14 10:51:21 2015 -0400
-
- Don't drop core if duplicate symbols are detected (caller's table gets free'd twice).
-
-commit dc19643859063a03b985d97e2f24b1262ec4b15d
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 13 15:35:28 2015 -0400
-
- Use SCNxxx #defines in sscanf() calls, not PRIxxx.
-
- We can't cast a pointer to a WT_LSN.offset to a pointer to a different type, we can't know WT_LSN.offset is the same size as that type.
-
-commit 99a992ed5207822ce2bc2fd69cb5b37408e6080f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 13 14:17:34 2015 -0400
-
- Switch sscanf from SCNu64 to SCNi64 so we handle both hexadecimal and integer symbol/frequency values
-
- Add testing support for out-of-range symbol/frequency values.
-
- refs #1536
-
-commit 48a6f6e76aafbf7b33dbb22325ddb4e8e27603a1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 13 10:04:06 2015 -0400
-
- Fix problems with configuration value parsing, break out the Huffman configuration file parsing code into a single routine.
-
-commit e7ef6d0c3d107f374de9924d981d731fe36fb4ec
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Mar 13 16:32:37 2015 +1100
-
- Update memory allocation accounting for new pages and realloc.
-
- They used to not always account for alignment overhead.
-
-commit 1e97f9730a6fd70c03784e292cb0435c8fb82354
-Author: Don Anderson <dda@ddanderson.com>
-Date: Thu Mar 12 14:02:24 2015 -0400
-
- Fix to track deleted cells. Remove unused vars.
-
-commit 5778e4098862665d4cdaf2cda3ef40d47d951efb
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Mar 12 16:35:27 2015 +1100
-
- Add ability to configure sweep server timings via API.
-
- Still needs some test code.
-
-commit a1c9f8f8833482c9773fc30b7b9a7b5f1ab09014
-Author: Don Anderson <dda@ddanderson.com>
-Date: Wed Mar 11 22:48:12 2015 -0400
-
- Made some adjustments to stats so that the btree entries is an accurate total for row and column stores. Entries that appear in internal pages (pointing to leaf pages or other internal pages) are no longer counted. Added and modified tests to check the number of btree entries reported by stats against the number of KV pairs expected to be in btree.
-
- refs #1733
-
-commit 6d7c061a8292021195b32260a8b41d3e92e1958f
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Mar 11 11:11:57 2015 -0400
-
- Support no-logging mode in wt command for debug.
-
- refs #1732
-
-commit 4a802bd592b6c8506b07900bf89a9d9fe53cfc25
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Mar 11 09:30:39 2015 -0400
-
- If the application only has 1 outstanding async operation at a time, we won't let them specify that. Change the minimum ops_max value to 1, but set the minimum maximum bound in the code to 10 as before.
-
-commit 3a412a2371e80ed9e26e546b6fe3c09d2d4e4091
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Mar 11 09:29:55 2015 -0400
-
- If the application re-uses a WT_ASYNC_OP structure, we assert: return EINVAL instead, it's easier to debug than a core dump.
-
-commit 16119f98fee9525bf1990ccbcc979b9f08a8b000
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Mar 11 20:24:20 2015 +1100
-
- Don't cache an ikey before winning the race to split into a parent.
-
- refs #1582
-
-commit 4d0b4093e3d3c9fa0be2bbb01467579f05deddc5
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Wed Mar 11 03:44:22 2015 +0000
-
- Update visibility check to know about checkpoints.
-
- This should allow us to evict more pages while a checkpoint is in progress. We can evict dirty pages from a file once the checkpoint is finished in that file. Similarly for the row store specific obsolete update check.
-
- refs #1745
-
-commit 272daddb3734cd196cd303df4271f7e9e8f00cd3
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Wed Mar 11 02:55:56 2015 +0000
-
- Add a global checkpoint generation and track it per data handle. Not yet used, but will be used to allow more eviction during checkpoints.
-
- refs #1745
-
-commit 89db28287d7b212efb31203eca253fbf144ea207
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Mar 10 07:19:59 2015 +0000
-
- Mark statistics with the right aggregation flags (specifically cache dirty tracking and log preallocation). This helps wtstats graph generator.
-
- refs #1742
-
-commit e51cc35a88ed497f911a25fec9f4bfcc6617d6a2
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Mon Mar 9 15:40:59 2015 -0400
-
- SERVER-17471: Use uint64_t instead of long since long is compiler specific
-
-commit 105903c1beb0d16b40e243ed3624417c4fa0702c
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 9 12:19:50 2015 -0400
-
- Remove pockets of case-sensitivity for configuration strings.
-
-commit 1e8c2b89de09462253231213329674b8769d11fe
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Mar 8 13:05:51 2015 -0400
-
- Change the statistics cursor WT_CURSOR.reset method to re-load statistics values.
-
- refs #1533
-
-commit 769dc5976fe0d2448fb4fde511c5bc29eea39bfb
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 3 16:45:09 2015 -0500
-
- Change checkpoints to do first-fit allocation when we start writing the actual checkpoint blocks, that way if we delete significant space, the checkpoint blocks won't prevent file truncation.
-
-
-3.0.1, Mar 9 2015
------------------
-
-commit d654795bb763b95d14604b9b65d09ae79b8ee5b6
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 9 05:35:33 2015 +0000
-
- Add a test case for checkpoint consistency.
-
- We recently fixed a bug triggered by updating a page after the write-leaves phase of a checkpoint had completed, and then either checkpointing again or closing the database. We would end up with inconsistent data, because we weren't marking trees dirty all the time.
-
- This test case reproduces the scenario so we don't reintroduce the failure.
-
- refs #1735 SUPPORT-1248, SERVER-17506
-
-commit 0315ee75f712ed0ccddca0616339de93b17835a3
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 9 11:21:06 2015 +1100
-
- Bump release version to 2.5.2
-
-commit 4235c69d37474fb4e14673e0ea99337659db948d
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Mar 9 08:32:50 2015 +1100
-
- When skipping a dirty page during a checkpoint, make sure the tree is marked dirty.
-
- refs SUPPORT-1248, SERVER-17506
-
-commit 8382d14f32efc53b19aecd596cab3ba0d682b22d
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Mar 6 16:10:37 2015 -0500
-
- Only increment/decrement from the current position on the first position set by our caller, that is, the position passed-in from our caller is the only position the caller has already seen.
-
- refs SERVER-17345.
-
-commit ed9c48c7b8fa5dd3362e417fda8337f1690585ed
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Mar 6 22:48:45 2015 +1100
-
- Ignore empty child pages in column stores.
-
-commit 553a351ca6a81179c7f1db9c04d6f96aef0545ac
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Mar 5 05:46:15 2015 +0000
-
- Don't allow LSM bloom create to block waiting for space in the cache.
-
- We are only accessing one page at a time, and allowing the bloom create to block can lead to LSM not making progress.
-
- Refs: #1720
-
-commit 5285b768de3237b4379f1526176efdeb55860971
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Mar 5 16:15:08 2015 +1100
-
- Remove the special "discard" path for trees marked clean: always go through eviction, which checks page modify flags. This may mean additional writes to internal pages when discarding a tree, but means that if a discard stops part-way through, the remaining in-memory tree has not lost any context.
-
- refs SUPPORT-1248, SERVER-17510
-
-commit 00edf7a47115923bc9f3eaa7eee84c9f7d6b0d77
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Mar 5 16:15:07 2015 +1100
-
- If a page is split by eviction, mark the tree dirty to avoid having dirty pages in a tree marked clean.
-
- refs SUPPORT-1248
-
-commit 57d3eba53fb91a0287374b9642b7cd4ef644854a
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Mar 4 10:00:59 2015 -0500
-
- Add fsync before closing log files and after header. #1717
-
- refs SERVER-17451
-
-commit d970bfe6b1bed7d1919b800bf2d65a3789b74d6f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Mar 3 09:38:59 2015 -0500
-
- Don't set eviction_workers_min/eviction_workers_max in the connection structure before checking the values are OK.
-
-commit 90f3f34a97440b6788a1a558e560a33fd116f166
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Mar 3 09:26:38 2015 -0500
-
- Move writing into log worker thread. #1683
-
-commit 1266bbb1143b22fec6b1c255b3aade5d0506477e
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Mar 3 03:37:18 2015 +0000
-
- Fix a bug in the reconfigure API related to shared cache quotas.
-
- While fixing the bug simplify the code flow for reconfigure and caches/shared caches.
-
- refs #1712
-
-commit 1ebd617e6dfcf542983d62d9666c5a328dd41bb8
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Mar 2 15:31:05 2015 -0500
-
- Add overflow key/value counts to the statistics code
-
- refs #1520, #1703
-
-commit a2166ead528ef61da478db67e3c0209a6bef5ac6
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 2 14:18:37 2015 +1100
-
- Allow memory_page_max to be at most 1/4 of the cache size not 1/2.
-
- If we let a single page grow to half the cache size, it's too easy for the cache to get pinned full if it is tiny to start with.
-
-commit b036921625e415bb66ac458922b81a9fae07740b
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Mar 2 14:17:12 2015 +1100
-
- Take all eviction candidates if we are aggressive.
-
-commit 0ddd3face0b99f5653001825bf3df0662ffcdc10
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Mar 2 13:07:08 2015 +1100
-
- Reverse the direction of the LRU walk each time the eviction server goes to sleep. Keep looking for candidates if eviction is stuck. Don't give up our walk position if eviction is stuck.
-
-commit cff10ba30f2eac379197e5d7dea49da2b8159890
-Author: Don Anderson <dda@ddanderson.com>
-Date: Fri Feb 27 15:05:24 2015 -0500
-
- Added test for wt dump on an index.
-
- refs #1709
-
-commit a9f0e3ac769a060a8c3b06bac71fbed0e5f46cc6
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Feb 27 16:59:35 2015 +1100
-
- If we encounter a dirty page when closing a clean tree, switch to checkpointing.
-
- refs SERVER-17319, #1643?, #1404?
-
-commit bfcf5987b2b6f08d931d620330aed46837e3a8c2
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Feb 26 16:32:09 2015 +1100
-
- Add some paranoia to LSM around transaction checks: make sure we have allocated an ID before using it for anything.
-
- Recheck switch transactions before doing update checks in old chunks.
-
- refs #1641, #1701, #1702
-
-commit da0bc67c821282e9fd0da725279811b59b25a675
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Feb 26 16:32:09 2015 +1100
-
- Add some paranoia around setting row-store internal keys.
-
- refs #1582
-
-commit 5ea91f6ed0e0677530c5ab8215f81eb48ea307b8
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Feb 26 04:51:25 2015 +0000
-
- Several optimizations to large page eviction:
-
- * Don't update the read generation on page in if it's set to oldest.
- * Clear the walk positions before the eviction server sleeps.
- * If only looking for pages that would block add them all to the queue.
- * If evicting dirty pages use the worker threads, not the server.
-
-commit 0eecd0a2d97771380ecbd7fd27bd44988db1148c
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Thu Feb 26 02:08:36 2015 +0000
-
- Fix a bug in checkpoint, where it could get an EBUSY return.
-
- The case that could return EBUSY was when checkpointing with a specific target, while that target was open exclusively or for a bulk load.
-
- Refs #1404 #1589
-
-commit 392a540deec817c5d6738b8e848a68882df3ac8a
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Feb 25 15:03:50 2015 -0500
-
- If the LSN given doesn't exist, don't return an error, but do force recovery. Fix recover.sh to grep the CONFIG. #1700
-
-commit 59b699b7085868d1b12a41ae4cd7a01f25f6e865
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Feb 24 23:55:20 2015 +0000
-
- Handle the case where a large record pushes us over a page boundary and we spill across two pages correctly. The previous fix could let us span additional space.
-
- Fix another bug in the fixup code - ensure there is enough space in the temporary buffer when fixing up after the first page is full.
-
- refs #1697
-
-commit ad8b58188961943e74c57b85e3b976aa03b79617
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Feb 24 18:54:18 2015 -0500
-
- Set flags to SLOT_INIT_FLAGS on free. refs #1683
-
-commit 45e4c049044efc96c682f390466a35d22dac555f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Feb 24 16:17:32 2015 -0500
-
- Move WT_PAGE.u.row.d next to WT_PAGE.u.row.entries, it reduces cache misses inside of row-search.
-
- refs #1665
-
-commit 3c6d7adf422f432ae117e2292dcce00cc3b531a3
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Feb 24 16:07:16 2015 -0500
-
- Turn off key prefix-compression and rebuild the key before doing the boundary split. The size of the key is likely to increase by a few bytes, and if the value is large enough to consume almost all of the buffer, we overflow the space available.
-
- refs #1697
-
-commit ca9ab16c320f6f154ff1fd3d0b65316f87e8bddc
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Feb 24 06:51:14 2015 +0000
-
- Fix a bug in reconciliation. In cases where we decide to squeeze one more large entry onto a page, we could have attempted to decrement space available negative.
-
-commit 777c35b074fce3656c14ca5770b424a65c719134
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Feb 19 12:32:16 2015 -0500
-
- Add log worker thread to advance write_lsn.
-
- refs #1683
-
-commit ae686a225a011bac07119e2d66e837e08d5a3a0e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Feb 10 12:46:04 2015 -0500
-
- Sasha says (my paraphrasing): I simply rearranged the fields in the WT_PAGE struct, so that u.row.d and u.row.entries are close together at the very end of the definition of union u, and the "uint8_t type;" immediately follows that.
-
-
-3.0-RC10, Feb 24 2015
----------------------
-
-commit 2fdfb2bbed56e42e1717e567828c68d0b2eb868d
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 24 14:44:48 2015 +1100
-
- Review places that set/clear session->dhandle, replace with macros. Change callers to save/restore if they need to release a handle after a call.
-
-commit ce89b608835561b11ce4e525a5ebdad86558f115
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 23 19:55:56 2015 -0500
-
- The statistics server has open handles and may be opening underlying handles, make sure we don't overwrite them, reference #1694.
-
-commit 8827d909d7f14e9ad767d909d65598508ff0025c
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 23 09:39:21 2015 -0500
-
- Add an explicit barrier after setting the checkpointing value. (I'm pretty sure it's not actually necessary to have a barrier in the current code, but the barrier we rely on is in a different function and isn't always called, depending on the eviction configuration of this file, I'd just rather be safe than try and debug this some time in the future.)
-
-commit 10abb2c47cffbcc215c3507256cee3e2cae2dd5f
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 23 15:23:54 2015 +1100
-
- If eviction is walking a file with only one live page (e.g., the last page), be careful to step over it, or eviction's hazard reference can make forced eviction stall.
-
- SERVER-17344
-
-commit f72367f97357cf3e77f0d57d39992686b400ebe7
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Feb 22 15:40:37 2015 -0500
-
- Make sure we have a local copy of the start/stop cursor keys before calling into the underlying Btree range truncate function
-
- SERVER-17141.
-
-commit 09d345951d43e7ed928980048e5c5c927406a66b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Feb 20 11:25:22 2015 -0500
-
- If WT_SESSION.truncate isn't given a start cursor, instantiate one, it's always faster to traverse Btree objects in a forward direction.
-
-commit ce38bc5b40f1aa3ffc07cdd2cf993e32143135b5
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Feb 20 08:57:57 2015 -0500
-
- We don't need to do a search in the row-store cursor truncate setup code, the WT_SESSION.truncate API code already did one.
-
- This does not mean WT_SESSION.truncate can't return WT_NOTFOUND in some cases, the first thing cursor truncate does is a cursor remove call in order to acquire the page's write generation information -- that remove call does the usual "discard my current reference and get a new one" work, which allows another deleting thread to race and remove one of the truncate's start/stop keys.
-
- refs SERVER-17141
-
-
-3.0-RC9, Feb 18 2015
---------------------
-
-commit bf3ee2cd064b46cf0175d75950c825aa1f42c694
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Feb 18 12:53:41 2015 +1100
-
- Flip cache overhead to apply to the allocated bytes rather than the total size. Include the overhead in stats so that tools (e.g., mongostat) report accurate cache full and dirty percentages. This also makes eviction triggers and targets meaningful: with the default trigger of 95% and overhead 8%, eviction was previously never triggered until the cache was completely full.
-
-commit f9e6f942cf73c8a53aaadbc587c1b7efad6cc832
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Feb 17 11:33:13 2015 -0500
-
- Coverity notes the TXN_API_END_RETRY macro has an unnecessary test for "(ret == 0)" at the end of the do {} while loop.
-
-commit c34a56f357e21d134a2d9d0fefc032544069d8d7
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 17 21:56:34 2015 +1100
-
- Allow the maximum number of eviction threads to be reconfigured. This was previously permitted by the API, but the array of thread contexts was not correctly resized, leading to segfaults.
-
- refs SERVER-17293
-
-commit 67527fc235406469e69dbaec3dcd571469e660c0
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 17 21:50:53 2015 +1100
-
- Make the eviction walk incremental: don't spend too long in any one file, fix tracking of whether we are making progress.
-
-commit 788265ed273c63183053e6325a9aa03c89c02860
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 17 21:48:44 2015 +1100
-
- Combine the various checks for whether a page can be evicted into one place.
-
-commit 748e7b0c58b358b14340bacae41f9c46f3c06f7e
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 17 21:47:57 2015 +1100
-
- Skip hot pages during write leaves: checkpoint will have to visit them anyway.
-
-commit a9de0f7ac8ad373d7aef6a480c69a2a7e0b55c59
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 17 21:18:41 2015 +1100
-
- Run recovery after crashing test/format in the recovery test.
-
-commit 4733961a3c1fa37988178d1b1dd4eb44d83b63f6
-Author: Thomas Rueckstiess <thomas@rueckstiess.net>
-Date: Mon Feb 16 12:50:26 2015 +1100
-
- fixes and improvements for wtperf parsing
-
- - convert wtperf microsec to millisec
- - don't skip monitor* files when parsing directory
- - parsing code reorganization
- - renamed wtperf stats fixture
- - added tests
-
-commit dc396e1cd64871219b9e5a1b6558707feb70706e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Feb 15 13:44:30 2015 -0500
-
- Clear the btree object statistics we're about to count, otherwise each cursor gets a cumulative value.
-
-commit 195b144bb37814b31cfa413029cda0b28f13f261
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Feb 13 12:06:36 2015 -0500
-
- Don't map WT_NOTFOUND to ENOENT unless a uri was specified, that's the only interesting case. Reference SERVER-17141.
-
-commit e9d7fee2c2c08985b8e2d2716e899853c5198290
-Author: Thomas Rueckstiess <thomas@rueckstiess.net>
-Date: Fri Feb 13 17:22:05 2015 +1100
-
- added support to parse wtperf files.
-
- they go into a separate stats section named "wtperf".
-
-commit 29d0d26fd1cd76392ea8225c1c4022ca54443737
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Feb 12 18:05:33 2015 -0500
-
- Ignore unexpected information in the metadata entry, the metadata entry might have been created by a future release, with unknown options.
-
-commit 05f07753059a4fa7f0f1bab7a107a9e6d17bf4af
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Feb 12 15:42:35 2015 -0500
-
- Remove the requirement of a HAVE_DIAGNOSTIC build for the verify commands to work (except for dump_offsets, that requires the btree debugging code and so won't work anywhere but a HAVE_DIAGNOSTIC build).
-
-commit 006ed9f17c7fc0fe65dc43717ed0239b3bac564c
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Feb 12 15:31:36 2015 -0500
-
- Add support for a new verify debug option, "dump_shape", that reports the levels of the tree.
-
- __wt_config_gets() returns WT_NOTFOUND when there's no entry, don't fail every command when DIAGNOSTIC #defined and debug options aren't set.
-
-commit 46b7721215856d08ca3a37f7ffc27c57b1d4c1d7
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Feb 12 13:27:32 2015 -0500
-
- Add recover config setting and use it in the wt command. #1651
-
-commit 0305a51ffba383af13d6078d409a03b249c502c5
-Author: Don Anderson <dda@ddanderson.com>
-Date: Wed Feb 11 10:21:04 2015 -0500
-
- Add test to detect file ID problems in recovery. Refs #1622.
-
-commit fc0ff5a9ea09e54512353d2275126cb54dbc5451
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Feb 10 13:02:28 2015 -0500
-
- Allow 'wt' command to run with or without recovery. #1651
-
-commit a26d87a53eb2ac2dcae9312b7979499c34c11613
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 2 19:20:33 2015 -0500
-
- Replace wiredtiger_strerror_r with WT_SESSION.strerror, reference #1516.
-
-commit 33c146b51fdac86999e2eaa67f5636490eb441fb
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Feb 12 13:44:35 2015 +1100
-
- Disable aggregation across all open checkpoints if statistics cursors don't specify a checkpoint.
-
-commit 04ec3d021d2f8b08b69d3ea5d0f243f468c71f2e
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Feb 12 13:00:49 2015 +1100
-
- Move server thread waits to the beginning of their loops: check that we're still running before waiting. This makes more sense to me, but also fixes a problem introduced recently where the checkpoint server could hang on shutdown if the signal from the closing thread got lost.
-
-commit 85aae87cb1e019b0bcac4854e6508f11104f5339
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Feb 11 12:34:03 2015 -0500
-
- Mimic Alex's fix in 152a0ef, to fsync created files to disk, for truncated
- files, never surprise the upper-layer.
-
-commit f445f3bf63e3fa096479c5963f75d91e02f9b616
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Feb 11 17:49:04 2015 +1100
-
- If logs crossed the threshold size while we were taking a checkpoint, don't take another one immediately.
-
- Should help with SERVER-17206, where we saw two checkpoints every iteration.
-
-commit 0d85a9716b786de5fc90c00fb31765ade8aefd1f
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Feb 11 17:48:03 2015 +1100
-
- Check if a page was recently split before doing forced eviction. We used to do this, but it got lost in a recent reorg of __wt_page_release. This change should mean that after an in-memory split, application threads that are appending have time to move to the new page at the end of the tree, rather than getting stuck trying to force out a page.
-
- SERVER-16938, SERVER-17121
-
-commit 545d064fd4cbb0b35dc536e772c60b26a193d3f2
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Feb 11 17:45:26 2015 +1100
-
- When doing truncates, if we see a clean page in memory, try to evict it before truncating. It should be cheap (just freeing memory), and if the eviction succeeds, the fast truncate code can kick in and mark the whole page deleted immediately. Otherwise, truncate will mark each record on the page deleted, and the next time through will try to force that page out, which has to go through reconciliation to figure out that all of the records are deleted.
-
- SERVER-17157
-
-commit 9bbb8595abd6ac962a0debf20a6cdcef73d83855
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 10 16:09:36 2015 +1100
-
- Allow size-limited LSM trees to have Bloom filters, based on the normal configuration.
-
-commit c040f84a765c7c39f03e173a555eb50f85e2e698
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 10 15:58:02 2015 +1100
-
- Re-enable the global setting to disable LSM merges.
-
- refs #1657
-
-commit 8f14899ba0ce5b1a8df689e3c68db9a68bfeee66
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Feb 10 04:36:57 2015 +0000
-
- Fix a bug when re-opening an LSM tree.
-
- We could have attempted to update the last chunk that is already on disk.
-
-commit d8263d46c1aa136d24ef194a8f62f0b02b92b9b0
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 10 15:11:59 2015 +1100
-
- Improve LRU eviction of large pages: don't give up because a large page has recent updates: push on and try to do eviction and restore updates.
-
-commit eb02caa2564a18e857d18ef4b3f25683b438111c
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 10 15:01:37 2015 +1100
-
- Fix a local variable read when looking for pages to evict racing with a page becoming dirty.
-
- refs #1660
-
-commit da4d99e7ad57057a1b8397629d59a3c83c28de21
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Feb 10 02:32:25 2015 +0000
-
- Fix a bug in LSM cursor open.
-
- The bug caused us to re-open more cursors than necessary in open.
-
- Related to fix: 439a655e
-
-commit eec16c3052af107bbe57aaf547eb8e70d2de4966
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Feb 10 00:26:37 2015 +0000
-
- Don't do LSM merge throttling if merges are disabled.
-
-commit fcee4c8ce0b5db9d3340169deb321601b81f4a1b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 9 14:04:02 2015 -0500
-
- Track splits during eviction by data-source as well as by connection.
-
- Don't double-count in-memory splits (we're incrementing cache_eviction_split in the underlying split-parent routine, not in the caller, so it's counting both normal and in-memory splits). Instead, cache_eviction_split is normal eviction splits, cache_inmem_split is in-memory splits.
-
-commit 3d1f9eace79b1aff84369d0caee245f9d6d96a60
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Feb 9 06:25:36 2015 +0000
-
- Add a mode to LSM where we can limit the size of data in the tree.
-
- A feature request to allow for a high insert throughput into a table with a size limitation.
-
- Adds a new configuration option to WT_SESSION::create which is lsm=(chunk_count_limit=0), default to 0 which is disabled.
-
- Refs #1652
-
-commit c63ba34c915d95c156aaf6c47a04fe6d361b91ad
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 9 14:07:57 2015 +1100
-
- Don't double-count the on-disk header size when setting split boundaries.
-
- refs #1655
-
-commit 152a0efdbd3ea66b142f52eed3c9224437143eec
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Feb 9 12:25:10 2015 +1100
-
- Fix a bug in table create. A crash could cause recovery to break.
-
- Refs SERVER-17204
-
- The bug is that we weren't doing an fsync of the file after it was created. Recovery assumes that if there are records for a particular file, then it will exist on disk.
-
-commit 4d50f5878073e582567848ae03ee506bb5058227
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Feb 9 00:43:27 2015 +0000
-
- Remove obsolete updates every time we add a new update.
-
- We used to only do the check when the cache was full. That could lead to update chains growing immensely long, which is bad.
-
- Refs: #1647, SERVER-17195
-
-commit e891a1f312850bcaaf5183f3fd2e091567044a96
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Feb 8 17:59:49 2015 -0500
-
- If we find a "removed" page, clean or dirty, leaf or internal, fast-path eviction, it helps with append-only workloads.
-
-commit ab2a7e9b397adf657081458e11f3dc472b10fd2b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Feb 5 15:54:01 2015 -0500
-
- There's a problem that went in in #1282, the key difference is that we are setting a split boundary at the end of the first page when there is more than a page worth of data. See also #1630 and #1631. This is an alternate approach to #1631: the real change is to fallthrough into the split case if the next item won't fit, callers of the split code can't handle failure from split, it has to create enough room for the next item to be entered into the buffer.
-
-commit 90a352717a45a40d047b33c9fb00e7174e1ae04f
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Feb 5 12:59:08 2015 -0500
-
- Initialize first_lsn if we have no logs. #1638
-
-commit 7cc7efb75c90e778f9757b954ad3ec85912b58fd
-Author: Don Anderson <dda@ddanderson.com>
-Date: Thu Feb 5 12:20:55 2015 -0500
-
- For wt printlog, make operations into a JSON array. Without that, any tool that parses JSON is almost certain to merge successive values of repeated fields.
- Refs #1438.
-
-commit 5bf11d893548804b890836a3d9ef4335c4319bb7
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Feb 4 15:46:30 2015 -0500
-
- Add name_hash and hash bucket queues for fh and block. #1643
-
- SERVER-17078
-
-commit 3b0c18f612c9cf4d61bc13785ff7125fa67b265a
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Wed Feb 4 06:27:07 2015 +0000
-
- Keep filling pages in reconciliation until we hit a boundary.
-
- This reverts some of a change for #1282 (without reverting the functionality in that change, AFAI can tell).
-
- Refs #1630
-
-
-3.0-RC8 Feb 4 2015
-------------------
-
-commit d8b7f0b8db92a2ad6d64b95cafeaf20f0a90c8ce
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Feb 4 16:00:11 2015 +1100
-
- Updates should always mark pages dirty (before checking for obsolete updates to free).
-
-commit 0947f82e01587836277d911b147bc98eefb58507
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Feb 3 10:28:00 2015 -0500
-
- Fixes for split cache accounting: multi-page splits weren't correctly accounting for the allocated WT_REFs, insert splits weren't correctly handling the new right-page's instantiated key (the parent needs to be incremented by both the left- and right-hand page's keys, and cannot assume it's the same size as the original WT_REF's key), insert splits need to increment the parent page's WT_REF size by two, not one.
-
-commit df96addef5f3ffcb495b4bf54390cf3fd41ac924
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 2 16:45:46 2015 +1100
-
- In recovery, track the maximum file ID in the metadata, regardless of whether there are any updates to roll forward.
-
- Previously, we tracked the largest file ID that was updated in the logs being rolled forward. It was usually the case that the most recently created file was also the most recently updated, so that calculation usually worked and wasn't detected until the repro in SERVER-17142 that created tables, did a clean shutdown and restart, then created more tables and did a dirty shutdown and restart, which was rolling forward updates into the wrong tables.
-
- refs SERVER-17142, SERVER-17131(?)
-
-commit 71f1559c91ed119082ebe42772da15e28915e1c8
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Feb 3 10:40:27 2015 +1100
-
- Start with clean trees so we can detect updates racing with sweep.
-
- Use a deleted ref to a leaf page that is created on first update, which is the same state the tree should be in if an empty leaf page is evicted. The only wrinkle is that bulk operations expect to find a leaf page in the tree: create it explicitly before the bulk insert starts. This was probably a bug before: if we had created a tree and kept it around for long enough with cache pressure before a bulk load started, the initial leaf page could have been evicted.
-
-commit 8545c4b3b7f5ed306215c82f1ad1cbe3664f0c50
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 2 17:13:09 2015 -0500
-
- Make the "split to deepen the tree" configuration values real, stored
- in the metadata file, but they remain undocumented for now.
-
-commit fb769dafee4aca91a60a28cd89317c268ac79d4f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Feb 2 16:36:22 2015 -0500
-
- WT_CELL_ADDR_DEL is 0, so we can't test vtype against 0 to know if it's
- been set or not. Reference SERVER-16866.
-
-commit feca80738c1b9103b4faa04ddb0718344347f640
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Mon Feb 2 13:53:21 2015 -0500
-
- Wrap calls to functions using pindex with WT_WITH_PAGE_INDEX.
-
-commit 23f2e1ba0680a2e8fa7a081f1b46e1ae2ab220d4
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 2 17:18:03 2015 +1100
-
- Once we decide to force-evict a page, do it directly rather than setting read_gen and hoping page release agrees.
-
-commit 5f00de07b5bad20a6ffb5ec7d412c4ca0b10c64f
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 2 17:11:27 2015 +1100
-
- split_gen paranoia: always increment split_gen once per split, use the allocated value to check for existing readers. Make sure that publishing a split_gen doesn't miss an update.
-
-commit 10a74d6af4f945e34368bc5754797ef1d684d8ab
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 2 16:52:34 2015 +1100
-
- If discarding a tree for sweep races with an update, give up the discard gracefully.
-
- refs #1618, SERVER-17048
-
-commit a2d20dc49cac870977d91213a7fe6dabf362ce70
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Feb 2 16:45:46 2015 +1100
-
- In recovery, track the maximum file ID in the metadata, regardless of whether there are any updates to roll forward.
-
- Previously, we tracked the largest file ID that was updated in the logs being rolled forward. It was usually the case that the most recently created file was also the most recently updated, so that calculation usually worked and wasn't detected until the repro in SERVER-17142 that created tables, did a clean shutdown and restart, then created more tables and did a dirty shutdown and restart, which was rolling forward updates into the wrong tables.
-
- refs SERVER-17142, SERVER-17131(?)
-
-commit b0a828b262a2d0d3cf1361eed98aa25a1168a7a6
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sat Jan 31 12:59:34 2015 -0500
-
- We no longer calculate allocation overhead per allocation chunk, revert the workaround for the problem with page memory size calculations during splits where we forced the new parent page memory size to 5% of its current value; reference #1564, #1565. This fixes a problem where 5% of a page's memory footprint isn't large enough to accommodate the cache decrements that will be done in the page's future, leading to page underflow.
-
- Minor cleanups: we no longer calculate allocation overhead per allocation chunk, the macro WT_MEMSIZE_ADD is no longer needed at all, and WT_MEMSIZE_TRANSFER is renamed to WT_MEM_TRANSFER.
-
-commit b640366c28fc66744e482c20c16973cb052aef8e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Jan 30 10:19:31 2015 -0500
-
- I believe we can race with pages being marked clean or dirty, which means we need to entirely divorce the page's dirty-byte count from page state: the page's dirty-byte count is just a value that tells us how many dirty bytes this page has contributed to the cache's total dirty-bytes count. Sync the cache's information to that value when possible, but don't worry if we can't.
-
-commit d02ea7246ec33e05b5fd60c499fea3ffe25c57d2
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Jan 30 17:38:09 2015 +1100
-
- Use reads to measure cache pressure with shared caches. We previously tracked writes, which were skewed by checkpoints.
-
- refs #1569
-
-commit a326c3ba10e0d299944a650b890f8c2d851db34a
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Jan 29 17:19:06 2015 -0500
-
- Simplify the cache calculation when a page is marked clean, use the page's dirty-byte count (which allows a race between the page being marked clean and being re-dirtied).
-
- This branch is still not correct, but appears to be able to run the CONFIG from #1582 without underflow for a much longer time.
-
- Reference #1605.
-
-commit 1c60c4966dd68ea2bf05ebe62e3f1d8de1a7d033
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Jan 29 14:33:36 2015 +1100
-
- Use a copy of the oldest transaction ID when sweeping cached overflow items. Otherwise, we could free structures that are still hooked into the skiplist.
-
- refs #1615
-
-commit 42724267278c64f5af68b281c9ee5742d1a56d72
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Jan 28 10:31:51 2015 -0500
-
- Adjust logging yield and timeout values. #1610
-
-commit ae102f4fe604f7fd547dece8ee138e8292d4f07c
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Jan 28 17:40:06 2015 +1100
-
- Cleanup accounting for update lists when restoring updates to evicted pages. Previously, we only accounted for the first update in a list.
-
- refs SERVER-16997
-
-commit 4adf9c929b1b46f273239214b4e2757fcfdb8f96
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Fri Jan 23 18:29:15 2015 -0500
-
- Windows Install Documentation
-
-commit 8faa218d27e7f21091f0b51a973f27047db1d950
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Tue Jan 27 13:47:24 2015 -0500
-
- MCI configuration update
-
-commit 422cbb6cea5fa5be6829044215ae46dc10be5f70
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Mon Jan 26 16:11:29 2015 -0500
-
- Add Install Target to SCons
-
-commit 41e7ab083d79a650e93a34d09e01e973ca4100d9
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Fri Jan 23 15:54:40 2015 -0500
-
- WiredTiger DLL support
- - Examples that only depend on public API use DLL now
-
-commit 23b2493e75cd166075eaccdaef75c8beee4576db
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Fri Jan 23 15:50:16 2015 -0500
-
- Scons Improvements
- - Added --enable-attach, --enable-diagnostic, --enable-verbose
- - Renamed --enable-swig to --enable-python for consistency + swig cleanup
- - Renamed wiredtiger static library to libwiredtiger.lib
-
-commit 96ab0ef67eee20fa75fa6d52c97d98bc119b74ae
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Thu Jan 15 15:07:01 2015 -0500
-
- Struct alignment and packing for MSVC
-
-commit f3b65997ece52382eed91730416d5f919bea79cd
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Fri Jan 9 10:49:59 2015 -0500
-
- Fix huffman config and add huffman tests. #1536
-
-2.8-RC7 Jan 27 2015
--------------------
-
-commit 2b4172f17008ff36dbeb50cadaf4fb97fc859e4e
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Jan 27 15:50:09 2015 +1100
-
- Revert a workaround for splits during truncate.
-
- refs #1583, #1563, SERVER-16868
-
-commit c2e108e2774ae79504579bcdca33f26fcff8cb07
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Jan 27 09:58:32 2015 +1100
-
- Change recovery to start from the checkpoint LSN in the metadata. Don't assert that we see a checkpoint complete in the available log: if the application crashes in between syncing the metadata and writing the final checkpoint record, there is no need to roll anything forward but we don't have the final checkpoint.
-
- refs #1529
-
-commit 2555e80d2020ba9833c436a22d1031f6c5778a64
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Jan 26 14:31:25 2015 -0500
-
- Coverity CID 50796 (#1 of 1): Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN)
- SERVER-17001
-
-commit 1ce3b94d6e40d37a77e62eda500f286bd3816eb9
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Jan 26 15:56:25 2015 +1100
-
- Grab the table list lock while building the list of handles to checkpoint.
-
- This avoids a potential deadlock during compact operations and/or checkpoints with a target list (and an assertion about lock ordering in diagnostic builds).
-
- Note that nested locking is not ideal: the medium-term fix here is #1598.
-
- refs #1589, SERVER-16967
-
-commit db3943563a87c3e4c42445ae9f3a07efacfdf4ac
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Jan 26 14:54:47 2015 +1100
-
- Free WT_REFs deleted by truncate. We were doing this when a page spontaneously became empty, but not if the "fast truncate" code kicked in.
-
- refs SERVER-16921
-
-commit 2063efb22c3c29b980f86f7fee77b6d03ba63ec1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Fri Jan 23 16:21:06 2015 -0500
-
- Don't count pages evicted by a worker thread as an "application thread" eviction; add a new statistic to distinguish between the server itself evicting pages and the eviction worker threads evicting.
-
- Don't increment the eviction counters unless we find a page to evict, __evict_lru_pages() gets called a huge number of times in any workload where eviction is happening.
-
- Reference SERVER-16997, SERVER-17020.
-
-commit 3abb99d58aaa46b0b3fcd338293a668422e3fcaf
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Fri Jan 23 15:05:16 2015 -0500
-
- Close Thread Handle after thread join on Windows
-
-commit 7d677aedfdcaa5458e900e556b662def460d0281
-Author: Don Anderson <dda@ddanderson.com>
-Date: Fri Jan 23 08:52:13 2015 -0500
-
- Fix drop index on a newly opened session.
- Fix __wt_schema_open_index to return WT_NOTFOUND when opening a single index. This fixes opening a cursor on a nonexistent index.
- Refs #1567.
-
-commit 3626081dff24e1448281d10658752b996897ca82
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Jan 22 18:08:15 2015 -0500
-
- Add the cache_overhead configuration string to allow applications to configure their cache overhead.
-
-commit 4843cd78e7f90937ebdb23f84fbd7c133a7e5256
-Author: Don Anderson <dda@ddanderson.com>
-Date: Thu Jan 22 10:40:11 2015 -0500
-
- Prepend underscores to SWIG methods that could have name conflicts
- with WT internal names. refs #1574.
-
-commit ebb1d9402c0ce2911069b0437d71766b92c3dc12
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Jan 21 12:57:20 2015 -0500
-
- Add log code to ensure write-no-sync. #1585
-
-commit 44fa4fbff95d0689b20c3fe3f4a55202554f0d9f
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Jan 19 14:25:39 2015 -0500
-
- Make compact more aggressive about finding blocks to move.
-
-2.8-RC6, Jan 20 2015
---------------------
-
-commit ab1d63d3aa2371ce53287c6c6c77833eb281a38a
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Jan 20 15:37:46 2015 -0500
-
- Check for valid log_fh handle in wt_log_write. #1580
-
-commit e2de971061abea9451e92d60f0870136c9c0af42
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Jan 20 13:24:06 2015 -0500
-
- Quit page eviction immediately if we're trying to evict a tree, that is, an internal page that has other internal pages as children.
-
-commit 6f3c5a933ef8ce79efc03a22a8c03526ffb2197b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Jan 19 12:38:24 2015 -0500
-
- The size of the file is decreasing each time, so compacting 10 times, at 10%, is not sufficient to drive a file to its smallest size. The right fix is probably to get better information from the block manager as to exactly how much the size of the file has decreased, but that's messy, especially when you consider the checkpoints required to get to that smallest size. For now, do 100 compaction attempts instead of 10, and depend on the no-progress state and/or the compaction timeout to limit the amount of time we spend here.
-
-commit 72172b088fba6769866aecabba8176303140f5c4
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Jan 19 10:25:13 2015 -0500
-
- Coverity 1264611, memory leak (WT_RET that should have been a WT_ERR).
-
-commit f61f984cf5241ac54bc2ea368c8c15e0cdfa91aa
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Sat Jan 17 22:25:02 2015 +0000
-
- Fix a deadlock opening statistics cursors.
-
- Refs #1575 and JIRA SERVER-16738
-
-commit c5fa51a0f18e4117d9f7b841de86eb35af751264
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Sat Jan 17 09:07:04 2015 -0500
-
- Log close thread needs to wait for any outstanding writes. #1571
-
-commit 9cd8120f491595ad6ac1c25c4b154ad6556b5fe7
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Sat Jan 17 09:49:09 2015 +1100
-
- Close the session for the log close server thread. Fixes a leak detected by address sanitizer.
-
-commit bd7364ea9a0542bee61db0a89e771faf814f6f53
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Jan 16 21:03:55 2015 +0000
-
- Fix a bug in raw compression, where we were overflowing memory.
-
- We weren't growing the buffer enough when adding new items in.
-
- Refs SERVER-16664
-
-commit 76addf73581c53f24462ab5fd724048aec36eaf3
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Sat Jan 17 05:48:03 2015 +1100
-
- Have WT_CURSOR::equals return 1 when cursors are equal, 0 when not.
-
-commit b2841dfc015d9502e1def870605968144b935570
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Jan 15 21:58:39 2015 -0500
-
- Add log thread to fsync and close log files. #1560
-
-commit ebb93969ebfb6b9bb9dc60621933f2fbeac4b472
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Jan 15 22:47:52 2015 +0000
-
- Don't do memory adjustments for the WT_REF's WT_ADDR structures, we don't do those adjustments in other places we set addresses.
-
- Workaround the problem with page memory size calculations during splits by forcing the new parent page memory size to 5% of its current value; reference #1564.
-
- Minor cleanups/renaming of the code instantiating the WT_REF structures during a tree-deepening split to clarify what's going on there.
-
-commit e0031209183c880fb1a1b99399013e7675a75e88
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Jan 16 09:26:34 2015 +1100
-
- Don't look at a page after it may be freed during split.
-
- During the process of doing a split we switch the ref to WT_REF_MEM - after which it's no longer safe to refer to the page. Shuffle the code so that we don't.
-
- SERVER-16868
-
-commit b6d7532cbf823d537b8f1733169fe4de08173c09
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Jan 15 16:55:00 2015 -0500
-
- Only advance sync_lsn to the end of our write. We waited until the log->sync_lsn is advanced into our file. It was a bug to set the sync_lsn to the current write_lsn as that can be too far ahead in a new log file when earlier log files aren't done yet.
-
-commit 85851933a938c53dfa57d1621cab1a959db672eb
-Author: Thomas Rueckstiess <thomas@rueckstiess.net>
-Date: Mon Dec 15 11:04:43 2014 +1100
-
- wtstats.py: removed python-nvd3 dependencies, rewriting with HTML template
-
-commit 4c26d2324bae1d7030b0142d50dbd2ccf11ddeb6
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Dec 11 19:32:50 2014 -0500
-
- Add support for a WT_CURSOR.reconfigure method, reference #1381.
-
-2.8-RC5 Jan 15 2015
--------------------
-
-commit 2e54a27683c5e2fd88918575383c76d3f60c3c78
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Jan 15 07:17:21 2015 +1100
-
- Workaround a read-after-free involving eviction during truncates. We were implicitly relying on first_dirty_txn to prevent pages being immediately force-evicted by truncate. The bug is not fully understood, but this change restores the previous window where reads can complete before the page is evicted for real.
-
- refs BF-759
-
-commit 8a1bfe3c35f0c1d90ea3e8e70c2aae8dff1fdbb3
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Wed Jan 14 15:07:39 2015 -0500
-
- Force log file closes to go in sequence. #1555
-
- Update the sync_lsn after sync'ing and closing an earlier log file and make sure archive doesn't try to remove a file that is still in use.
-
-2.8-RC4 Dec 22 2014
--------------------
-
-commit fbb96d94cdba9a28f5c5d737ce6c96543f3289f4
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Dec 22 15:59:46 2014 +1100
-
- Use the original page's first_dirty_txn when restoring updates to match what we do for in-memory splits.
-
- refs #1475
-
-commit 4df72e8e20139ddf667e1f0d3b6b7dcf91deb006
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Dec 22 13:12:07 2014 +1100
-
- Avoid EBUSY returns to verify and salvage caused by checkpoints. The "fix" involves blocking checkpoints while salvage or verify are in progress.
-
- refs #1404, SERVER-16457
-
-commit 864f3495721b1311b49df19ee241bfca9adf0863
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Sun Dec 21 20:47:52 2014 -0500
-
- Make the cache bytes-written and bytes-read match, both should
- ignore compression. Reference #1505.
-
-commit 995d6f8c26ae19013a1eb921fd871481ca643f47
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Mon Dec 22 12:42:46 2014 +1100
-
- Eviction should do update-restore if upper layers are trying to force out a page, regardless of its size. Also, only look at ref->page after checking for exclusive access. It is possible (but very unlikely) that a child page pointer could be replaced in the window where we are checking hazard pointers.
-
-commit d4abc51ea61211f90f4b70a0486442264ededc27
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Dec 22 10:43:04 2014 +1100
-
- Fix a bug where a custom extractor terminate was being called twice.
-
- Resolves issue #1503.
-
- Clarify the custom extractor and collator terminate documentation while I'm here.
-
-commit 16972ef63de1283d85146530c35f522b053e2c1e
-Author: Don Anderson <dda@ddanderson.com>
-Date: Fri Dec 19 09:56:47 2014 -0500
-
- Remove version numbering from the pkg-config file. We don't create include files that are named by version. Programs linked using -lwiredtiger will follow the symlink to wiredtiger-a.b.c.so, so their referred library name is forever stamped as wiredtiger-a.b.c.so, which won't conflict even when we ship wiredtiger-a.b.d.so.
- Refs #1458.
-
-commit e913b0811114d65b543cd78824e809eb487fd330
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Dec 19 17:19:58 2014 +1100
-
- Check that handles are not being walked by eviction before discarding.
-
- refs #1497
-
-commit 0d21e437917bc7cf08393852a3074957431ea30e
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Dec 19 15:44:15 2014 +1100
-
- Use the eviction server to write pages with READGEN_OLDEST set.
-
- Even before the eviction trigger has been reached. This should mean that we clear those pages out of cache earlier, and hopefully will save application threads from doing the evictions (at least sometimes).
-
-commit e0adfba3c4011c49b73ff3e4a165a4a938f69cb3
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Dec 19 12:23:21 2014 +1100
-
- Don't try to write leaves from the sweep server. Previously, this was done without locking the handle, and so could race with LSM discarding a handle.
-
- We know the handle has been idle, so there is a good chance that a checkpoint has run since the last update and the write leaves was wasted effort. If not, this change will keep the handle locked for longer preventing new opens, but it has been idle for a while anyway.
-
- refs #1495, #1497 (maybe)
-
-commit 87328a8d5c1d4a201a1df604ba32a87863948bbb
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Thu Dec 18 14:51:51 2014 -0500
-
- fix test_salvage on Windows
-
-commit 1953776ada137f3deae50169bf889d2063b353d3
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Dec 18 16:45:06 2014 +1100
-
- Don't try to set and clear session->split_gen in WT_INTL_FOREACH_BEGIN: there are too many of those loops to ensure that none of them skip clearing it. Instead, make sure all calls are wrapped in WT_WITH_APAGE_INDEX.
-
- refs SERVER-16546
-
-commit 235f747e2df80d9899497595a2b649e7d6df8601
-Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
-Date: Wed Dec 17 14:11:41 2014 -0500
-
- snprintf - Implement a custom version of snprintf to match the truncation behavior of C99 standard snprintf until MSVC supports snprintf.
-
-commit 857a6fd0c4b6b001c78cbbc507674e2129029dff
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Dec 17 15:51:41 2014 -0500
-
- Check the block header checksum before we clear it, it should be the same as the passed-in checksum, and if just those 4 bytes are corrupted, we wouldn't notice. SERVER-16457.
-
-commit dfa706056c4a359f7f894047bc9e5399efcec776
-Author: Don Anderson <dda@ddanderson.com>
-Date: Tue Dec 16 15:58:34 2014 -0500
-
- Some refactoring of python packing. More checks for standalone unit tests.
- Refs #1429.
-
-2.8-RC3 Dec 17 2014
--------------------
-
-commit bb064847e1c45f2b396d3f65f4e08cd10f33ed6e
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Dec 17 15:45:23 2014 +1100
-
- Detect write-write conflicts before no-overwrite cursors decide to skip an update.
-
- refs SERVER-16351
-
-commit 91abf8e35d5246a653bd9615ffd9723d87999c38
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Wed Dec 17 13:36:02 2014 +1100
-
- Add support for none configuration string to log compressor.
-
- To be consistent with block compression configuration.
-
-commit 5438fee4942b4dbf484799dad6e12e042d253e99
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Wed Dec 17 12:09:33 2014 +1100
-
- Return an error if a shared cache configuration is set, but not enabled.
-
- Check for a configuration via shared_cache=(size=).
-
- Refs #1487
-
-commit 390a5b71b25492dc3030e908a65a11a04401852b
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Dec 16 15:32:09 2014 -0500
-
- We documented that huffman_key and huffman_value took "none" as an argument, but they didn't.
-
- Worse, if key was set but not value, we'd set value anyway, free of charge, and if value was set but not key, we'd fail. I doubt this is a problem (it's pretty surprising if anyone would set key but not also set value).
-
- Use __wt_config_gets_none() to support the "none" setting, re-work the logic to ignore keys or values that are zero-length.
-
- Reference #1417.
-
-commit 662e26eeb31f76f2c4aeebf6690c9056612de32e
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Tue Dec 16 14:20:01 2014 -0500
-
- Atomically create all log files and move them into place. #1482
-
-commit 5c30d62dbf7ec0976d6ec4d2aed4ba272aadd499
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Dec 15 19:41:05 2014 -0500
-
- If we race with the logging thread and get to __wt_logmgr_destroy() while __log_archive_once() is still using conn->log_path, we can free it out from under the running server. If there's a logging thread running, don't free conn->log_path until we've joined that thread.
- Reference #1480.
-
-commit d77d35db407fd74c266bdb728b12c74fdab26ba2
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Mon Dec 15 18:33:54 2014 -0500
-
- Even if we don't track any overflow pages during our read of the file, we still have to process the list of pages looking for leaf pages that reference unavailable overflow pages, no overflow pages doesn't imply there are no references to overflow pages.
-
-commit cedf8cfe69bf964629aab498feb20a0b1ab77bc0
-Author: Don Anderson <dda@ddanderson.com>
-Date: Mon Dec 15 11:54:15 2014 -0500
-
- Fix use of 'compressed' flag for printlog.
- Added printlog call to test case for log compression.
- Refs #1472.
-
-commit 3210b11cf7bfb79f3ed52cd1c17a13c644a82e7a
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Mon Dec 15 11:02:10 2014 -0500
-
- Fix memory leak. Always free log_path. #1473
-
-commit 097c61e5f3326bc71f2d645b3f539c5c6d1ae3fb
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Dec 15 05:58:06 2014 +0000
-
- Allow printlog to work without recovery.
-
- It now works even if compression or a custom path are setup.
-
-commit b8921272755ce66d09ab2a001745573420bd41ac
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Dec 10 15:40:06 2014 -0500
-
- We can't use the corrected page size to calculate the buffer's space available, but we don't have a page size either. We do know how much space we added, so use that to increment the space available.
-
- The raw compression handler can no longer pass a page size buffer to the underlying compression function, because a single key/value object could be larger than the page size, instead, pass a buffer of the same length as the source to compress, whatever that is.
-
-commit 402041727de02931be1dd385f3c970f31a53341c
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Dec 10 10:53:27 2014 +1100
-
- Remove the min / max bounds on overflow sizes.
-
-commit 5088ee53fce569915e8de8c168da50cff7991ec1
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Tue Dec 9 16:22:20 2014 -0500
-
- Separate the btree maximum key/value sizes from the underlying page size, reference #1282.
-
- Deprecate the internal_item_max and leaf_item_max configuration strings, replace with internal_key_max, leaf_key_max and leaf_value_max.
-
- Remove examples/c/ex_file.c (there's no real need for a "file" URI example, and it's easy to replace the one place the documentation used it).
-
-commit e1e187e8fdfb48526f2a62e3f0f48072c30db53e
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Dec 16 10:29:08 2014 +1100
-
- Update the swept handles statistic any time we close the underlying handle, regardless of whether a session still references the data handle.
-
- refs #1460, #1466
-
-commit 080b34fde5de97459c383c67ba93d9fdc88090a8
-Author: Don Anderson <dda@ddanderson.com>
-Date: Fri Dec 12 11:13:22 2014 -0500
-
- Allow pruning scenarios with different limits for default vs. long runs. The pruned scenario list now matches the original ordering. Tests now print with both the scenario number and the scenario args. This should make it easier to diagnose and debug problems that affect only certain scenarios. Refs #1461.
-
-commit d5b88e08e2f6e39d098cfff3c013f4aa035c88bc
-Author: Don Anderson <dda@ddanderson.com>
-Date: Thu Dec 11 15:13:11 2014 -0500
-
- Changed python test suite to allow for shorter runs by default, with a --long (or -l) option for the complete runs. Txn02 in particular now runs only a small number of scenarios by default. Also added a @longtest("description") decorator for individual tests that can be marked to be run only under --long. Refs #1461.
-
-commit 2f37332e5bbd14823f0c78ad38672dbce074e87f
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Dec 12 17:21:50 2014 +1100
-
- Ensure metadata table is open at start of checkpoints.
-
-commit 2cb10882f4f7189a3c2de4d7e187117873fded32
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Dec 12 17:07:11 2014 +1100
-
- Switch to stashing the metadata dhandle, not the btree.
-
- Also update checkpoint to use the saved handle instead of
- searching for it again.
-
-commit 42c05161cf8cc74606b259ceeeb41dd38ea7fb4e
-Author: Susan LoVerso <sue@wiredtiger.com>
-Date: Thu Dec 11 15:24:14 2014 -0500
-
- Use dhandle hash lists in more places. Adjust sweep timings. #1460
-
-commit 7fb6315e45e74f0bef0a04505018e1ab0b68d144
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Thu Dec 11 23:06:19 2014 +1100
-
- If LSM search_near finds a matching tombstone, step the whole LSM cursor next to find the closest key. We can't step individual chunk cursors, or we could return a record that is deleted in a more recent chunk.
-
- MongoDB BF-694, BF-700
-
-commit 5f6bbc898564aefb312255555abd34202cb98815
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Wed Dec 10 14:39:25 2014 +1100
-
- Track whether eviction is making progress regardless of whether the cache is 100% full. Otherwise we can get into a tight loop. Use the count of pages evicted rather than a flag, now that there are multiple eviction threads.
-
-commit cbe9e9bdbc508f95076b8097d41bb4cc799eab1c
-Author: Don Anderson <dda@ddanderson.com>
-Date: Tue Dec 9 19:11:52 2014 -0500
-
- Change timing to allow archive thread to complete on tests that
- do archive. Since this can make each test run substantially longer,
- reduce the number of tests that are doing archive from ~4000 to
- something under 100. Refs #1452.
-
-commit 62af85890179abb9fda17a619fcd5ae69fb369e0
-Merge: b83bf08 b24c7af
-Author: Michael Cahill <mjc@wiredtiger.com>
-Date: Wed Dec 10 11:03:48 2014 +1100
-
- Merge pull request #1449 from wiredtiger/lsm-switch-simplify
-
- Improve and simplify the LSM switch logic
-
-commit 7e0f7d7b803f9af04ad10b2bec6ef5073aa79248
-Author: Don Anderson <dda@ddanderson.com>
-Date: Tue Dec 9 16:04:48 2014 -0500
-
- SESSION->drop with "force" of nonexistent index/colgroup should be silent.
- Refs #1436.
-
-commit be364821d75c0c42169d79c486fa582c777f7082
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Dec 9 15:58:28 2014 +1100
-
- Sweep old handles more aggressively:
-
- 1. don't have checkpoint or other periodic operations like statistics logging keep old handles alive;
- 2. don't wait for all sessions to empty the file from their cache before closing;
- 3. only update the time of death from the sweep thread.
-
-commit c96a4c954ccc73744f8a1fbcf2fea6debdfca018
-Merge: cc8eb0b 2e332b9
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Tue Dec 9 14:41:36 2014 +1100
-
- Merge pull request #1443 from wiredtiger/cursor-open-optimize
-
- Cursor open optimize
-
-commit 993c8ede8ff64eac9e87d1adcd39f8575039222b
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Tue Dec 9 13:21:13 2014 +1100
-
- Improve and simplify the LSM switch logic: it is fine to keep writing into a chunk while it is being switched, until either it hits the hard chunk size limit or a switch transaction ID is chosen that is larger than the writer's. Fixes an assertion failure introduced by #1432 that could write to an old chunk after the switch completed.
-
- refs #1432, #1418
-
-commit 5551461cd5f26249e4330c9f87b4945d7ec2bb34
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Mon Dec 8 18:01:31 2014 +1100
-
- If there are only two LSM worker threads, don't let the cache get full.
-
- Allow the first thread to do flushes as well as switches and drops if there are only two threads.
-
- Refs #1441, but this is a hang seen from that test/format configuration, not a segfault.
-
-commit 8f06d6b79dabed54ad1e05515bbdb31e23c4e991
-Author: Don Anderson <dda@ddanderson.com>
-Date: Fri Dec 5 14:22:41 2014 -0500
-
- Modify printlog output so that arbitrary strings are shown as using the JSON Unicode standard. refs #1438.
-
-commit 68090796dea07e7b2d3d5bee8d69aafcd8febe16
-Author: Alex Gorrod <alexg@wiredtiger.com>
-Date: Fri Dec 5 04:36:25 2014 +0000
-
- Avoid string comparisons when looking up tables.
-
- Save a hash value in the table so we can do integer comparisons rather than string comparisons.
-
-commit 4de5e3a71bfad1c2a9ef1eccccdd45ec02fecba9
-Author: Michael Cahill <michael.cahill@wiredtiger.com>
-Date: Fri Dec 5 13:01:51 2014 +1100
-
- Force eviction if we see many consecutive deletes when scanning through a page. This fixes quadratic behavior in find-first+delete workloads.
-
-commit bbced52c939e16ad5662b3a177cef3e52abddd6e
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Thu Dec 4 07:57:13 2014 -0500
-
- In the final close, continue and remove the handle no matter what errors we see, otherwise the handle-close code in __wt_conn_dhandle_discard() can become infinite loops, where we repeatedly attempt to close the same file handles. Reference #1434.
-
-commit 46fa7f0b6397fe765c5e8c2853f9cd0b067bc808
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Dec 3 13:46:01 2014 -0500
-
- Changes in #1204, #1288 mistakenly changed the values for some error defines, which breaks backward compatibility. (The WT_DEADLOCK error name sorted differently from WT_ROLLBACK, and we were assigning error values based on the sort order in a script.) Revert the change, and make sure it doesn't happen again.
-
-commit 249e88485c75951a0584a7c7a8dd4b8f8b6a3382
-Author: Keith Bostic <keith@wiredtiger.com>
-Date: Wed Dec 3 06:25:16 2014 -0500
-
- Support "none" in all configuration strings as an alternative to an empty string. Reference #1417.
-
-commit 63d7c7869f8c2ab5a3e6ee935d1e37f21d40755f
-Author: Don Anderson <dda@ddanderson.com>
-Date: Tue Dec 2 14:00:11 2014 -0500
-
- Added log compression. When configured, we attempt to compress each log record. Added printlog output to show before/after compression sizes. Refs #1359.
-
diff --git a/SConstruct b/SConstruct
index a7306262f82..a5dd8761d6c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -214,6 +214,7 @@ if (VERSION_MAJOR == None or
wiredtiger_includes = """
#include <sys/types.h>
#include <stdarg.h>
+ #include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
"""
@@ -239,12 +240,26 @@ wtheader = env.Substfile(
#
# WiredTiger library
#
-filelistfile = r'build_win\filelist.win'
-filelist = open(filelistfile)
-wtsources = [line.strip()
- for line in filelist
- if not line.startswith("#") and len(line) > 1]
-filelist.close()
+# Map WiredTiger build conditions: any conditions that appear in WiredTiger's
+# dist/filelist must appear here, and if the value is true, those files will be
+# included.
+#
+condition_map = {
+ 'POSIX_HOST' : env['PLATFORM'] == 'posix',
+ 'POWERPC_HOST' : False,
+ 'WINDOWS_HOST' : env['PLATFORM'] == 'win32',
+}
+
+def filtered_filelist(f):
+ for line in f:
+ file_cond = line.split()
+ if line.startswith("#") or len(file_cond) == 0:
+ continue
+ if len(file_cond) == 1 or condition_map[file_cond[1]]:
+ yield file_cond[0]
+
+filelistfile = r'dist/filelist'
+wtsources = list(filtered_filelist(open(filelistfile)))
if useZlib:
wtsources.append("ext/compressors/zlib/zlib_compress.c")
@@ -345,12 +360,12 @@ examples = [
"ex_all",
"ex_async",
"ex_call_center",
- "ex_config",
"ex_config_parse",
"ex_cursor",
"ex_data_source",
"ex_encrypt",
"ex_extending",
+ "ex_file_system",
"ex_hello",
"ex_log",
"ex_pack",
@@ -392,10 +407,16 @@ env.Append(BUILDERS={'SmokeTest' : Builder(action = builder_smoke_test)})
#Build the tests and setup the "scons test" target
+testutil = env.Library('testutil',
+ [
+ 'test/utility/misc.c',
+ 'test/utility/parse_opts.c'
+ ])
+
#Don't test bloom on Windows, its broken
t = env.Program("t_bloom",
"test/bloom/test_bloom.c",
- LIBS=[wtlib] + wtlibs)
+ LIBS=[wtlib, testutil] + wtlibs)
#env.Alias("check", env.SmokeTest(t))
Default(t)
@@ -418,7 +439,7 @@ t = env.Program("t_fops",
["test/fops/file.c",
"test/fops/fops.c",
"test/fops/t.c"],
- LIBS=[wtlib, shim] + wtlibs)
+ LIBS=[wtlib, shim, testutil] + wtlibs)
env.Append(CPPPATH=["test/utility"])
env.Alias("check", env.SmokeTest(t))
Default(t)
@@ -468,7 +489,7 @@ Default(t)
#Build the Examples
for ex in examples:
- if(ex in ['ex_all', 'ex_async', 'ex_thread', 'ex_encrypt']):
+ if(ex in ['ex_all', 'ex_async', 'ex_encrypt', 'ex_file_system' , 'ex_thread']):
exp = env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib, shim] + wtlibs)
Default(exp)
env.Alias("check", env.SmokeTest(exp))
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c
index e83d6fcceed..0dc38287155 100644
--- a/bench/wtperf/config.c
+++ b/bench/wtperf/config.c
@@ -47,6 +47,53 @@ static void config_opt_usage(void);
(strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0')
/*
+ * config_unescape --
+ * Modify a string in place, replacing any backslash escape sequences.
+ * The modified string is never longer than the original.
+ */
+static int
+config_unescape(char *orig)
+{
+ char ch, *dst, *s;
+
+ for (dst = s = orig; *s != '\0';) {
+ if ((ch = *s++) == '\\') {
+ ch = *s++;
+ switch (ch) {
+ case 'b':
+ *dst++ = '\b';
+ break;
+ case 'f':
+ *dst++ = '\f';
+ break;
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case 'r':
+ *dst++ = '\r';
+ break;
+ case 't':
+ *dst++ = '\t';
+ break;
+ case '\\':
+ case '/':
+ case '\"': /* Backslash needed for spell check. */
+ *dst++ = ch;
+ break;
+ default:
+ /* Note: Unicode (\u) not implemented. */
+ fprintf(stderr,
+ "invalid escape in string: %s\n", orig);
+ return (EINVAL);
+ }
+ } else
+ *dst++ = ch;
+ }
+ *dst = '\0';
+ return (0);
+}
+
+/*
* config_assign --
* Assign the src config to the dest, any storage allocated in dest is
* freed as a result.
@@ -123,7 +170,7 @@ config_free(CONFIG *cfg)
if (config_opts[i].type == STRING_TYPE ||
config_opts[i].type == CONFIG_STRING_TYPE) {
pstr = (char **)
- ((unsigned char *)cfg + config_opts[i].offset);
+ ((u_char *)cfg + config_opts[i].offset);
free(*pstr);
*pstr = NULL;
}
@@ -363,7 +410,8 @@ static int
config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
{
CONFIG_OPT *popt;
- char *newstr, **strp;
+ char *begin, *newstr, **strp;
+ int ret;
size_t i, newlen, nopt;
void *valueloc;
@@ -383,7 +431,7 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
fprintf(stderr, "\t%s\n", config_opts[i].name);
return (EINVAL);
}
- valueloc = ((unsigned char *)cfg + popt->offset);
+ valueloc = ((u_char *)cfg + popt->offset);
switch (popt->type) {
case BOOL_TYPE:
if (v->type != WT_CONFIG_ITEM_BOOL) {
@@ -438,15 +486,20 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
}
strp = (char **)valueloc;
newlen = v->len + 1;
- if (*strp == NULL) {
- newstr = dstrdup(v->str);
- } else {
- newlen += (strlen(*strp) + 1);
+ if (*strp == NULL)
+ begin = newstr = dstrdup(v->str);
+ else {
+ newlen += strlen(*strp) + 1;
newstr = dcalloc(newlen, sizeof(char));
snprintf(newstr, newlen,
"%s,%*s", *strp, (int)v->len, v->str);
/* Free the old value now we've copied it. */
free(*strp);
+ begin = &newstr[(newlen - 1) - v->len];
+ }
+ if ((ret = config_unescape(begin)) != 0) {
+ free(newstr);
+ return (ret);
}
*strp = newstr;
break;
@@ -487,84 +540,100 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
int
config_opt_file(CONFIG *cfg, const char *filename)
{
- struct stat sb;
- ssize_t read_size;
- size_t buf_size, linelen, optionpos;
- int contline, fd, linenum, ret;
- char option[1024];
- char *comment, *file_buf, *line, *ltrim, *rtrim;
+ FILE *fp;
+ size_t linelen, optionpos;
+ int linenum, ret;
+ bool contline;
+ char line[4 * 1024], option[4 * 1024];
+ char *comment, *ltrim, *rtrim;
- file_buf = NULL;
+ ret = 0;
- if ((fd = open(filename, O_RDONLY)) == -1) {
+ if ((fp = fopen(filename, "r")) == NULL) {
fprintf(stderr, "wtperf: %s: %s\n", filename, strerror(errno));
return (errno);
}
- if ((ret = fstat(fd, &sb)) != 0) {
- fprintf(stderr, "wtperf: stat of %s: %s\n",
- filename, strerror(errno));
- ret = errno;
- goto err;
- }
- buf_size = (size_t)sb.st_size;
- file_buf = dcalloc(buf_size + 2, 1);
- read_size = read(fd, file_buf, buf_size);
- if (read_size == -1
-#ifndef _WIN32
- /* Windows automatically translates \r\n -> \n so counts will be off */
- || (size_t)read_size != buf_size
-#endif
- ) {
- fprintf(stderr,
- "wtperf: read unexpected amount from config file\n");
- ret = EINVAL;
- goto err;
- }
- /* Make sure the buffer is terminated correctly. */
- file_buf[read_size] = '\0';
- ret = 0;
optionpos = 0;
linenum = 0;
- /*
- * We should switch this from using strtok to generating a single
- * WiredTiger configuration string compatible string, and using
- * the WiredTiger configuration parser to parse it at once.
- */
-#define WTPERF_CONFIG_DELIMS "\n\\"
- for (line = strtok(file_buf, WTPERF_CONFIG_DELIMS);
- line != NULL;
- line = strtok(NULL, WTPERF_CONFIG_DELIMS)) {
+ while (fgets(line, sizeof(line), fp) != NULL) {
linenum++;
- /* trim the line */
- for (ltrim = line; *ltrim && isspace(*ltrim); ltrim++)
+
+ /* Skip leading space. */
+ for (ltrim = line; *ltrim && isspace((u_char)*ltrim);
+ ltrim++)
+ ;
+
+ /*
+ * Find the end of the line; if there's no trailing newline, the
+ * line is too long for the buffer or the file was corrupted
+ * (there's no terminating newline in the file).
+ */
+ for (rtrim = line; *rtrim && *rtrim != '\n'; rtrim++)
;
- rtrim = &ltrim[strlen(ltrim)];
- if (rtrim > ltrim && rtrim[-1] == '\n')
+ if (*rtrim != '\n') {
+ fprintf(stderr,
+ "wtperf: %s: %d: configuration line too long\n",
+ filename, linenum);
+ ret = EINVAL;
+ break;
+ }
+
+ /* Skip trailing space. */
+ while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
rtrim--;
- contline = (rtrim > ltrim && rtrim[-1] == '\\');
+ /*
+ * If the last non-space character in the line is an escape, the
+ * line will be continued. Checked early because the line might
+ * otherwise be empty.
+ */
+ contline = rtrim > ltrim && rtrim[-1] == '\\';
if (contline)
rtrim--;
- comment = strchr(ltrim, '#');
- if (comment != NULL && comment < rtrim)
+ /*
+ * Discard anything after the first hash character. Check after
+ * the escape character, the escape can appear after a comment.
+ */
+ if ((comment = strchr(ltrim, '#')) != NULL)
rtrim = comment;
- while (rtrim > ltrim && isspace(rtrim[-1]))
+
+ /* Skip trailing space again. */
+ while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
rtrim--;
- linelen = (size_t)(rtrim - ltrim);
- if (linelen == 0)
- continue;
+ /*
+ * Check for empty lines: note that the right-hand boundary can
+ * cross over the left-hand boundary, less-than or equal to is
+ * the correct test.
+ */
+ if (rtrim <= ltrim) {
+ /*
+ * If we're continuing from this line, or we haven't
+ * started building an option, ignore this line.
+ */
+ if (contline || optionpos == 0)
+ continue;
+
+ /*
+ * An empty line terminating an option we're building;
+ * clean things up so we can proceed.
+ */
+ linelen = 0;
+ } else
+ linelen = (size_t)(rtrim - ltrim);
+ ltrim[linelen] = '\0';
if (linelen + optionpos + 1 > sizeof(option)) {
- fprintf(stderr, "wtperf: %s: %d: line overflow\n",
+ fprintf(stderr,
+ "wtperf: %s: %d: option value overflow\n",
filename, linenum);
ret = EINVAL;
break;
}
- *rtrim = '\0';
- strncpy(&option[optionpos], ltrim, linelen);
+
+ memcpy(&option[optionpos], ltrim, linelen);
option[optionpos + linelen] = '\0';
if (contline)
optionpos += linelen;
@@ -577,16 +646,19 @@ config_opt_file(CONFIG *cfg, const char *filename)
optionpos = 0;
}
}
- if (ret == 0 && optionpos > 0) {
- fprintf(stderr, "wtperf: %s: %d: last line continues\n",
- filename, linenum);
- ret = EINVAL;
- goto err;
+ if (ret == 0) {
+ if (ferror(fp)) {
+ fprintf(stderr, "wtperf: %s: read error\n", filename);
+ ret = errno;
+ }
+ if (optionpos > 0) {
+ fprintf(stderr, "wtperf: %s: %d: last line continues\n",
+ filename, linenum);
+ ret = EINVAL;
+ }
}
-err: if (fd != -1)
- (void)close(fd);
- free(file_buf);
+ (void)fclose(fp);
return (ret);
}
@@ -754,7 +826,7 @@ config_consolidate(CONFIG *cfg)
* as being the same key.
*/
if (strncmp(conf_line->string, test_line->string,
- (size_t)(string_key - conf_line->string + 1))
+ (size_t)((string_key - conf_line->string) + 1))
== 0) {
TAILQ_REMOVE(&cfg->config_head, conf_line, c);
free(conf_line->string);
diff --git a/bench/wtperf/runners/evict-btree-1.wtperf b/bench/wtperf/runners/evict-btree-1.wtperf
new file mode 100644
index 00000000000..24da4dd7902
--- /dev/null
+++ b/bench/wtperf/runners/evict-btree-1.wtperf
@@ -0,0 +1,11 @@
+# wtperf options file: evict btree configuration
+conn_config="cache_size=50M"
+table_config="type=file"
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
diff --git a/bench/wtperf/runners/evict-btree-stress.wtperf b/bench/wtperf/runners/evict-btree-stress.wtperf
new file mode 100644
index 00000000000..740fb88c050
--- /dev/null
+++ b/bench/wtperf/runners/evict-btree-stress.wtperf
@@ -0,0 +1,12 @@
+# wtperf options file: evict btree configuration
+conn_config="cache_size=50M,eviction=(threads_max=4)"
+table_config="type=file"
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
+session_count_idle=100
diff --git a/bench/wtperf/runners/evict-lsm-1.wtperf b/bench/wtperf/runners/evict-lsm-1.wtperf
new file mode 100644
index 00000000000..ad885d98eb7
--- /dev/null
+++ b/bench/wtperf/runners/evict-lsm-1.wtperf
@@ -0,0 +1,12 @@
+# wtperf options file: evict lsm configuration
+conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6)"
+table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB"
+compact=true
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index 9d57bdcf6b0..9d35f6fa640 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -1631,6 +1631,8 @@ execute_workload(CONFIG *cfg)
{
CONFIG_THREAD *threads;
WORKLOAD *workp;
+ WT_CONNECTION *conn;
+ WT_SESSION **sessions;
pthread_t idle_table_cycle_thread;
uint64_t last_ckpts, last_inserts, last_reads, last_truncates;
uint64_t last_updates;
@@ -1647,6 +1649,8 @@ execute_workload(CONFIG *cfg)
last_updates = 0;
ret = 0;
+ sessions = NULL;
+
/* Start cycling idle tables. */
if ((ret = start_idle_table_cycle(cfg, &idle_table_cycle_thread)) != 0)
return (ret);
@@ -1664,6 +1668,18 @@ execute_workload(CONFIG *cfg)
} else
pfunc = worker;
+ if (cfg->session_count_idle != 0) {
+ sessions = dcalloc((size_t)cfg->session_count_idle,
+ sizeof(WT_SESSION *));
+ conn = cfg->conn;
+ for (i = 0; i < cfg->session_count_idle; ++i)
+ if ((ret = conn->open_session(
+ conn, NULL, cfg->sess_config, &sessions[i])) != 0) {
+ lprintf(cfg, ret, 0,
+ "execute_workload: idle open_session");
+ goto err;
+ }
+ }
/* Start each workload. */
for (threads = cfg->workers, i = 0,
workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) {
@@ -1758,6 +1774,7 @@ err: cfg->stop = 1;
if (ret == 0 && cfg->drop_tables && (ret = drop_all_tables(cfg)) != 0)
lprintf(cfg, ret, 0, "Drop tables failed.");
+ free(sessions);
/* Report if any worker threads didn't finish. */
if (cfg->error != 0) {
lprintf(cfg, WT_ERROR, 0,
@@ -2170,15 +2187,15 @@ int
main(int argc, char *argv[])
{
CONFIG *cfg, _cfg;
- size_t req_len;
+ size_t req_len, sreq_len;
int ch, monitor_set, ret;
const char *opts = "C:H:h:m:O:o:T:";
const char *config_opts;
- char *cc_buf, *tc_buf, *user_cconfig, *user_tconfig;
+ char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
monitor_set = ret = 0;
config_opts = NULL;
- cc_buf = tc_buf = user_cconfig = user_tconfig = NULL;
+ cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
/* Setup the default configuration values. */
cfg = &_cfg;
@@ -2317,7 +2334,8 @@ main(int argc, char *argv[])
/* Concatenate non-default configuration strings. */
if (cfg->verbose > 1 || user_cconfig != NULL ||
- cfg->compress_ext != NULL || cfg->async_config != NULL) {
+ cfg->session_count_idle > 0 || cfg->compress_ext != NULL ||
+ cfg->async_config != NULL) {
req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3;
if (user_cconfig != NULL)
req_len += strlen(user_cconfig);
@@ -2325,16 +2343,26 @@ main(int argc, char *argv[])
req_len += strlen(cfg->async_config);
if (cfg->compress_ext != NULL)
req_len += strlen(cfg->compress_ext);
+ if (cfg->session_count_idle > 0) {
+ /*
+ * Size the buffer for the largest value PRIu32 can
+ * print (10 decimal digits) plus the trailing nul; a
+ * smaller buffer lets snprintf silently truncate the
+ * session_max value passed to the connection.
+ */
+ sreq_len = strlen(",session_max=") + 11;
+ req_len += sreq_len;
+ sess_cfg = dcalloc(sreq_len, 1);
+ snprintf(sess_cfg, sreq_len,
+ ",session_max=%" PRIu32,
+ cfg->session_count_idle + cfg->workers_cnt +
+ cfg->populate_threads + 10);
+ }
cc_buf = dcalloc(req_len, 1);
/*
* This is getting hard to parse.
*/
- snprintf(cc_buf, req_len, "%s%s%s%s%s%s%s",
+ snprintf(cc_buf, req_len, "%s%s%s%s%s%s%s%s",
cfg->conn_config,
cfg->async_config ? cfg->async_config : "",
cfg->compress_ext ? cfg->compress_ext : "",
cfg->verbose > 1 ? ",": "",
cfg->verbose > 1 ? debug_cconfig : "",
+ sess_cfg ? sess_cfg : "",
user_cconfig ? ",": "",
user_cconfig ? user_cconfig : "");
if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
@@ -2410,6 +2438,7 @@ einval: ret = EINVAL;
err: config_free(cfg);
free(cc_buf);
+ free(sess_cfg);
free(tc_buf);
free(user_cconfig);
free(user_tconfig);
@@ -2579,7 +2608,7 @@ wtperf_rand(CONFIG_THREAD *thread)
S2 = wtperf_value_range(cfg) *
(cfg->pareto / 100.0) * (PARETO_SHAPE - 1);
U = 1 - (double)rval / (double)UINT32_MAX;
- rval = (pow(U, S1) - 1) * S2;
+ rval = (uint64_t)((pow(U, S1) - 1) * S2);
/*
* This Pareto calculation chooses out of range values about
* 2% of the time, from my testing. That will lead to the
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index a2b497b3142..d874fa4eefe 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -30,33 +30,8 @@
#define HAVE_WTPERF_H
#include <wt_internal.h>
-
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
-#include <sys/types.h>
-#include <sys/stat.h>
-
#include <assert.h>
-#include <ctype.h>
-#ifndef _WIN32
-#include <dirent.h>
-#endif
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <limits.h>
#include <math.h>
-#ifndef _WIN32
-#include <pthread.h>
-#endif
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
#ifdef _WIN32
#include "windows_shim.h"
@@ -345,6 +320,9 @@ extract_key(char *key_buf, uint64_t *keynop)
* Print message and exit on failure.
*/
static inline void
+die(int, const char *)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static inline void
die(int e, const char *str)
{
fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index b5e274a17c2..2afd20f777f 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -163,6 +163,8 @@ DEF_OPT_AS_UINT32(sample_rate, 50,
"how often the latency of operations is measured. One for every operation,"
"two for every second operation, three for every third operation etc.")
DEF_OPT_AS_CONFIG_STRING(sess_config, "", "session configuration string")
+DEF_OPT_AS_UINT32(session_count_idle, 0,
+ "number of idle sessions to create. Default 0.")
DEF_OPT_AS_CONFIG_STRING(table_config,
"key_format=S,value_format=S,type=lsm,exclusive=true,"
"allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,"
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index 4e1f829c0c5..64749378ed1 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -25,8 +25,10 @@ examples/java JAVA
lang/python PYTHON
# Make the tests
+test/utility
test/bloom
test/checkpoint
+test/csuite
test/cursor_order
test/fops
test/format
diff --git a/build_posix/aclocal/options.m4 b/build_posix/aclocal/options.m4
index 0fb49dbf1df..5f9b8748df2 100644
--- a/build_posix/aclocal/options.m4
+++ b/build_posix/aclocal/options.m4
@@ -215,6 +215,16 @@ pthread_adaptive|pthreads_adaptive)
esac
AC_MSG_RESULT($with_spinlock)
+AC_MSG_CHECKING(if --enable-strict option specified)
+AC_ARG_ENABLE(strict,
+ [AS_HELP_STRING([--enable-strict],
+ [Enable strict compiler checking.])], r=$enableval, r=no)
+case "$r" in
+no) wt_cv_enable_strict=no;;
+*) wt_cv_enable_strict=yes;;
+esac
+AC_MSG_RESULT($wt_cv_enable_strict)
+
AH_TEMPLATE(HAVE_VERBOSE, [Enable verbose message configuration.])
AC_MSG_CHECKING(if --enable-verbose option specified)
AC_ARG_ENABLE(verbose,
diff --git a/build_posix/aclocal/strict.m4 b/build_posix/aclocal/strict.m4
new file mode 100644
index 00000000000..b59f09fe584
--- /dev/null
+++ b/build_posix/aclocal/strict.m4
@@ -0,0 +1,74 @@
+# AM_STRICT
+# Per compiler-version flags used when compiling in strict mode.
+
+# GCC warnings.
+# Build the list of warning flags used when strict mode is enabled and the
+# compiler identifies itself as gcc.
+AC_DEFUN([AM_GCC_WARNINGS], [
+ # Start from a clean list, as the Clang macro does, so a value of w
+ # left over from the environment or earlier configure code cannot leak
+ # into the compiler flags.
+ w="-Wall -Wextra -Werror"
+
+ w="$w -Waggregate-return"
+ w="$w -Wbad-function-cast"
+ w="$w -Wcast-align"
+ w="$w -Wdeclaration-after-statement"
+ w="$w -Wdouble-promotion"
+ w="$w -Wfloat-equal"
+ w="$w -Wformat-nonliteral"
+ w="$w -Wformat-security"
+ w="$w -Wformat=2"
+ w="$w -Winit-self"
+ w="$w -Wjump-misses-init"
+ w="$w -Wmissing-declarations"
+ w="$w -Wmissing-field-initializers"
+ w="$w -Wmissing-parameter-type"
+ w="$w -Wmissing-prototypes"
+ w="$w -Wnested-externs"
+ w="$w -Wold-style-definition"
+ w="$w -Wpacked"
+ w="$w -Wpointer-arith"
+ w="$w -Wpointer-sign"
+ w="$w -Wredundant-decls"
+ w="$w -Wshadow"
+ w="$w -Wsign-conversion"
+ w="$w -Wstrict-prototypes"
+ w="$w -Wswitch-enum"
+ w="$w -Wundef"
+ w="$w -Wunreachable-code"
+ w="$w -Wunsafe-loop-optimizations"
+ w="$w -Wunused"
+ w="$w -Wwrite-strings"
+
+ # Non-fatal informational warnings: reported, but exempt from -Werror.
+ w="$w -Wno-error=inline"
+ w="$w -Wno-error=unsafe-loop-optimizations"
+
+ # Hand the finished list back to configure, which adds it to AM_CFLAGS.
+ wt_cv_strict_warnings="$w"
+])
+
+# Clang warnings.
+# Build the list of warning flags used when strict mode is enabled and the
+# compiler identifies itself as clang.
+AC_DEFUN([AM_CLANG_WARNINGS], [
+ # Turn on every warning clang knows about and make warnings fatal,
+ # then switch off the individual categories listed below.
+ w="-Weverything -Werror"
+
+ w="$w -Wno-cast-align"
+ w="$w -Wno-documentation-unknown-command"
+ w="$w -Wno-format-nonliteral"
+ w="$w -Wno-packed"
+ w="$w -Wno-padded"
+ w="$w -Wno-reserved-id-macro"
+ w="$w -Wno-zero-length-array"
+
+ # We should turn on cast-qual, but not as a fatal error: see WT-2690.
+ # For now, turn it off.
+ # w="$w -Wno-error=cast-qual"
+ w="$w -Wno-cast-qual"
+
+ # Older OS X releases need two extra flags turned off; these should be
+ # removed in the not-too-distant future. The release in question is:
+ # Apple clang version 4.1
+ # (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)
+ w="$w -Wno-pedantic"
+ w="$w -Wno-unused-command-line-argument"
+
+ # Ignore unrecognized options, so a -W flag in this list that the
+ # compiler does not know is not itself reported as a warning.
+ w="$w -Wno-unknown-warning-option"
+
+ # Hand the finished list back to configure, which adds it to AM_CFLAGS.
+ wt_cv_strict_warnings="$w"
+])
diff --git a/build_posix/aclocal/types.m4 b/build_posix/aclocal/types.m4
index 439034c89d2..089058f5611 100644
--- a/build_posix/aclocal/types.m4
+++ b/build_posix/aclocal/types.m4
@@ -7,6 +7,7 @@ AC_DEFUN([AM_TYPES], [
#include <sys/types.h>
#include <inttypes.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>"
AC_SUBST(wiredtiger_includes_decl)
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in
index 9251873be73..bbc6cf89d91 100644
--- a/build_posix/configure.ac.in
+++ b/build_posix/configure.ac.in
@@ -9,19 +9,20 @@ AC_CONFIG_AUX_DIR([build_posix/gnu-support])
AC_CONFIG_MACRO_DIR([build_posix/aclocal])
AC_CONFIG_SRCDIR([RELEASE_INFO])
-# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g"
-: ${CFLAGS=-O3 -g}
-: ${CXXFLAGS=-O3 -g}
-
# We rely on some automake features for testing (like AM_TEST_ENVIRONMENT)
# that didn't work before 1.11.6.
AM_INIT_AUTOMAKE([1.11.6 foreign parallel-tests subdir-objects])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([no])])
-# Configure options. The AM_OPTIONS and the libtool configuration
-# need to stay here. Moving them below the compiler and other
-# configurations causes -Wcast_align warnings and other warnings
-# on OS X.
+# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g"
+: ${CFLAGS=-O3 -g}
+: ${CXXFLAGS=-O3 -g}
+
+AC_PROG_CC(cc gcc)
+AC_PROG_CXX(c++ g++)
+AM_PROG_AS(as gas)
+
+# Configure options.
AM_OPTIONS
define([AC_LIBTOOL_LANG_CXX_CONFIG], [:])dnl
@@ -30,9 +31,33 @@ LT_PREREQ(2.2.6)
LT_INIT([pic-only])
AC_SUBST([LIBTOOL_DEPS])
-AC_PROG_CC(cc gcc)
-AC_PROG_CXX(c++ g++)
-AM_PROG_AS(as gas)
+# If enable-strict is configured, turn on as much error checking as we can for
+# this compiler. Intended for developers, and only works for gcc/clang, but it
+# fills a need.
+if test "$wt_cv_enable_strict" = "yes"; then
+ wt_cv_cc_version="`$CC --version | sed -eq`"
+ case "$wt_cv_cc_version" in
+ *clang*)
+ AM_CLANG_WARNINGS;;
+ *gcc*|*GCC*)
+ AM_GCC_WARNINGS;;
+ *)
+ AC_MSG_ERROR(
+ [--enable-strict does not support "$wt_cv_cc_version".]);;
+ esac
+
+ AM_CFLAGS="$AM_CFLAGS $wt_cv_strict_warnings"
+fi
+
+AM_CONDITIONAL([POSIX_HOST], [true])
+AM_CONDITIONAL([WINDOWS_HOST], [false])
+
+AS_CASE([$host_cpu],
+ [ppc64*], [wt_cv_powerpc="yes"],
+ [elf64lppc], [wt_cv_powerpc="yes"],
+ [powerpc*], [wt_cv_powerpc="yes"],
+ [wt_cv_powerpc="no"])
+AM_CONDITIONAL([POWERPC_HOST], [test "$wt_cv_powerpc" = "yes"])
# This is a workaround as part of WT-2459. Currently, clang (v3.7) does not
# support compiling the ASM code we have to perform the CRC checks on PowerPC.
@@ -41,12 +66,8 @@ AM_PROG_AS(as gas)
# determine what tag to use for that one .S file. If we catch that we are using
# two different compilers for CC and CCAS and we are on a PowerPC system we
# overload the libtool flags to provide CC by default.
-if test "$CC" != "$CCAS"; then
- AS_CASE([$host_cpu],
- [ppc64*], [AM_LIBTOOLFLAGS+="--tag=CC"],
- [elf64lppc], [AM_LIBTOOLFLAGS+="--tag=CC"],
- [powerpc*], [AM_LIBTOOLFLAGS+="--tag=CC"],
- [])
+if test "$wt_cv_powerpc" = "yes" -a "$CC" != "$CCAS"; then
+ [AM_LIBTOOLFLAGS+="--tag=CC"]
fi
AC_SUBST(AM_LIBTOOLFLAGS)
diff --git a/build_posix/makemake b/build_posix/makemake
index 9ed9d252911..506420b4aaf 100755
--- a/build_posix/makemake
+++ b/build_posix/makemake
@@ -7,7 +7,7 @@
(sed -n '1,/BEGIN SUBDIRS/p' Make.base
echo "SUBDIRS ="
-sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | (while read dir cond ; do
+sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | while read dir cond ; do
test -d ../$dir || continue
if test -n "$cond" ; then
cat <<END_CONDITIONAL
@@ -18,17 +18,27 @@ END_CONDITIONAL
else
echo "SUBDIRS += $dir"
fi
-done)
+done
# Write the rest of Make.base, up to SOURCES
sed -n '/END SUBDIRS/,/BEGIN SOURCES/p' Make.base
+# Write the list of sources.
echo
echo "libwiredtiger_la_LDFLAGS = -release @VERSION@"
-echo "libwiredtiger_la_SOURCES=\\"
-sed -e '/^[a-z]/!d' \
- -e 's/.*/ & \\/' \
- -e '$s/ \\$//' < ../dist/filelist
+echo "libwiredtiger_la_SOURCES ="
+sed -e '/^[a-z]/!d' < ../dist/filelist | while read file cond; do
+ if test -n "$cond"; then
+ cat <<END_CONDITIONAL
+# DO NOT indent the "libwiredtiger_la_SOURCES" lines, it breaks the build.
+if ${cond}
+libwiredtiger_la_SOURCES += $file
+endif
+END_CONDITIONAL
+ else
+ echo "libwiredtiger_la_SOURCES += $file"
+ fi
+done
# Write the rest of Make.base
sed -n '/END SOURCES/,$p' Make.base
diff --git a/build_win/filelist.win b/build_win/filelist.win
deleted file mode 100644
index c370303d5f8..00000000000
--- a/build_win/filelist.win
+++ /dev/null
@@ -1,172 +0,0 @@
-
-# List of source files for WiredTiger library.
-# filelist --
-src/async/async_api.c
-src/async/async_op.c
-src/async/async_worker.c
-src/block/block_addr.c
-src/block/block_ckpt.c
-src/block/block_compact.c
-src/block/block_ext.c
-src/block/block_map.c
-src/block/block_mgr.c
-src/block/block_open.c
-src/block/block_read.c
-src/block/block_session.c
-src/block/block_slvg.c
-src/block/block_vrfy.c
-src/block/block_write.c
-src/bloom/bloom.c
-src/btree/bt_compact.c
-src/btree/bt_curnext.c
-src/btree/bt_curprev.c
-src/btree/bt_cursor.c
-src/btree/bt_debug.c
-src/btree/bt_delete.c
-src/btree/bt_discard.c
-src/btree/bt_handle.c
-src/btree/bt_huffman.c
-src/btree/bt_io.c
-src/btree/bt_misc.c
-src/btree/bt_ovfl.c
-src/btree/bt_page.c
-src/btree/bt_read.c
-src/btree/bt_rebalance.c
-src/btree/bt_ret.c
-src/btree/bt_slvg.c
-src/btree/bt_split.c
-src/btree/bt_stat.c
-src/btree/bt_sync.c
-src/btree/bt_upgrade.c
-src/btree/bt_vrfy.c
-src/btree/bt_vrfy_dsk.c
-src/btree/bt_walk.c
-src/btree/col_modify.c
-src/btree/col_srch.c
-src/btree/row_key.c
-src/btree/row_modify.c
-src/btree/row_srch.c
-src/cache/cache_las.c
-src/config/config.c
-src/config/config_api.c
-src/config/config_check.c
-src/config/config_collapse.c
-src/config/config_def.c
-src/config/config_ext.c
-src/config/config_upgrade.c
-src/conn/api_strerror.c
-src/conn/api_version.c
-src/conn/conn_api.c
-src/conn/conn_cache.c
-src/conn/conn_cache_pool.c
-src/conn/conn_ckpt.c
-src/conn/conn_dhandle.c
-src/conn/conn_handle.c
-src/conn/conn_log.c
-src/conn/conn_open.c
-src/conn/conn_stat.c
-src/conn/conn_sweep.c
-src/cursor/cur_backup.c
-src/cursor/cur_bulk.c
-src/cursor/cur_config.c
-src/cursor/cur_ds.c
-src/cursor/cur_dump.c
-src/cursor/cur_file.c
-src/cursor/cur_index.c
-src/cursor/cur_join.c
-src/cursor/cur_json.c
-src/cursor/cur_log.c
-src/cursor/cur_metadata.c
-src/cursor/cur_stat.c
-src/cursor/cur_std.c
-src/cursor/cur_table.c
-src/evict/evict_file.c
-src/evict/evict_lru.c
-src/evict/evict_page.c
-src/log/log.c
-src/log/log_auto.c
-src/log/log_slot.c
-src/lsm/lsm_cursor.c
-src/lsm/lsm_cursor_bulk.c
-src/lsm/lsm_manager.c
-src/lsm/lsm_merge.c
-src/lsm/lsm_meta.c
-src/lsm/lsm_stat.c
-src/lsm/lsm_tree.c
-src/lsm/lsm_work_unit.c
-src/lsm/lsm_worker.c
-src/meta/meta_apply.c
-src/meta/meta_ckpt.c
-src/meta/meta_ext.c
-src/meta/meta_table.c
-src/meta/meta_track.c
-src/meta/meta_turtle.c
-src/os_common/filename.c
-src/os_common/os_abort.c
-src/os_common/os_alloc.c
-src/os_common/os_fhandle.c
-src/os_common/os_fs_inmemory.c
-src/os_common/os_fs_stdio.c
-src/os_common/os_getline.c
-src/os_common/os_getopt.c
-src/os_common/os_init.c
-src/os_common/os_strtouq.c
-src/os_win/os_dir.c
-src/os_win/os_dlopen.c
-src/os_win/os_errno.c
-src/os_win/os_fs.c
-src/os_win/os_getenv.c
-src/os_win/os_map.c
-src/os_win/os_mtx_cond.c
-src/os_win/os_once.c
-src/os_win/os_pagesize.c
-src/os_win/os_path.c
-src/os_win/os_priv.c
-src/os_win/os_setvbuf.c
-src/os_win/os_sleep.c
-src/os_win/os_snprintf.c
-src/os_win/os_thread.c
-src/os_win/os_time.c
-src/os_win/os_vsnprintf.c
-src/os_win/os_yield.c
-src/packing/pack_api.c
-src/packing/pack_impl.c
-src/packing/pack_stream.c
-src/reconcile/rec_track.c
-src/reconcile/rec_write.c
-src/schema/schema_create.c
-src/schema/schema_drop.c
-src/schema/schema_list.c
-src/schema/schema_open.c
-src/schema/schema_plan.c
-src/schema/schema_project.c
-src/schema/schema_rename.c
-src/schema/schema_stat.c
-src/schema/schema_truncate.c
-src/schema/schema_util.c
-src/schema/schema_worker.c
-src/session/session_api.c
-src/session/session_compact.c
-src/session/session_dhandle.c
-src/session/session_salvage.c
-src/support/cksum.c
-src/support/cond_auto.c
-src/support/crypto.c
-src/support/err.c
-src/support/global.c
-src/support/hash_city.c
-src/support/hash_fnv.c
-src/support/hazard.c
-src/support/hex.c
-src/support/huffman.c
-src/support/mtx_rw.c
-src/support/pow.c
-src/support/rand.c
-src/support/scratch.c
-src/support/stat.c
-src/txn/txn.c
-src/txn/txn_ckpt.c
-src/txn/txn_ext.c
-src/txn/txn_log.c
-src/txn/txn_nsnap.c
-src/txn/txn_recover.c
diff --git a/dist/api_data.py b/dist/api_data.py
index 8cfa83dadc4..90b1c8378a2 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -439,7 +439,7 @@ connection_runtime_config = [
Config('file_max', '100MB', r'''
the maximum size of log files''',
min='100KB', max='2GB'),
- Config('path', '', r'''
+ Config('path', '"."', r'''
the path to a directory into which the log files are written.
If the value is not an absolute path name, the files are created
relative to the database home'''),
@@ -722,8 +722,8 @@ wiredtiger_open = wiredtiger_open_common + [
\c create option''',
type='boolean'),
Config('in_memory', 'false', r'''
- keep data in-memory only, minimize disk I/O''',
- type='boolean', undoc=True),
+ keep data in-memory only. See @ref in_memory for more information''',
+ type='boolean'),
Config('use_environment', 'true', r'''
use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment
variables if the process is not running with special privileges.
@@ -822,6 +822,13 @@ methods = {
Config('bloom_hash_count', '8', r'''
the number of hash values per item for the bloom filter''',
min='2', max='100'),
+ Config('operation', '"and"', r'''
+ the operation applied between this and other joined cursors.
+ When "operation=and" is specified, all the conditions implied by
+ joins must be satisfied for an entry to be returned by the join cursor;
+ when "operation=or" is specified, only one must be satisfied.
+ All cursors joined to a join cursor must have matching operations''',
+ choices=['and', 'or']),
Config('strategy', '', r'''
when set to bloom, a bloom filter is created and populated for
this index. This has an up front cost but may reduce the number
@@ -952,6 +959,11 @@ methods = {
Display the contents of on-disk blocks as they are verified,
using the application's message handler, intended for debugging''',
type='boolean'),
+ Config('dump_layout', 'false', r'''
+ Display the layout of the files as they are verified, using the
+ application's message handler, intended for debugging; requires
+ optional support from the block manager''',
+ type='boolean'),
Config('dump_offsets', '', r'''
Display the contents of specific on-disk blocks,
using the application's message handler, intended for debugging''',
@@ -960,10 +972,6 @@ methods = {
Display the contents of in-memory pages as they are verified,
using the application's message handler, intended for debugging''',
type='boolean'),
- Config('dump_shape', 'false', r'''
- Display the shape of the tree after verification,
- using the application's message handler, intended for debugging''',
- type='boolean'),
Config('strict', 'false', r'''
Treat any verification problem as an error; by default, verify will
warn, but not fail, in the case of errors that won't affect future
@@ -1077,11 +1085,17 @@ methods = {
type='boolean'),
]),
'WT_CONNECTION.reconfigure' : Method(connection_runtime_config),
+'WT_CONNECTION.set_file_system' : Method([]),
'WT_CONNECTION.load_extension' : Method([
Config('config', '', r'''
configuration string passed to the entry point of the
extension as its WT_CONFIG_ARG argument'''),
+ Config('early_load', 'false', r'''
+ whether this extension should be loaded at the beginning of
+ ::wiredtiger_open. Only applicable to extensions loaded via the
+ wiredtiger_open configuration string''',
+ type='boolean'),
Config('entry', 'wiredtiger_extension_init', r'''
the entry point of the extension, called to initialize the
extension when it is loaded. The signature of the function
diff --git a/dist/api_err.py b/dist/api_err.py
index a17c68ee196..82f961a4ac9 100644
--- a/dist/api_err.py
+++ b/dist/api_err.py
@@ -53,11 +53,11 @@ errors = [
to return an error if recovery is required to use the database.'''),
Error('WT_CACHE_FULL', -31807,
'operation would overflow cache', '''
- This error is generated when wiredtiger_open is configured
- to run in-memory, and an insert or update operation requires more
- than the configured cache size to complete.''', undoc=True),
- Error('WT_PERM_DENIED', -31808,
- 'permission denied (internal)', undoc=True),
+ This error is only generated when wiredtiger_open is configured
+ to run in-memory, and an insert or update operation requires
+ more than the configured cache size to complete. The operation
+ may be retried; if a transaction is in progress, it should be
+ rolled back and the operation retried in a new transaction.'''),
]
# Update the #defines in the wiredtiger.in file.
diff --git a/dist/dist.py b/dist/dist.py
index 1b3ad828dfb..555cc03989b 100644
--- a/dist/dist.py
+++ b/dist/dist.py
@@ -2,21 +2,16 @@ import filecmp, glob, os, re, shutil
# source_files --
# Return a list of the WiredTiger source file names.
-def source_files(skip_includes=False):
- if not skip_includes:
- for line in glob.iglob('../src/include/*.[hi]'):
- yield line
+def source_files():
file_re = re.compile(r'^\w')
+ for line in glob.iglob('../src/include/*.[hi]'):
+ yield line
for line in open('filelist', 'r'):
if file_re.match(line):
- yield os.path.join('..', line.rstrip())
- # Return only the Windows-specific files in the Windows filelist
- for line in open('../build_win/filelist.win', 'r'):
- if 'os_win' in line and file_re.match(line):
- yield os.path.join('..', line.rstrip())
+ yield os.path.join('..', line.split()[0])
for line in open('extlist', 'r'):
if file_re.match(line):
- yield os.path.join('..', line.rstrip())
+ yield os.path.join('..', line.split()[0])
# source_dirs --
# Return a list of the WiredTiger source directory names.
diff --git a/dist/filelist b/dist/filelist
index 1d7ffa76922..59624508cf0 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -47,6 +47,9 @@ src/btree/row_key.c
src/btree/row_modify.c
src/btree/row_srch.c
src/cache/cache_las.c
+src/checksum/checksum.c
+src/checksum/power8/crc32.S POWERPC_HOST
+src/checksum/power8/crc32_wrapper.c POWERPC_HOST
src/config/config.c
src/config/config_api.c
src/config/config_check.c
@@ -104,30 +107,47 @@ src/meta/meta_turtle.c
src/os_common/filename.c
src/os_common/os_abort.c
src/os_common/os_alloc.c
+src/os_common/os_errno.c
src/os_common/os_fhandle.c
src/os_common/os_fs_inmemory.c
-src/os_common/os_fs_stdio.c
-src/os_common/os_getline.c
+src/os_common/os_fstream.c
+src/os_common/os_fstream_stdio.c
src/os_common/os_getopt.c
-src/os_common/os_init.c
src/os_common/os_strtouq.c
-src/os_posix/os_dir.c
-src/os_posix/os_dlopen.c
-src/os_posix/os_errno.c
-src/os_posix/os_fallocate.c
-src/os_posix/os_fs.c
-src/os_posix/os_getenv.c
-src/os_posix/os_map.c
-src/os_posix/os_mtx_cond.c
-src/os_posix/os_once.c
-src/os_posix/os_pagesize.c
-src/os_posix/os_path.c
-src/os_posix/os_priv.c
-src/os_posix/os_setvbuf.c
-src/os_posix/os_sleep.c
-src/os_posix/os_thread.c
-src/os_posix/os_time.c
-src/os_posix/os_yield.c
+src/os_posix/os_dir.c POSIX_HOST
+src/os_posix/os_dlopen.c POSIX_HOST
+src/os_posix/os_fallocate.c POSIX_HOST
+src/os_posix/os_fs.c POSIX_HOST
+src/os_posix/os_getenv.c POSIX_HOST
+src/os_posix/os_map.c POSIX_HOST
+src/os_posix/os_mtx_cond.c POSIX_HOST
+src/os_posix/os_once.c POSIX_HOST
+src/os_posix/os_pagesize.c POSIX_HOST
+src/os_posix/os_path.c POSIX_HOST
+src/os_posix/os_priv.c POSIX_HOST
+src/os_posix/os_setvbuf.c POSIX_HOST
+src/os_posix/os_sleep.c POSIX_HOST
+src/os_posix/os_thread.c POSIX_HOST
+src/os_posix/os_time.c POSIX_HOST
+src/os_posix/os_yield.c POSIX_HOST
+src/os_win/os_dir.c WINDOWS_HOST
+src/os_win/os_dlopen.c WINDOWS_HOST
+src/os_win/os_fs.c WINDOWS_HOST
+src/os_win/os_getenv.c WINDOWS_HOST
+src/os_win/os_map.c WINDOWS_HOST
+src/os_win/os_mtx_cond.c WINDOWS_HOST
+src/os_win/os_once.c WINDOWS_HOST
+src/os_win/os_pagesize.c WINDOWS_HOST
+src/os_win/os_path.c WINDOWS_HOST
+src/os_win/os_priv.c WINDOWS_HOST
+src/os_win/os_setvbuf.c WINDOWS_HOST
+src/os_win/os_sleep.c WINDOWS_HOST
+src/os_win/os_snprintf.c WINDOWS_HOST
+src/os_win/os_thread.c WINDOWS_HOST
+src/os_win/os_time.c WINDOWS_HOST
+src/os_win/os_vsnprintf.c WINDOWS_HOST
+src/os_win/os_winerr.c WINDOWS_HOST
+src/os_win/os_yield.c WINDOWS_HOST
src/packing/pack_api.c
src/packing/pack_impl.c
src/packing/pack_stream.c
@@ -148,7 +168,6 @@ src/session/session_api.c
src/session/session_compact.c
src/session/session_dhandle.c
src/session/session_salvage.c
-src/support/cksum.c
src/support/cond_auto.c
src/support/crypto.c
src/support/err.c
@@ -160,8 +179,6 @@ src/support/hex.c
src/support/huffman.c
src/support/mtx_rw.c
src/support/pow.c
-src/support/power8/crc32.S
-src/support/power8/crc32_wrapper.c
src/support/rand.c
src/support/scratch.c
src/support/stat.c
diff --git a/dist/flags.py b/dist/flags.py
index 806fac2137d..b5f36fb707a 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -8,13 +8,6 @@ flags = {
###################################################
# Internal routine flag declarations
###################################################
- 'file_types' : [
- 'FILE_TYPE_CHECKPOINT',
- 'FILE_TYPE_DATA',
- 'FILE_TYPE_DIRECTORY',
- 'FILE_TYPE_LOG',
- 'FILE_TYPE_REGULAR',
- ],
'log_scan' : [
'LOGSCAN_FIRST',
'LOGSCAN_FROM_CKP',
@@ -105,6 +98,7 @@ flags = {
'CONN_LSM_MERGE',
'CONN_PANIC',
'CONN_READONLY',
+ 'CONN_RECOVERING',
'CONN_SERVER_ASYNC',
'CONN_SERVER_CHECKPOINT',
'CONN_SERVER_LSM',
@@ -115,12 +109,12 @@ flags = {
],
'session' : [
'SESSION_CAN_WAIT',
- 'SESSION_CLEAR_EVICT_WALK',
'SESSION_INTERNAL',
'SESSION_LOCK_NO_WAIT',
'SESSION_LOCKED_CHECKPOINT',
'SESSION_LOCKED_HANDLE_LIST',
'SESSION_LOCKED_METADATA',
+ 'SESSION_LOCKED_PASS',
'SESSION_LOCKED_SCHEMA',
'SESSION_LOCKED_SLOT',
'SESSION_LOCKED_TABLE',
diff --git a/dist/log.py b/dist/log.py
index 9201b20054b..8743dd3a71c 100644
--- a/dist/log.py
+++ b/dist/log.py
@@ -178,7 +178,7 @@ __wt_logop_read(WT_SESSION_IMPL *session,
}
static size_t
-__logrec_json_unpack_str(char *dest, size_t destlen, const char *src,
+__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src,
size_t srclen)
{
\tsize_t total;
diff --git a/dist/s_c_test_create b/dist/s_c_test_create
new file mode 100755
index 00000000000..fd0fa809d99
--- /dev/null
+++ b/dist/s_c_test_create
@@ -0,0 +1,105 @@
+#! /bin/sh
+
+#
+# Usage: s_c_test_create test_name
+#
+# Create a new test case in the C test suite.
+# This creates the infrastructure for a new C test case: a directory named
+# after the test is added to the C suite directory, a C program template is
+# written into it, and the matching entries are added to the suite Makefile.
+#
+# Any 'make check' variations of this test should be added to the smoke.sh
+# script in the main C suite directory.
+#
+# Scratch file used when rewriting Makefile.am; removed on exit or signal.
+# NOTE(review): the trap action ends in "exit 0", so the "exit 1" calls below
+# appear to leave the script with a zero exit status -- confirm callers only
+# look at the printed message.
+tmp=__a
+trap 'rm -f $tmp; exit 0' 0 1 2 3 13 15
+
+# A test name is required.
+if [ "x$1" = "x" ]; then
+ echo "Usage: $0 test_name"
+ exit 1
+fi
+CSUITE_DIRECTORY=../test/csuite
+MAKEFILE_NAME=$CSUITE_DIRECTORY/Makefile.am
+
+TEST_NAME=$1
+
+# Refuse a name that already appears anywhere in the suite's Makefile.am. The
+# arguments are quoted so a name containing whitespace or glob characters is
+# passed to grep as a single pattern instead of being split by the shell.
+exists=`grep -- "$TEST_NAME" "$MAKEFILE_NAME"`
+
+if [ "x$exists" != "x" ]; then
+ echo "Test with requested name already exists. Try another name."
+ exit 1
+fi
+
+# Create a subdirectory for the new test. Stop here if it cannot be created:
+# otherwise the stub below could not be written, yet Makefile.am would still
+# be updated to reference it.
+mkdir "$CSUITE_DIRECTORY/$TEST_NAME" || exit 1
+
+(cat <<EOF
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference:
+ * Test case description:
+ * Failure mode:
+ */
+
+void (*custom_die)(void) = NULL;
+
+int
+main(int argc, char *argv[])
+{
+ TEST_OPTS *opts, _opts;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ /*
+ * Insert test implementation here.
+ */
+
+ testutil_cleanup(opts);
+
+ return (0);
+}
+EOF
+) > $CSUITE_DIRECTORY/$TEST_NAME/main.c
+
+
+# Now update the C test suite makefile to include the new test case.
+# The new section is printed ahead of the first "# Script add new line here"
+# marker line. Makefile.am is only replaced when awk succeeded, so a failed
+# run cannot overwrite it with an empty or partial file.
+
+NEW_MAKE_SECT="test_${TEST_NAME}_SOURCES = ${TEST_NAME}\/main.c\\nnoinst_PROGRAMS = test_${TEST_NAME}\\n\\n"
+
+awk \
+ "/^# Script add new line here/ && !modif { printf(\"$NEW_MAKE_SECT\"); modif=1 } {print}" \
+ "$CSUITE_DIRECTORY/Makefile.am" > $tmp &&
+ mv $tmp "$CSUITE_DIRECTORY/Makefile.am"
+
+exit 0
diff --git a/dist/s_copyright.list b/dist/s_copyright.list
index c6a5910087b..4999d2a37a2 100644
--- a/dist/s_copyright.list
+++ b/dist/s_copyright.list
@@ -29,6 +29,8 @@ skip src/config/config_def.c
skip src/conn/api_strerror.c
skip src/docs/tools/doxypy.py
skip src/include/extern.h
+skip src/include/extern_posix.h
+skip src/include/extern_win.h
skip src/include/flags.h
skip src/include/queue.h
skip src/log/log_auto.c
diff --git a/dist/s_define b/dist/s_define
index 77673bdcdf9..050101e8510 100755
--- a/dist/s_define
+++ b/dist/s_define
@@ -5,7 +5,7 @@ t=__wt.$$
trap 'rm -f $t; exit 0' 0 1 2 3 13 15
# List of source files to search.
-l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist`
+l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist`
l="$l `echo ../src/include/*.i ../src/utilities/*.c ../test/*/*.c`"
# List of include files for source #defines.
diff --git a/dist/s_define.list b/dist/s_define.list
index c9777c86675..2cdda74e7d4 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -23,6 +23,7 @@ WT_CONN_CHECK_PANIC
WT_DEADLOCK
WT_DEBUG_BYTE
WT_ERR_ERROR_OK
+WT_EXT_FOREACH_OFF
WT_HANDLE_CLOSED
WT_HANDLE_NULLABLE
WT_LOG_SLOT_ACTIVE
@@ -42,13 +43,17 @@ WT_STATS_FIELD_TO_SLOT
WT_STATS_SLOT_ID
WT_STAT_DECR
WT_STAT_DECRV
+WT_STAT_DECRV_ATOMIC
WT_STAT_FAST_CONN_DECRV
WT_STAT_FAST_DATA_DECRV
WT_STAT_FAST_DECR
WT_STAT_FAST_DECRV
+WT_STAT_FAST_DECRV_ATOMIC
WT_STAT_FAST_INCR
WT_STAT_FAST_INCRV
+WT_STAT_FAST_INCRV_ATOMIC
WT_STAT_FAST_SET
+WT_STAT_INCRV_ATOMIC
WT_STAT_WRITE
WT_TIMEDIFF_US
WT_TRET_ERROR_OK
diff --git a/dist/s_docs b/dist/s_docs
index c66bcb0bd06..08602989fe8 100755
--- a/dist/s_docs
+++ b/dist/s_docs
@@ -114,7 +114,8 @@ valid_build()
}
classf=`ls ../docs/struct___* 2>/dev/null`
for c in $classf; do
- echo "$c: Need to add class to PREDEFINED in src/docs/Doxyfile"
+ echo "$c: Add class to PREDEFINED in src/docs/Doxyfile, then remove docs/*.{html,js} and rebuild"
+
done
}
diff --git a/dist/s_funcs b/dist/s_funcs
index 5fee03b5615..8695c8d4fa7 100755
--- a/dist/s_funcs
+++ b/dist/s_funcs
@@ -5,7 +5,7 @@ t=__wt.$$
trap 'rm -f $t; exit 0' 0 1 2 3 13 15
# List of files to search.
-l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist`
+l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist`
l="$l `echo ../src/*/*.i ../src/utilities/*.c ../bench/wtperf/*.c`"
(
diff --git a/dist/s_label b/dist/s_label
index b7c5795234a..b56ecc6fc78 100755
--- a/dist/s_label
+++ b/dist/s_label
@@ -23,7 +23,7 @@ file_parse()
# where there's a jump to the error label after the error label.
for f in `find bench examples ext src test -name '*.[ci]'`; do
file_parse $f |
- egrep '(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ILLEGAL_VALUE|WT_RET)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(' |
+ egrep '(WT_ERR[_A-Z]*|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ILLEGAL_VALUE|WT_RET[_A-Z]*)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ILLEGAL_VALUE_ERR)\(' |
sed 's/:.*//' > $t
test -s $t && {
@@ -32,6 +32,14 @@ for f in `find bench examples ext src test -name '*.[ci]'`; do
}
done
+# Returns before jumps to an error label within the same loop.
+# Jumps before returns have already been detected above.
+for f in `find bench examples ext src test -name '*.[ci]'`; do
+ file_parse $f | sed "s=^=$f:="
+done | python dist/s_label_loop.py |
+ egrep '\{@[^@]*(WT_ILLEGAL_VALUE|WT_RET[_A-Z]*)\([^@]*(WT_ERR[_A-Z]*|WT_ILLEGAL_VALUE_ERR)\(.*err:' |
+ sed -e 's/^\([^:]*\): *\([^:]*\):.*/\1:\2: mix of returns and jump to the error label within a loop/'
+
# Return of 0 in functions after a jump to the error label.
for f in `find bench examples ext src test -name '*.[ci]'`; do
file_parse $f |
diff --git a/dist/s_label_loop.py b/dist/s_label_loop.py
new file mode 100644
index 00000000000..5cc222a4250
--- /dev/null
+++ b/dist/s_label_loop.py
@@ -0,0 +1,28 @@
+# Mark outer loop boundaries with {@ and }@ . Nested loops are not marked.
+# Each input line is the content of a C function.
+import re, sys
+
+p = re.compile(r'((for |while |_FOREACH|FOREACH_BEGIN)\([^{)]*\)|do) {')
+for line in sys.stdin:
+    matched = False
+    m = p.search(line)
+    while m is not None:
+        matched = True
+        pos = m.end()                   # just past the loop's opening brace
+        out = line[:pos] + "@"
+        level = 1                       # brace depth relative to the loop
+        length = len(line)
+        while level > 0 and pos < length:
+            c = line[pos]
+            pos += 1
+            out += c
+            if c == "}":
+                level -= 1
+            elif c == "{":
+                level += 1
+        out += "@"                      # follows the loop's closing brace
+        sys.stdout.write(out)
+        line = line[pos:]               # keep scanning after this loop
+        m = p.search(line)
+    if matched:                         # lines without a loop are dropped
+        sys.stdout.write(line)
diff --git a/dist/s_longlines b/dist/s_longlines
index 000f33d51d5..91dada361f4 100755
--- a/dist/s_longlines
+++ b/dist/s_longlines
@@ -8,9 +8,11 @@ l=`(cd .. &&
find bench/wtperf examples ext src test -name '*.[chisy]' &&
find dist -name '*.py' &&
find src -name '*.in') |
- sed -e '/dist\/stat_data\.py/d' \
+ sed -e '/checksum\/power8/d' \
+ -e '/dist\/stat_data\.py/d' \
-e '/include\/extern\.h/d' \
- -e '/support\/power8/d' \
+ -e '/include\/extern_posix\.h/d' \
+ -e '/include\/extern_win\.h/d' \
-e '/support\/stat\.c/d'`
for f in $l ; do
diff --git a/dist/s_prototypes b/dist/s_prototypes
index 4ceb69f4c77..73f7be371ea 100755
--- a/dist/s_prototypes
+++ b/dist/s_prototypes
@@ -28,36 +28,52 @@ proto()
-e 's/\* /\*/g' \
-e 's/ */ /g' \
-e 's/^/extern /' \
- -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/' \
- -e 's/$/;/p' < $1
+ -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/g' \
+ -e '# If a line ends in #endif, appending a semicolon will result' \
+ -e '# in an illegal expression, force an appended newline using' \
+ -e '# the H command because substitute may not allow newline in' \
+ -e '# the RHS of the expression.' \
+ -e '/#endif$/{' \
+ -e x \
+ -e 's/.*//' \
+ -e H \
+ -e x \
+ -e '}' \
+ -e 's/$/;/' \
+ -e p < $1
}
-(
-cat <<EOF
+# externs --
+# generate the list of prototypes given a file list
+externs()
+{
+(cat <<EOF
/* DO NOT EDIT: automatically built by dist/s_prototypes. */
EOF
+ for i in $l; do
+ proto ../$i
+ done) > $t
+ cmp $t $f > /dev/null 2>&1 ||
+ (echo "Building $f" && rm -f $f && cp $t $f)
+}
-# First, get prototypes for everything but the OS directories.
-# Second, get prototypes for the OS directories.
-# The reason for this is because the OS directories repeat names (that is, there
-# are common names in both os_posix and os_win), and so we sort the prototypes
-# to avoid repeating them in the output (which some compilers won't tolerate).
-# We'd sort everything and discard duplicates, but we can't sort when function
-# signatures are on multiple lines, that is, #ifdef'd function signatures. Since
-# the OS directories are the only places with repeated names, and they have no
-# #ifdef'd signatures, we do it this way.
-l=`sed -e '/^[a-z]/!d' -e '/src\/os/d' filelist`
-for i in $l; do
- proto ../$i
-done
-l=`echo ../src\/os*/*.c`
+f=../src/include/extern_win.h
+l=`sed \
+ -e '/os_win/!d' \
+ -e 's/[ ].*$//' filelist`
+externs
-for i in $l; do
- proto $i
-done | tee xxx | env LC_ALL=C sort -u
-) > $t
+f=../src/include/extern_posix.h
+l=`sed \
+ -e '/os_posix/!d' \
+ -e 's/[ ].*$//' filelist`
+externs
f=../src/include/extern.h
-cmp $t $f > /dev/null 2>&1 ||
- (echo "Building $f" && rm -f $f && cp $t $f)
+l=`sed \
+ -e '/^[a-z]/!d' \
+ -e '/os_posix/d' \
+ -e '/os_win/d' \
+ -e 's/[ ].*$//' filelist`
+externs
diff --git a/dist/s_stat b/dist/s_stat
index 3938b8e65eb..0638a7f3337 100755
--- a/dist/s_stat
+++ b/dist/s_stat
@@ -8,8 +8,8 @@ trap 'rm -f $t; exit 0' 0 1 2 3 13 15
# definition.
l=`sed \
-e '/src\/support\/stat.c/d' \
- -e 's,#.*,,' \
- -e '/^$/d' \
+ -e '/^[a-z]/!d' \
+ -e 's/[ ].*$//' \
-e 's,^,../,' filelist`
l="$l `echo ../src/include/*.i ../src/include/os.h`"
diff --git a/dist/s_string b/dist/s_string
index 3a4f9e190d3..32aa7528979 100755
--- a/dist/s_string
+++ b/dist/s_string
@@ -31,7 +31,9 @@ replace() {
# Check the spelling of an individual file.
check() {
# Strip out git hashes, which are seven character hex strings.
- sed 's/ [0-9a-f]\{7\} / /g' ../$2 | aspell --lang=en $1 list |
+ # Strip out double quote char literals ('"'), they confuse aspell.
+ sed -e 's/ [0-9a-f]\{7\} / /g' -e "s/'\"'//g" ../$2 |
+ aspell --lang=en $1 list |
sort -u |
comm -23 /dev/stdin s_string.ok > $t
test -s $t && {
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 631f2a5c909..7966ff2cf2e 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -76,6 +76,7 @@ DECL
DECR
DESC
DHANDLE
+DIRECTIO
DNE
DOI
DONTNEED
@@ -86,6 +87,7 @@ Decrement
Decrypt
DeleteFileA
EAGAIN
+EB
EBUSY
EEXIST
EINTR
@@ -116,6 +118,7 @@ FNV
FORALL
FOREACH
FULLFSYNC
+FindClose
FindFirstFile
Fixup
Fk
@@ -130,6 +133,7 @@ GIDs
Gcc
Geoff
GetEnvironmentVariableA
+GetFileAttributesA
GetFileAttributesEx
GetFileSizeEx
GetLastError
@@ -145,6 +149,7 @@ IEC
IEEE
IKEY
IMPL
+IMPL's
INCR
INIT
INITIALIZER
@@ -211,6 +216,7 @@ Mewhort
Mitzenmacher
MongoDB
MoveFile
+MoveFileA
Multi
Multithreaded
Mutex
@@ -255,6 +261,7 @@ Qsort
RCS
RDNOLOCK
RDONLY
+READONLY
RECNO
REF's
REFs
@@ -468,6 +475,7 @@ ckptfrag
ckptlist
cksum
cloexec
+closedir
clsm
cmd
cmp
@@ -488,9 +496,11 @@ conn
connectionp
const
constantp
+cookiep
copydoc
copyin
copyout
+countp
cp
cpuid
crc
@@ -602,6 +612,7 @@ evictserver
exactp
exe
execop
+existp
extern
extlist
fadvise
@@ -618,6 +629,7 @@ ffs
fgetc
fgetln
fh
+fhandle
filefrag
filehandle
fileid
@@ -637,11 +649,14 @@ fmterr
fnv
foc
fopen
+formatmessage
fp
fprintf
free'd
+fs
fscanf
fstat
+fstream
fsync
fsyncLock
fsyncs
@@ -669,11 +684,13 @@ gostruct
goutf
gt
handleops
+handlep
hashval
havesize
hdr
highjack
hotbackup
+hselasky
html
huffman
hval
@@ -684,11 +701,13 @@ ibackup
icount
idx
ifdef's
+iiu
ikey
im
impl
incase
incr
+incrementals
incrementing
indices
indirects
@@ -697,6 +716,7 @@ infeasible
inflateInit
infmt
init
+initializers
initn
initsize
initval
@@ -715,8 +735,20 @@ intrin
inuse
io
ip
+isalnum
+isalpha
+iscntrl
+isdigit
+isgraph
islocked
+islower
ispo
+isprint
+ispunct
+isrc
+isspace
+isupper
+isxdigit
iter
iteratively
jnr
@@ -739,6 +771,7 @@ lbracket
ld
le
len
+lengthp
lenp
level's
leveldb
@@ -844,6 +877,7 @@ noraw
notfound
notsup
notused
+nowait
nset
nsnap
nul
@@ -866,6 +900,7 @@ os
osfhandle
ovfl
ownp
+pR
packv
pagesize
parens
@@ -911,6 +946,7 @@ pushms
putK
putV
pv
+pvA
pwrite
py
qdown
@@ -939,6 +975,7 @@ recsize
rectype
recurse
refp
+regionp
reinitialization
relocked
resize
@@ -1038,7 +1075,9 @@ toklen
tokname
tokstart
toktype
+tolower
totalsec
+toupper
transactional
transactionally
trecno
@@ -1064,6 +1103,7 @@ uncompresssed
undef
unencrypted
unesc
+unescape
unescaped
unicode
uninstantiated
diff --git a/dist/s_style b/dist/s_style
index a163eb83b25..a222c004cc3 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -20,7 +20,7 @@ if [ $# -ne 1 ]; then
-name '*.[chisy]' -o -name '*.in' -o -name '*.dox' |
sed -e '/Makefile.in/d' \
-e '/build_win\/wiredtiger_config.h/d' \
- -e '/support\/power8/d' |
+ -e '/checksum\/power8/d' |
xargs $xp -n 1 -I{} sh ./dist/s_style {}
else
# General style correction and cleanup for a single file
@@ -60,11 +60,13 @@ else
echo "$f: use TAILQ for all lists"
fi
- if ! expr "$f" : 'src/os_common/.*' > /dev/null &&
+ if ! expr "$f" : 'src/include/extern.h' > /dev/null &&
+ ! expr "$f" : 'src/include/extern_posix.h' > /dev/null &&
+ ! expr "$f" : 'src/include/extern_win.h' > /dev/null &&
+ ! expr "$f" : 'src/include/os.h' > /dev/null &&
+ ! expr "$f" : 'src/os_common/.*' > /dev/null &&
! expr "$f" : 'src/os_posix/.*' > /dev/null &&
! expr "$f" : 'src/os_win/.*' > /dev/null &&
- ! expr "$f" : 'src/include/extern.h' > /dev/null &&
- ! expr "$f" : 'src/include/os.h' > /dev/null &&
grep '__wt_errno' $f > $t; then
echo "$f: upper-level code should not call __wt_errno"
cat $t
@@ -73,7 +75,7 @@ else
if ! expr "$f" : 'examples/c/.*' > /dev/null &&
! expr "$f" : 'ext/datasources/helium/helium.c' > /dev/null &&
! expr "$f" : 'src/include/os.h' > /dev/null &&
- grep "%zu" $f | grep -v 'SIZET_FMT' > $t; then
+ egrep "%[0-9]*zu" $f | grep -v 'SIZET_FMT' > $t; then
echo "$f: %zu needs to be fixed for Windows"
cat $t
fi
@@ -138,6 +140,20 @@ else
}
fi
+ # Use of ctype functions that sign extend their arguments.
+ if ! expr "$f" : 'bench/.*' > /dev/null &&
+ ! expr "$f" : 'test/csuite/.*' > /dev/null &&
+ ! expr "$f" : 'examples/.*' > /dev/null &&
+ ! expr "$f" : 'ext/.*' > /dev/null &&
+ ! expr "$f" : 'src/include/ctype.i' > /dev/null; then
+ if egrep '(#include.*["</]ctype.h[">]|\b(is(alnum|alpha|cntrl|digit|graph|lower|print|punct|space|upper|xdigit)|to(lower|upper))\()' $f > $t; then
+ test -s $t && {
+ echo "$f: direct use of ctype.h functions, instead of ctype.i equivalents"
+ cat $t
+ }
+ fi
+ fi
+
tr -cd '[:alnum:][:space:][:punct:]' < $f |
unexpand |
sed -e 's/){/) {/' \
diff --git a/dist/s_typedef b/dist/s_typedef
index 233f432f0e5..b044a0e6b4b 100755
--- a/dist/s_typedef
+++ b/dist/s_typedef
@@ -44,7 +44,7 @@ build() {
check() {
# Complain about unused #typedefs.
# List of files to search.
- l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist`
+ l=`sed -e '/^[a-z]/!d' -e 's/[ ].*$//' -e 's,^,../,' filelist`
l="$l `echo ../src/utilities/*.c`"
(
diff --git a/dist/s_whitespace b/dist/s_whitespace
index 74820a4f0e9..8cf3f7dfe6f 100755
--- a/dist/s_whitespace
+++ b/dist/s_whitespace
@@ -38,7 +38,7 @@ for f in `find bench examples ext src test \
-name '*.in' -o \
-name 'Makefile.am' |
sed -e '/Makefile.in/d' \
- -e '/support\/power8/d'`; do
+ -e '/checksum\/power8/d'`; do
whitespace_and_empty_line $f
done
diff --git a/dist/s_win b/dist/s_win
index 562e89f94c6..49deb348bc3 100755
--- a/dist/s_win
+++ b/dist/s_win
@@ -39,42 +39,7 @@ win_export()
(echo "Building $f" && rm -f $f && cp $t $f)
}
-win_filelist()
-{
- f='../build_win/filelist.win'
-
- # Discard POSIX-only and PPC-only files, add in Windows-only files.
- (
- sed \
- -e '/\/os_posix\//d' \
- -e '/src\/support\/power8\/crc32.S/d' \
- -e '/src\/support\/power8\/crc32_wrapper.c/d'
-
- echo 'src/os_win/os_dir.c'
- echo 'src/os_win/os_dlopen.c'
- echo 'src/os_win/os_errno.c'
- echo 'src/os_win/os_fs.c'
- echo 'src/os_win/os_getenv.c'
- echo 'src/os_win/os_map.c'
- echo 'src/os_win/os_mtx_cond.c'
- echo 'src/os_win/os_once.c'
- echo 'src/os_win/os_pagesize.c'
- echo 'src/os_win/os_path.c'
- echo 'src/os_win/os_priv.c'
- echo 'src/os_win/os_setvbuf.c'
- echo 'src/os_win/os_sleep.c'
- echo 'src/os_win/os_snprintf.c'
- echo 'src/os_win/os_thread.c'
- echo 'src/os_win/os_time.c'
- echo 'src/os_win/os_vsnprintf.c'
- echo 'src/os_win/os_yield.c') < filelist | sort > $t
-
- cmp $t $f > /dev/null 2>&1 ||
- (echo "Building $f" && rm -f $f && cp $t $f)
-}
-
win_config
win_export
-win_filelist
exit 0
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 483e0bd3ef2..694ffc86ee4 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -81,6 +81,10 @@ class SessionStat(Stat):
prefix = 'session'
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, SessionStat.prefix, desc, flags)
+class ThreadState(Stat):
+ prefix = 'thread-state'
+ def __init__(self, name, desc, flags=''):
+ Stat.__init__(self, name, ThreadState.prefix, desc, flags)
class TxnStat(Stat):
prefix = 'transaction'
def __init__(self, name, desc, flags=''):
@@ -97,10 +101,20 @@ class YieldStat(Stat):
##########################################
groups = {}
groups['cursor'] = [CursorStat.prefix, SessionStat.prefix]
-groups['evict'] = [CacheStat.prefix, ConnStat.prefix, BlockStat.prefix]
+groups['evict'] = [
+ BlockStat.prefix,
+ CacheStat.prefix,
+ ConnStat.prefix,
+ ThreadState.prefix
+]
groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
-groups['system'] = [ConnStat.prefix, DhandleStat.prefix, SessionStat.prefix]
+groups['system'] = [
+ ConnStat.prefix,
+ DhandleStat.prefix,
+ SessionStat.prefix,
+ ThreadState.prefix
+]
##########################################
# CONNECTION statistics
@@ -113,6 +127,7 @@ connection_stats = [
ConnStat('cond_auto_wait_reset', 'auto adjusting condition resets'),
ConnStat('cond_wait', 'pthread mutex condition wait calls'),
ConnStat('file_open', 'files currently open', 'no_clear,no_scale'),
+ ConnStat('fsync_io', 'total fsync I/Os'),
ConnStat('memory_allocation', 'memory allocations'),
ConnStat('memory_free', 'memory frees'),
ConnStat('memory_grow', 'memory re-allocations'),
@@ -171,6 +186,9 @@ connection_stats = [
CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'),
CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'),
CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'),
+ CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'),
+ CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'),
+ CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'),
CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'),
CacheStat('cache_eviction_internal', 'internal pages evicted'),
CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'),
@@ -181,6 +199,8 @@ connection_stats = [
CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'),
CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'),
CacheStat('cache_eviction_server_not_evicting', 'eviction server populating queue, but not evicting pages'),
+ CacheStat('cache_eviction_server_slept', 'eviction server slept, because we did not make progress with eviction'),
+ CacheStat('cache_eviction_server_toobig', 'eviction server skipped very large page'),
CacheStat('cache_eviction_slow', 'eviction server unable to reach eviction goal'),
CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'),
CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'),
@@ -188,6 +208,9 @@ connection_stats = [
CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale,size'),
CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'),
CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'),
+ CacheStat('cache_hazard_checks', 'hazard pointer check calls'),
+ CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'),
+ CacheStat('cache_hazard_walks', 'hazard pointer check entries walked'),
CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_lookaside_insert', 'lookaside table insert calls'),
@@ -195,6 +218,7 @@ connection_stats = [
CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
+ CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_write', 'pages written from cache'),
@@ -247,6 +271,8 @@ connection_stats = [
LogStat('log_slot_unbuffered', 'consolidated slot unbuffered writes'),
LogStat('log_sync', 'log sync operations'),
LogStat('log_sync_dir', 'log sync_dir operations'),
+ LogStat('log_sync_dir_duration', 'log sync_dir time duration (usecs)'),
+ LogStat('log_sync_duration', 'log sync time duration (usecs)'),
LogStat('log_write_lsn', 'log server thread advances write LSN'),
LogStat('log_write_lsn_skip', 'log server thread write LSN walk skipped'),
LogStat('log_writes', 'log write operations'),
@@ -267,6 +293,10 @@ connection_stats = [
##########################################
TxnStat('txn_begin', 'transaction begins'),
TxnStat('txn_checkpoint', 'transaction checkpoints'),
+ TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
+ TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'),
+ TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'),
+ TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'),
TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
@@ -319,6 +349,13 @@ connection_stats = [
CursorStat('cursor_update', 'cursor update calls'),
##########################################
+ # Thread State statistics
+ ##########################################
+ ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
+ ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'),
+ ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'),
+
+ ##########################################
# Yield statistics
##########################################
YieldStat('page_busy_blocked', 'page acquire busy blocked'),
@@ -414,7 +451,6 @@ dsrc_stats = [
##########################################
# Cache and eviction statistics
##########################################
- CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
@@ -429,6 +465,7 @@ dsrc_stats = [
CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'),
+ CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
@@ -477,9 +514,11 @@ dsrc_stats = sorted(dsrc_stats, key=attrgetter('desc'))
# Cursor Join statistics
##########################################
join_stats = [
- JoinStat('accesses', 'accesses'),
- JoinStat('actual_count', 'actual count of items'),
JoinStat('bloom_false_positive', 'bloom filter false positives'),
+ JoinStat('bloom_insert', 'items inserted into a bloom filter'),
+ JoinStat('iterated', 'items iterated'),
+ JoinStat('main_access', 'accesses to the main table'),
+ JoinStat('membership_check', 'checks that conditions of membership are satisfied'),
]
join_stats = sorted(join_stats, key=attrgetter('desc'))
diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am
index 72fd98aff7b..d5305eec5c8 100644
--- a/examples/c/Makefile.am
+++ b/examples/c/Makefile.am
@@ -7,7 +7,6 @@ noinst_PROGRAMS = \
ex_async \
ex_backup \
ex_call_center \
- ex_config \
ex_config_parse \
ex_cursor \
ex_data_source \
@@ -15,6 +14,7 @@ noinst_PROGRAMS = \
ex_event_handler \
ex_extending \
ex_extractor \
+ ex_file_system \
ex_hello \
ex_log \
ex_pack \
@@ -26,6 +26,7 @@ noinst_PROGRAMS = \
ex_thread
ex_encrypt_LDFLAGS = -rdynamic
+ex_file_system_LDFLAGS = -rdynamic
# The examples can be run with no arguments as simple smoke tests
TESTS = $(noinst_PROGRAMS)
diff --git a/examples/c/ex_access.c b/examples/c/ex_access.c
index cc42982617b..d7f3cc557ad 100644
--- a/examples/c/ex_access.c
+++ b/examples/c/ex_access.c
@@ -60,8 +60,8 @@ main(void)
if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 ||
(ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
- return (ret);
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
}
/*! [access example connection] */
@@ -95,5 +95,5 @@ main(void)
ret = conn->close(conn, NULL);
/*! [access example close] */
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index 1c036b75461..dd807922c10 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -1037,6 +1037,13 @@ backup(WT_SESSION *session)
ret = cursor->close(cursor);
/*! [backup]*/
+ /*! [incremental backup]*/
+ /* Open the backup data source for incremental backup. */
+ ret = session->open_cursor(
+ session, "backup:", NULL, "target=(\"log:\")", &cursor);
+ /*! [incremental backup]*/
+ ret = cursor->close(cursor);
+
/*! [backup of a checkpoint]*/
ret = session->checkpoint(session, "drop=(from=June01),name=June01");
/*! [backup of a checkpoint]*/
@@ -1207,5 +1214,5 @@ main(void)
/*! [Get the WiredTiger library version #2] */
}
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_async.c b/examples/c/ex_async.c
index ecdbd2f4fea..f7531a5c3d8 100644
--- a/examples/c/ex_async.c
+++ b/examples/c/ex_async.c
@@ -31,7 +31,9 @@
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
+
#ifndef _WIN32
#include <unistd.h>
#else
@@ -48,7 +50,6 @@
#define ATOMIC_ADD(v, val) __sync_add_and_fetch(&(v), val)
#endif
-static const char * const home = NULL;
static int global_error = 0;
/*! [async example callback implementation] */
@@ -120,8 +121,19 @@ main(void)
WT_CONNECTION *conn;
WT_SESSION *session;
int i, ret;
+ const char *home;
char k[MAX_KEYS][16], v[MAX_KEYS][16];
+ /*
+ * Create a clean test directory for this run of the test program if the
+ * environment variable isn't already set (as is done by make check).
+ */
+ if (getenv("WIREDTIGER_HOME") == NULL) {
+ home = "WT_HOME";
+ ret = system("rm -rf WT_HOME && mkdir WT_HOME");
+ } else
+ home = NULL;
+
/*! [async example connection] */
ret = wiredtiger_open(home, NULL,
"create,cache_size=100MB,"
@@ -148,7 +160,7 @@ main(void)
if (ret == EBUSY)
sleep(1);
else
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [async handle allocation] */
@@ -198,7 +210,7 @@ main(void)
if (ret == EBUSY)
sleep(1);
else
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [async search] */
@@ -220,5 +232,5 @@ main(void)
printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c
index 12eeaa4b7c3..0697cbb3458 100644
--- a/examples/c/ex_backup.c
+++ b/examples/c/ex_backup.c
@@ -273,12 +273,12 @@ main(void)
snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home);
if ((ret = system(cmd_buf)) != 0) {
fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret);
- return (ret);
+ return (EXIT_FAILURE);
}
if ((ret = wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
home, wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
ret = setup_directories();
@@ -320,7 +320,9 @@ main(void)
* comparison between the incremental and original.
*/
ret = wt_conn->close(wt_conn, NULL);
+
printf("Final comparison: dumping and comparing data\n");
ret = compare_backups(0);
- return (ret);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_call_center.c b/examples/c/ex_call_center.c
index d401507d165..cd53a1cdaf9 100644
--- a/examples/c/ex_call_center.c
+++ b/examples/c/ex_call_center.c
@@ -107,8 +107,8 @@ main(void)
if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
- return (1);
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
}
/* Note: further error checking omitted for clarity. */
@@ -245,5 +245,5 @@ main(void)
ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_config.c b/examples/c/ex_config.c
deleted file mode 100644
index 2ac8198176c..00000000000
--- a/examples/c/ex_config.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*-
- * Public Domain 2014-2016 MongoDB, Inc.
- * Public Domain 2008-2014 WiredTiger, Inc.
- *
- * This is free and unencumbered software released into the public domain.
- *
- * Anyone is free to copy, modify, publish, use, compile, sell, or
- * distribute this software, either in source code form or as a compiled
- * binary, for any purpose, commercial or non-commercial, and by any
- * means.
- *
- * In jurisdictions that recognize copyright laws, the author or authors
- * of this software dedicate any and all copyright interest in the
- * software to the public domain. We make this dedication for the benefit
- * of the public at large and to the detriment of our heirs and
- * successors. We intend this dedication to be an overt act of
- * relinquishment in perpetuity of all present and future rights to this
- * software under copyright law.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * ex_config.c
- * This is an example demonstrating how to configure various database and
- * table properties.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
-
-static const char *home;
-
-int
-main(void)
-{
- int ret;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- WT_CURSOR *cursor;
- const char *key, *value;
-
- /*
- * Create a clean test directory for this run of the test program if the
- * environment variable isn't already set (as is done by make check).
- */
- if (getenv("WIREDTIGER_HOME") == NULL) {
- home = "WT_HOME";
- ret = system("rm -rf WT_HOME && mkdir WT_HOME");
- } else
- home = NULL;
-
- /*! [configure cache size] */
- if ((ret = wiredtiger_open(home, NULL,
- "create,cache_size=500M", &conn)) != 0)
- fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
- /*! [configure cache size] */
-
- /*! [create a table] */
- ret = conn->open_session(conn, NULL, NULL, &session);
-
- ret = session->create(session,
- "table:access", "key_format=S,value_format=S");
- /*! [create a table] */
-
- /*! [transaction] */
- ret = session->begin_transaction(session, "priority=100,name=mytxn");
-
- ret = session->open_cursor(session, "config:", NULL, NULL, &cursor);
-
- while ((ret = cursor->next(cursor)) == 0) {
- ret = cursor->get_key(cursor, &key);
- ret = cursor->get_value(cursor, &value);
- printf("configuration value: %s = %s\n", key, value);
- }
-
- ret = session->commit_transaction(session, NULL);
- /*! [transaction] */
-
- ret = conn->close(conn, NULL);
-
- return (ret);
-}
diff --git a/examples/c/ex_config_parse.c b/examples/c/ex_config_parse.c
index be3c78bedd4..40508b38204 100644
--- a/examples/c/ex_config_parse.c
+++ b/examples/c/ex_config_parse.c
@@ -32,6 +32,7 @@
#include <inttypes.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <wiredtiger.h>
@@ -51,12 +52,12 @@ main(void)
NULL, config_string, strlen(config_string), &parser)) != 0) {
fprintf(stderr, "Error creating configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
if ((ret = parser->close(parser)) != 0) {
fprintf(stderr, "Error closing configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [Create a configuration parser] */
@@ -64,7 +65,7 @@ main(void)
NULL, config_string, strlen(config_string), &parser)) != 0) {
fprintf(stderr, "Error creating configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
{
@@ -76,7 +77,7 @@ main(void)
if ((ret = parser->get(parser, "page_size", &v)) != 0) {
fprintf(stderr,
"page_size configuration: %s", wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
my_page_size = v.val;
/*! [get] */
@@ -91,7 +92,7 @@ main(void)
NULL, config_string, strlen(config_string), &parser)) != 0) {
fprintf(stderr, "Error creating configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [next] */
/*
@@ -112,7 +113,7 @@ main(void)
NULL, config_string, strlen(config_string), &parser)) != 0) {
fprintf(stderr, "Error creating configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [nested get] */
@@ -125,7 +126,7 @@ main(void)
if ((ret = parser->get(parser, "log.file_max", &v)) != 0) {
fprintf(stderr,
"log.file_max configuration: %s", wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
printf("log file max: %" PRId64 "\n", v.val);
/*! [nested get] */
@@ -135,7 +136,7 @@ main(void)
NULL, config_string, strlen(config_string), &parser)) != 0) {
fprintf(stderr, "Error creating configuration parser: %s\n",
wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
/*! [nested traverse] */
{
@@ -150,11 +151,10 @@ main(void)
"Error creating nested configuration "
"parser: %s\n",
wiredtiger_strerror(ret));
- ret = parser->close(parser);
- return (ret);
+ break;
}
- while ((ret = sub_parser->next(
- sub_parser, &k, &v)) == 0)
+ while ((ret =
+ sub_parser->next(sub_parser, &k, &v)) == 0)
printf("\t%.*s\n", (int)k.len, k.str);
ret = sub_parser->close(sub_parser);
}
@@ -163,5 +163,5 @@ main(void)
ret = parser->close(parser);
}
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_cursor.c b/examples/c/ex_cursor.c
index 67c945ebc0b..b8ed6ab169d 100644
--- a/examples/c/ex_cursor.c
+++ b/examples/c/ex_cursor.c
@@ -181,12 +181,12 @@ main(void)
if ((ret = wiredtiger_open(
home, NULL, "create,statistics=(fast)", &conn)) != 0)
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
/* Open a session for the current thread's work. */
if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
fprintf(stderr, "Error opening a session on %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
ret = session->create(session, "table:world",
"key_format=r,value_format=5sii,"
@@ -220,9 +220,11 @@ main(void)
ret = cursor->close(cursor);
/* Note: closing the connection implicitly closes open session(s). */
- if ((ret = conn->close(conn, NULL)) != 0)
+ if ((ret = conn->close(conn, NULL)) != 0) {
fprintf(stderr, "Error closing %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
- return (ret);
+ return (EXIT_SUCCESS);
}
diff --git a/examples/c/ex_data_source.c b/examples/c/ex_data_source.c
index dd2b835e6ae..6ed80dfcf19 100644
--- a/examples/c/ex_data_source.c
+++ b/examples/c/ex_data_source.c
@@ -58,6 +58,17 @@ my_create(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
(void)config;
{
+#if !defined(ERROR_BAD_COMMAND)
+#define ERROR_BAD_COMMAND 37
+#endif
+ /*! [WT_EXTENSION_API map_windows_error] */
+ int posix_error =
+ wt_api->map_windows_error(wt_api, session, ERROR_BAD_COMMAND);
+ /*! [WT_EXTENSION_API map_windows_error] */
+ (void)posix_error;
+ }
+
+ {
const char *msg = "string";
/*! [WT_EXTENSION_API err_printf] */
(void)wt_api->err_printf(
@@ -667,7 +678,7 @@ main(void)
(void)wt_api->msg_printf(wt_api, NULL, "configuration complete");
/*! [WT_EXTENSION_API default_session] */
- (void)conn->close(conn, NULL);
+ ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_encrypt.c b/examples/c/ex_encrypt.c
index c53a61c92ea..3b3323bc091 100644
--- a/examples/c/ex_encrypt.c
+++ b/examples/c/ex_encrypt.c
@@ -51,7 +51,7 @@ __declspec(dllexport)
#endif
int add_my_encryptors(WT_CONNECTION *connection);
-static const char *home = NULL;
+static const char *home;
#define SYS_KEYID "system"
#define SYS_PW "system_password"
@@ -122,8 +122,8 @@ do_rotate(char *buf, size_t len, int rotn)
* Now rotate
*/
for (i = 0; i < len; i++)
- if (isalpha(buf[i])) {
- if (islower(buf[i]))
+ if (isalpha((unsigned char)buf[i])) {
+ if (islower((unsigned char)buf[i]))
buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a';
else
buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A';
@@ -587,6 +587,8 @@ main(void)
printf("Verified key %s; value %s\n", key1, val1);
}
+
ret = conn->close(conn, NULL);
- return (ret);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c
index d1e08edb04d..7122e71882e 100644
--- a/examples/c/ex_event_handler.c
+++ b/examples/c/ex_event_handler.c
@@ -68,7 +68,7 @@ handle_wiredtiger_error(WT_EVENT_HANDLER *handler,
/* Report the error on the console. */
fprintf(stderr,
"app_id %s, thread context %p, error %d, message %s\n",
- custom_handler->app_id, session, error, message);
+ custom_handler->app_id, (void *)session, error, message);
return (0);
}
@@ -83,7 +83,8 @@ handle_wiredtiger_message(
{
/* Cast the handler back to our custom handler. */
printf("app id %s, thread context %p, message %s\n",
- ((CUSTOM_EVENT_HANDLER *)handler)->app_id, session, message);
+ ((CUSTOM_EVENT_HANDLER *)handler)->app_id,
+ (void *)session, message);
return (0);
}
@@ -111,10 +112,10 @@ config_event_handler(void)
/*! [Configure event_handler] */
/* Make an invalid API call, to ensure the event handler works. */
- (void)conn->open_session(conn, NULL, "isolation=invalid", &session);
+ printf("ex_event_handler: expect an error message to follow\n");
+ ret = conn->open_session(conn, NULL, "isolation=invalid", &session);
- if (ret == 0)
- ret = conn->close(conn, NULL);
+ ret = conn->close(conn, NULL);
return (ret);
}
@@ -122,6 +123,8 @@ config_event_handler(void)
int
main(void)
{
+ int ret;
+
/*
* Create a clean test directory for this run of the test program if the
* environment variable isn't already set (as is done by make check).
@@ -132,5 +135,7 @@ main(void)
} else
home = NULL;
- return (config_event_handler());
+ ret = config_event_handler();
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_extending.c b/examples/c/ex_extending.c
index 4d265ae1d2b..f276cdd3e1e 100644
--- a/examples/c/ex_extending.c
+++ b/examples/c/ex_extending.c
@@ -108,7 +108,7 @@ main(void)
/* Open a connection to the database, creating it if necessary. */
if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0)
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
/*! [add collator nocase] */
ret = conn->add_collator(conn, "nocase", &nocasecoll, NULL);
@@ -119,15 +119,12 @@ main(void)
/* Open a session for the current thread's work. */
if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
fprintf(stderr, "Error opening a session on %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
- /* XXX Do some work... */
+ /* Do some work... */
- /* Note: closing the connection implicitly closes open session(s). */
- if ((ret = conn->close(conn, NULL)) != 0)
+ ret = conn->close(conn, NULL);
/*! [add collator prefix10] */
- fprintf(stderr, "Error closing %s: %s\n",
- home, wiredtiger_strerror(ret));
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_extractor.c b/examples/c/ex_extractor.c
index 8623f4759fc..f9d7af4af0f 100644
--- a/examples/c/ex_extractor.c
+++ b/examples/c/ex_extractor.c
@@ -283,5 +283,5 @@ main(void)
ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c
new file mode 100644
index 00000000000..77e8f40480b
--- /dev/null
+++ b/examples/c/ex_file_system.c
@@ -0,0 +1,975 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ex_file_system.c
+ * demonstrates how to use the custom file system interface
+ */
+
+/*
+ * Include WiredTiger internal functions: we need architecture portable locking
+ * in this example, and we use the TAILQ_XXX functions to keep the code simple.
+ *
+ * Application-writers SHOULD NOT INCLUDE "wt_internal.h", the public WiredTiger
+ * include files should be used instead:
+ *
+ * #include <wiredtiger.h>
+ * #include <wiredtiger_ext.h>
+ */
+#include "wt_internal.h"
+
+/*
+ * This example code uses internal WiredTiger functions for portable locking.
+ * We use #defines to clarify the meaning and ignore errors to simplify the
+ * code.
+ *
+ * Application writers SHOULD NOT COPY THIS LOCKING CODE, it's special-case code
+ * to make this example portable across platforms.
+ */
+/* Initialize the file system's spinlock; the return value is discarded. */
+#define ALLOCATE_FILE_SYSTEM_LOCK(demo_fs) \
+ (void)__wt_spin_init(NULL, &(demo_fs)->lock, "demo file handle lock")
+/* Destroy the file system's spinlock. */
+#define DESTROY_FILE_SYSTEM_LOCK(wt_session, demo_fs) \
+ __wt_spin_destroy((WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock)
+/* Acquire the single, file-system-wide lock. */
+#define LOCK_FILE_SYSTEM(wt_session, demo_fs) \
+ __wt_spin_lock((WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock)
+/* Release the single, file-system-wide lock. */
+#define UNLOCK_FILE_SYSTEM(wt_session, demo_fs) \
+ __wt_spin_unlock( \
+ (WT_SESSION_IMPL *)(wt_session), &(demo_fs)->lock)
+
+/*
+ * Example file system implementation, using memory buffers to represent files.
+ */
+typedef struct {
+ /* WiredTiger file system interface; first so the struct can be cast. */
+ WT_FILE_SYSTEM iface;
+
+ /*
+ * WiredTiger performs schema and I/O operations in parallel, all file
+ * system and file handle access must be thread-safe. This example uses
+ * a single, global file system lock for simplicity; real applications
+ * might require finer granularity, for example, a single lock for the
+ * file system handle list and per-handle locks serializing I/O.
+ */
+ WT_SPINLOCK lock; /* Lock */
+
+ /* Statistics, printed when the file system is terminated. */
+ /* Calls to demo_fs_open, counted before the lookup (so failures too). */
+ int opened_file_count;
+ /* New in-memory files created by demo_fs_open. */
+ int opened_unique_file_count;
+ /* Closes that dropped a handle's reference count to zero. */
+ int closed_file_count;
+ /* Calls to demo_file_read. */
+ int read_ops;
+ /* Calls to demo_file_write. */
+ int write_ops;
+
+ /* Queue of file handles */
+ TAILQ_HEAD(demo_file_handle_qh, demo_file_handle) fileq;
+
+ WT_EXTENSION_API *wtext; /* Extension functions */
+
+} DEMO_FILE_SYSTEM;
+
+typedef struct demo_file_handle {
+ /* WiredTiger file handle interface; first so the struct can be cast. */
+ WT_FILE_HANDLE iface;
+
+ /*
+ * Add custom file handle fields after the interface.
+ */
+ DEMO_FILE_SYSTEM *demo_fs; /* Enclosing file system */
+
+ TAILQ_ENTRY(demo_file_handle) q; /* Queue of handles */
+ uint32_t ref; /* Reference count */
+
+ char *buf; /* In-memory contents */
+ size_t bufsize; /* In-memory buffer size */
+
+ size_t size; /* Read/write data size */
+} DEMO_FILE_HANDLE;
+
+/*
+ * Extension initialization function.
+ */
+#ifdef _WIN32
+/*
+ * Explicitly export this function so it is visible when loading extensions.
+ */
+__declspec(dllexport)
+#endif
+int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *);
+
+/*
+ * Forward function declarations for file system API implementation
+ */
+static int demo_fs_open(WT_FILE_SYSTEM *,
+ WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
+ const char *, const char *, char ***, uint32_t *);
+static int demo_fs_directory_list_free(
+ WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *directory);
+static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
+static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *);
+static int demo_fs_rename(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *);
+static int demo_fs_size(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
+static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
+
+/*
+ * Forward function declarations for file handle API implementation
+ */
+static int demo_file_close(WT_FILE_HANDLE *, WT_SESSION *);
+static int demo_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
+static int demo_file_read(
+ WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
+static int demo_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
+static int demo_file_sync(WT_FILE_HANDLE *, WT_SESSION *);
+static int demo_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t);
+static int demo_file_write(
+ WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
+
+/*
+ * Forward function declarations for internal functions
+ */
+static int demo_handle_remove(WT_SESSION *, DEMO_FILE_HANDLE *);
+static DEMO_FILE_HANDLE *demo_handle_search(WT_FILE_SYSTEM *, const char *);
+
+#define DEMO_FILE_SIZE_INCREMENT 32768
+
+/*
+ * byte_string_match --
+ *	Return true if the nul-terminated string "str" is exactly the "len"
+ *	bytes found at "bytes" (which need not be nul-terminated).
+ */
+static bool
+byte_string_match(const char *str, const char *bytes, size_t len)
+{
+	/* The first len bytes must agree ... */
+	if (strncmp(str, bytes, len) != 0)
+		return (false);
+
+	/* ... and the string must end exactly there. */
+	return (str[len] == '\0');
+}
+
+/*
+ * demo_file_system_create --
+ *	Initialization point for demo file system: allocate the file system
+ *	structure, parse the application-supplied "config" value, fill in the
+ *	method table and register the file system with the connection.
+ *
+ *	Returns 0 on success and ENOMEM if the structure cannot be allocated;
+ *	any other failure is treated as fatal and exits the process.
+ */
+int
+demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_CONFIG_ITEM k, v;
+	WT_CONFIG_PARSER *config_parser;
+	WT_EXTENSION_API *wtext;
+	WT_FILE_SYSTEM *file_system;
+	int ret = 0;
+
+	config_parser = NULL;
+	wtext = conn->get_extension_api(conn);
+
+	if ((demo_fs = calloc(1, sizeof(DEMO_FILE_SYSTEM))) == NULL) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "demo_file_system_create: %s",
+		    wtext->strerror(wtext, NULL, ENOMEM));
+		return (ENOMEM);
+	}
+	demo_fs->wtext = wtext;
+	file_system = (WT_FILE_SYSTEM *)demo_fs;
+
+	/* Explicitly initialize the queue instead of relying on calloc. */
+	TAILQ_INIT(&demo_fs->fileq);
+
+	/*
+	 * Applications may have their own configuration information to pass to
+	 * the underlying filesystem implementation. See the main function for
+	 * the setup of those configuration strings; here we parse configuration
+	 * information as passed in by main, through WiredTiger.
+	 *
+	 * Retrieve our configuration information, the "config" value.
+	 */
+	if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_get: config: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+
+	/* Open a WiredTiger parser on the "config" value. */
+	if ((ret = wtext->config_parser_open(
+	    wtext, NULL, v.str, v.len, &config_parser)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_parser_open: config: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		config_parser = NULL;
+		goto err;
+	}
+
+	/* Step through our configuration values. */
+	printf("Custom file system configuration\n");
+	while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
+		if (byte_string_match("config_string", k.str, k.len)) {
+			printf("\t" "key %.*s=\"%.*s\"\n",
+			    (int)k.len, k.str, (int)v.len, v.str);
+			continue;
+		}
+		if (byte_string_match("config_value", k.str, k.len)) {
+			printf("\t" "key %.*s=%" PRId64 "\n",
+			    (int)k.len, k.str, v.val);
+			continue;
+		}
+		ret = EINVAL;
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.next: unexpected configuration "
+		    "information: %.*s=%.*s: %s",
+		    (int)k.len, k.str, (int)v.len, v.str,
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+
+	/* Check for expected parser termination and close the parser. */
+	if (ret != WT_NOTFOUND) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.next: config: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+	ret = config_parser->close(config_parser);
+	config_parser = NULL;		/* Never close the parser twice. */
+	if (ret != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.close: config: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+
+	ALLOCATE_FILE_SYSTEM_LOCK(demo_fs);
+
+	/* Initialize the in-memory jump table. */
+	file_system->fs_directory_list = demo_fs_directory_list;
+	file_system->fs_directory_list_free = demo_fs_directory_list_free;
+	file_system->fs_directory_sync = demo_fs_directory_sync;
+	file_system->fs_exist = demo_fs_exist;
+	file_system->fs_open_file = demo_fs_open;
+	file_system->fs_remove = demo_fs_remove;
+	file_system->fs_rename = demo_fs_rename;
+	file_system->fs_size = demo_fs_size;
+	file_system->terminate = demo_fs_terminate;
+
+	if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONNECTION.set_file_system: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+	return (0);
+
+err:	/* Release the parser if an error interrupted the parsing loop. */
+	if (config_parser != NULL)
+		(void)config_parser->close(config_parser);
+	free(demo_fs);
+	/* An error installing the file system is fatal. */
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * demo_fs_open --
+ *	fopen for our demo file system.
+ *
+ *	If a handle with this name already exists and is not open, it is
+ *	re-used; if it is open, EBUSY is returned (only a single open handle
+ *	per file is supported). Otherwise a new, empty in-memory file is
+ *	created. On success the handle is returned in file_handlep; on failure
+ *	*file_handlep is NULL and an error code is returned.
+ */
+static int
+demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+    const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+    WT_FILE_HANDLE **file_handlep)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+	WT_FILE_HANDLE *file_handle;
+	int ret = 0;
+
+	(void)file_type;				/* Unused */
+	(void)flags;					/* Unused */
+
+	*file_handlep = NULL;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+	demo_fh = NULL;
+	wtext = demo_fs->wtext;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	++demo_fs->opened_file_count;
+
+	/*
+	 * First search the file queue, if we find it, assert there's only a
+	 * single reference, we only support a single handle on any file.
+	 *
+	 * An existing handle stays owned by the queue: this path must never
+	 * reach the cleanup code below, which frees the handle.
+	 */
+	if ((demo_fh = demo_handle_search(file_system, name)) != NULL) {
+		if (demo_fh->ref != 0) {
+			(void)wtext->err_printf(wtext, session,
+			    "demo_fs_open: %s: file already open", name);
+			ret = EBUSY;
+		} else {
+			demo_fh->ref = 1;
+			*file_handlep = (WT_FILE_HANDLE *)demo_fh;
+		}
+
+		UNLOCK_FILE_SYSTEM(session, demo_fs);
+		return (ret);
+	}
+
+	/* The file hasn't been opened before, create a new one. */
+	if ((demo_fh = calloc(1, sizeof(DEMO_FILE_HANDLE))) == NULL) {
+		ret = ENOMEM;
+		goto err;
+	}
+
+	/* Initialize private information. */
+	demo_fh->demo_fs = demo_fs;
+	demo_fh->ref = 1;
+	if ((demo_fh->buf = calloc(1, DEMO_FILE_SIZE_INCREMENT)) == NULL) {
+		ret = ENOMEM;
+		goto err;
+	}
+	demo_fh->bufsize = DEMO_FILE_SIZE_INCREMENT;
+	demo_fh->size = 0;
+
+	/* Initialize public information. */
+	file_handle = (WT_FILE_HANDLE *)demo_fh;
+	if ((file_handle->name = strdup(name)) == NULL) {
+		ret = ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * Setup the function call table for our custom file system. Set the
+	 * function pointer to NULL where our implementation doesn't support
+	 * the functionality.
+	 */
+	file_handle->close = demo_file_close;
+	file_handle->fh_advise = NULL;
+	file_handle->fh_allocate = NULL;
+	file_handle->fh_allocate_nolock = NULL;
+	file_handle->fh_lock = demo_file_lock;
+	file_handle->fh_map = NULL;
+	file_handle->fh_map_discard = NULL;
+	file_handle->fh_map_preload = NULL;
+	file_handle->fh_unmap = NULL;
+	file_handle->fh_read = demo_file_read;
+	file_handle->fh_size = demo_file_size;
+	file_handle->fh_sync = demo_file_sync;
+	file_handle->fh_sync_nowait = NULL;
+	file_handle->fh_truncate = demo_file_truncate;
+	file_handle->fh_write = demo_file_write;
+
+	TAILQ_INSERT_HEAD(&demo_fs->fileq, demo_fh, q);
+	++demo_fs->opened_unique_file_count;
+
+	*file_handlep = file_handle;
+
+	if (0) {
+		/*
+		 * Only a partially constructed, not yet queued handle gets
+		 * here; it is NULL if the first allocation failed.
+		 */
+err:		if (demo_fh != NULL) {
+			free(demo_fh->buf);
+			free(demo_fh);
+		}
+	}
+
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	return (ret);
+}
+
+/*
+ * demo_fs_directory_list --
+ *	Return a list of files in a given sub-directory.
+ *
+ *	A file is listed when its name starts with "directory" and, if
+ *	"prefix" is not NULL, also starts with "prefix". On success the
+ *	allocated array of allocated names is returned in dirlistp and the
+ *	number of names in countp; release it with
+ *	demo_fs_directory_list_free. Returns ENOMEM on allocation failure, in
+ *	which case nothing is returned and nothing is leaked.
+ */
+static int
+demo_fs_directory_list(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *directory,
+    const char *prefix, char ***dirlistp, uint32_t *countp)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	size_t len, prefix_len;
+	uint32_t allocated, count;
+	int ret = 0;
+	char *name, **entries, **grown;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	*dirlistp = NULL;
+	*countp = 0;
+
+	entries = NULL;
+	allocated = count = 0;
+	len = strlen(directory);
+	prefix_len = prefix == NULL ? 0 : strlen(prefix);
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	TAILQ_FOREACH(demo_fh, &demo_fs->fileq, q) {
+		name = demo_fh->iface.name;
+		if (strncmp(name, directory, len) != 0 ||
+		    (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
+			continue;
+
+		/*
+		 * Increase the list size in groups of 10, it doesn't
+		 * matter if the list is a bit longer than necessary.
+		 */
+		if (count >= allocated) {
+			/*
+			 * Keep the old pointer until realloc succeeds so the
+			 * error path can still free the existing entries.
+			 */
+			grown = realloc(
+			    entries, (allocated + 10) * sizeof(char *));
+			if (grown == NULL) {
+				ret = ENOMEM;
+				goto err;
+			}
+			entries = grown;
+
+			/*
+			 * Clear the new slots: "entries" is a char **, so
+			 * pointer arithmetic already scales by the slot size.
+			 */
+			memset(entries + allocated, 0, 10 * sizeof(char *));
+			allocated += 10;
+		}
+		if ((entries[count] = strdup(name)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		++count;
+	}
+
+	*dirlistp = entries;
+	*countp = count;
+
+err:	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	if (ret == 0)
+		return (0);
+
+	/* Discard the partial list on error. */
+	if (entries != NULL) {
+		while (count > 0)
+			free(entries[--count]);
+		free(entries);
+	}
+
+	return (ret);
+}
+
+/*
+ * demo_fs_directory_list_free --
+ *	Release a name list built by demo_fs_directory_list: each of the
+ *	"count" names, then the array itself. A NULL list is a no-op.
+ */
+static int
+demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, char **dirlist, uint32_t count)
+{
+	uint32_t remaining;
+
+	(void)file_system;				/* Unused */
+	(void)session;					/* Unused */
+
+	if (dirlist == NULL)
+		return (0);
+
+	/* Free the names from the last slot down to the first. */
+	for (remaining = count; remaining > 0; --remaining)
+		free(dirlist[remaining - 1]);
+	free(dirlist);
+
+	return (0);
+}
+
+/*
+ * demo_fs_directory_sync --
+ *	Flush a directory: there is nothing to do in this example, so every
+ *	argument is ignored and success is returned.
+ */
+static int
+demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *directory)
+{
+	/* None of the arguments are needed for a no-op. */
+	(void)directory;
+	(void)session;
+	(void)file_system;
+
+	return (0);
+}
+
+/*
+ * demo_fs_exist --
+ *	Set *existp to whether a handle with the given name is in the file
+ *	system's queue. Always returns 0.
+ */
+static int
+demo_fs_exist(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *name, bool *existp)
+{
+	DEMO_FILE_HANDLE *match;
+	DEMO_FILE_SYSTEM *demo_fs;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	/* The queue may only be walked while holding the lock. */
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	match = demo_handle_search(file_system, name);
+	*existp = match != NULL;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (0);
+}
+
+/*
+ * demo_fs_remove --
+ *	POSIX remove: discard the named in-memory file. Returns ENOENT if no
+ *	such file exists, otherwise whatever demo_handle_remove returns.
+ */
+static int
+demo_fs_remove(
+    WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name)
+{
+	DEMO_FILE_HANDLE *victim;
+	DEMO_FILE_SYSTEM *demo_fs;
+	int ret;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	victim = demo_handle_search(file_system, name);
+	if (victim == NULL)
+		ret = ENOENT;
+	else
+		ret = demo_handle_remove(session, victim);
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_fs_rename --
+ *	POSIX rename: give the handle named "from" the name "to". Returns
+ *	ENOENT if "from" does not exist and ENOMEM if the new name cannot be
+ *	allocated (the old name is then left in place). This function does not
+ *	look for an existing handle already named "to".
+ */
+static int
+demo_fs_rename(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *from, const char *to)
+{
+	DEMO_FILE_HANDLE *demo_fh;
+	DEMO_FILE_SYSTEM *demo_fs;
+	char *new_name;
+	int ret = 0;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	demo_fh = demo_handle_search(file_system, from);
+	if (demo_fh == NULL) {
+		ret = ENOENT;
+		goto done;
+	}
+
+	/* Allocate the replacement before discarding the current name. */
+	new_name = strdup(to);
+	if (new_name == NULL) {
+		ret = ENOMEM;
+		goto done;
+	}
+	free(demo_fh->iface.name);
+	demo_fh->iface.name = new_name;
+
+done:	UNLOCK_FILE_SYSTEM(session, demo_fs);
+	return (ret);
+}
+
+/*
+ * demo_fs_size --
+ *	Get the size of a file in bytes, by file name. Returns ENOENT if the
+ *	file does not exist, in which case *sizep is not modified.
+ */
+static int
+demo_fs_size(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *name, wt_off_t *sizep)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	DEMO_FILE_HANDLE *demo_fh;
+	int ret;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	ret = ENOENT;
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	if ((demo_fh = demo_handle_search(file_system, name)) != NULL) {
+		/*
+		 * Read the size directly: demo_file_size acquires the file
+		 * system lock itself, calling it here, with the lock already
+		 * held, would acquire the same lock twice.
+		 */
+		*sizep = (wt_off_t)demo_fh->size;
+		ret = 0;
+	}
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_fs_terminate --
+ *	Discard any resources on termination: remove every file handle, print
+ *	the usage statistics, then destroy the lock and free the file system.
+ *	Returns the first error reported while removing handles, if any.
+ */
+static int
+demo_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
+{
+	DEMO_FILE_HANDLE *demo_fh, *next;
+	DEMO_FILE_SYSTEM *demo_fs;
+	int ret = 0, tret;
+
+	demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	/*
+	 * Walk the queue once, saving the next element before each removal.
+	 * A handle that is still open is not unlinked by demo_handle_remove,
+	 * so repeatedly taking the head of the queue would never terminate.
+	 */
+	for (demo_fh = TAILQ_FIRST(&demo_fs->fileq);
+	    demo_fh != NULL; demo_fh = next) {
+		next = TAILQ_NEXT(demo_fh, q);
+		if ((tret =
+		    demo_handle_remove(session, demo_fh)) != 0 && ret == 0)
+			ret = tret;
+	}
+
+	printf("Custom file system\n");
+	printf("\t%d unique file opens\n", demo_fs->opened_unique_file_count);
+	printf("\t%d files opened\n", demo_fs->opened_file_count);
+	printf("\t%d files closed\n", demo_fs->closed_file_count);
+	printf("\t%d reads, %d writes\n",
+	    demo_fs->read_ops, demo_fs->write_ops);
+
+	DESTROY_FILE_SYSTEM_LOCK(session, demo_fs);
+	free(demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_file_close --
+ *	ANSI C close: drop one reference on the handle and, when that was the
+ *	last one, count the file as closed. The handle itself stays in the
+ *	file system's queue. Always returns 0.
+ */
+static int
+demo_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	DEMO_FILE_HANDLE *demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	DEMO_FILE_SYSTEM *demo_fs = demo_fh->demo_fs;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	demo_fh->ref -= 1;
+	if (demo_fh->ref == 0)
+		demo_fs->closed_file_count += 1;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (0);
+}
+
+/*
+ * demo_file_lock --
+ *	Lock/unlock a file: every request succeeds immediately and no state
+ *	is recorded.
+ */
+static int
+demo_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
+{
+	/* Nothing is tracked, so none of the arguments are used. */
+	(void)lock;
+	(void)session;
+	(void)file_handle;
+
+	return (0);
+}
+
+/*
+ * demo_file_read --
+ *	POSIX pread: copy up to "len" bytes starting at "offset" from the
+ *	in-memory file into "buf". A read extending past the end of the data
+ *	is shortened to the available bytes; a read starting at or past the
+ *	end of the data fails with EIO and reports an error.
+ */
+static int
+demo_file_read(WT_FILE_HANDLE *file_handle,
+    WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
+{
+	DEMO_FILE_HANDLE *demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	DEMO_FILE_SYSTEM *demo_fs = demo_fh->demo_fs;
+	WT_EXTENSION_API *wtext = demo_fs->wtext;
+	size_t off = (size_t)offset;
+	int ret = 0;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	++demo_fs->read_ops;
+	if (off >= demo_fh->size)
+		ret = EIO;		/* EOF */
+	else {
+		/* Clamp the request to the bytes actually present. */
+		if (len > demo_fh->size - off)
+			len = demo_fh->size - off;
+		memcpy(buf, (uint8_t *)demo_fh->buf + off, len);
+	}
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	if (ret != 0)
+		(void)wtext->err_printf(wtext, session,
+		    "%s: handle-read: failed to read %zu bytes at offset %zu: %s",
+		    demo_fh->iface.name,
+		    len, off, wtext->strerror(wtext, NULL, ret));
+	return (ret);
+}
+
+/*
+ * demo_file_size --
+ *	Report, through sizep, how many bytes of data the handle's in-memory
+ *	file holds. Takes the file system lock; always returns 0.
+ */
+static int
+demo_file_size(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
+{
+	DEMO_FILE_HANDLE *demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	DEMO_FILE_SYSTEM *demo_fs = demo_fh->demo_fs;
+
+	/* Read the size under the lock so it is consistent with writers. */
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	*sizep = (wt_off_t)demo_fh->size;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (0);
+}
+
+/*
+ * demo_file_sync --
+ *	Make the file's contents stable. Files live in memory only, so there
+ *	is nothing to flush and the call always succeeds.
+ */
+static int
+demo_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	/* Neither argument is needed for a no-op. */
+	(void)session;
+	(void)file_handle;
+
+	return (0);
+}
+
+/*
+ * demo_buffer_resize --
+ *	Resize the write buffer: make sure the handle's buffer holds at least
+ *	"offset" bytes, zero-filling any newly added space. The buffer never
+ *	shrinks. Returns ENOMEM, leaving the existing buffer untouched, if
+ *	the buffer cannot be grown.
+ */
+static int
+demo_buffer_resize(
+    WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh, wt_off_t offset)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+	size_t off;
+	void *p;
+
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+	off = (size_t)offset;
+
+	/* Grow the buffer as necessary and clear any new space in the file. */
+	if (demo_fh->bufsize >= off)
+		return (0);
+
+	if ((p = realloc(demo_fh->buf, off)) == NULL) {
+		/* The format needs a conversion for the error string too. */
+		(void)wtext->err_printf(wtext, session,
+		    "%s: failed to resize buffer: %s",
+		    demo_fh->iface.name, wtext->strerror(wtext, NULL, ENOMEM));
+		return (ENOMEM);
+	}
+	memset((uint8_t *)p + demo_fh->bufsize, 0, off - demo_fh->bufsize);
+	demo_fh->buf = p;
+	demo_fh->bufsize = off;
+
+	return (0);
+}
+
+/*
+ * demo_file_truncate --
+ *	POSIX ftruncate: set the file's data size to "offset", growing the
+ *	buffer first if needed. The size is left unchanged if the buffer
+ *	cannot be resized, and that error is returned.
+ */
+static int
+demo_file_truncate(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
+{
+	DEMO_FILE_HANDLE *demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	DEMO_FILE_SYSTEM *demo_fs = demo_fh->demo_fs;
+	int ret;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	ret = demo_buffer_resize(session, demo_fh, offset);
+	if (ret == 0)
+		demo_fh->size = (size_t)offset;
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	return (ret);
+}
+
+/*
+ * demo_file_write --
+ *	POSIX pwrite: copy "len" bytes from "buf" into the in-memory file at
+ *	"offset", extending the file's data size when the write ends past it.
+ *	The buffer is sized with DEMO_FILE_SIZE_INCREMENT bytes of slack
+ *	beyond the end of the write. Reports and returns the error if the
+ *	buffer cannot be resized.
+ */
+static int
+demo_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+    wt_off_t offset, size_t len, const void *buf)
+{
+	DEMO_FILE_HANDLE *demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+	DEMO_FILE_SYSTEM *demo_fs = demo_fh->demo_fs;
+	WT_EXTENSION_API *wtext = demo_fs->wtext;
+	size_t end, off;
+	int ret;
+
+	off = (size_t)offset;
+
+	LOCK_FILE_SYSTEM(session, demo_fs);
+	++demo_fs->write_ops;
+	ret = demo_buffer_resize(session, demo_fh,
+	    offset + (wt_off_t)(len + DEMO_FILE_SIZE_INCREMENT));
+	if (ret == 0) {
+		memcpy((uint8_t *)demo_fh->buf + off, buf, len);
+
+		/* Only ever extend the data size, never shrink it. */
+		end = off + len;
+		if (end > demo_fh->size)
+			demo_fh->size = end;
+	}
+	UNLOCK_FILE_SYSTEM(session, demo_fs);
+
+	if (ret != 0)
+		(void)wtext->err_printf(wtext, session,
+		    "%s: handle-write: failed to write %zu bytes at offset %zu: %s",
+		    demo_fh->iface.name,
+		    len, off, wtext->strerror(wtext, NULL, ret));
+	return (ret);
+}
+
+/*
+ * demo_handle_remove --
+ *	Destroy an in-memory file handle. Should only happen on remove or
+ *	shutdown.
+ *
+ *	Returns EBUSY, leaving the handle in the queue, if the handle is
+ *	still open; otherwise the handle is unlinked from the file system's
+ *	queue and it, its buffer and its name are freed. This function does
+ *	not take the file system lock itself.
+ */
+static int
+demo_handle_remove(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh)
+{
+	DEMO_FILE_SYSTEM *demo_fs;
+	WT_EXTENSION_API *wtext;
+
+	demo_fs = demo_fh->demo_fs;
+	wtext = demo_fs->wtext;
+
+	if (demo_fh->ref != 0) {
+		/* The format needs a conversion for the error string too. */
+		(void)wtext->err_printf(wtext, session,
+		    "demo_handle_remove: %s: file is currently open: %s",
+		    demo_fh->iface.name, wtext->strerror(wtext, NULL, EBUSY));
+		return (EBUSY);
+	}
+
+	TAILQ_REMOVE(&demo_fs->fileq, demo_fh, q);
+
+	/* Clean up private information. */
+	free(demo_fh->buf);
+
+	/* Clean up public information. */
+	free(demo_fh->iface.name);
+
+	free(demo_fh);
+
+	return (0);
+}
+
+/*
+ * demo_handle_search --
+ *	Walk the file system's queue and return the first handle whose name
+ *	equals "name", or NULL if there is none. This function does not take
+ *	the file system lock itself.
+ */
+static DEMO_FILE_HANDLE *
+demo_handle_search(WT_FILE_SYSTEM *file_system, const char *name)
+{
+	DEMO_FILE_HANDLE *candidate;
+	DEMO_FILE_SYSTEM *demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+	TAILQ_FOREACH(candidate, &demo_fs->fileq, q)
+		if (strcmp(candidate->iface.name, name) == 0)
+			return (candidate);
+
+	return (NULL);
+}
+
+static const char *home;
+
+/*
+ * main --
+ *	Open a database on the demo file system, insert 1000 records, read
+ *	them back in order verifying the keys, and close the connection.
+ *	Returns EXIT_SUCCESS, or EXIT_FAILURE after reporting the first error.
+ */
+int
+main(void)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	const char *key, *open_config, *uri;
+	int i;
+	int ret = 0;
+	char kbuf[64];
+
+	/*
+	 * Create a clean test directory for this run of the test program if the
+	 * environment variable isn't already set (as is done by make check).
+	 */
+	if (getenv("WIREDTIGER_HOME") == NULL) {
+		home = "WT_HOME";
+		if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
+			fprintf(stderr,
+			    "Error creating directory %s: system returned %d\n",
+			    home, ret);
+			return (EXIT_FAILURE);
+		}
+	} else
+		home = NULL;
+
+	/*! [WT_FILE_SYSTEM register] */
+	/*
+	 * Setup a configuration string that will load our custom file system.
+	 * Use the special local extension to indicate that the entry point is
+	 * in the same executable. Also enable early load for this extension,
+	 * since WiredTiger needs to be able to find it before doing any file
+	 * operations. Finally, pass in two pieces of configuration information
+	 * to our initialization function as the "config" value.
+	 */
+	open_config = "create,log=(enabled=true),extensions=(local={"
+	    "entry=demo_file_system_create,early_load=true,"
+	    "config={config_string=\"demo-file-system\",config_value=37}"
+	    "})";
+	/* Open a connection to the database, creating it if necessary. */
+	if ((ret = wiredtiger_open(home, NULL, open_config, &conn)) != 0) {
+		fprintf(stderr, "Error connecting to %s: %s\n",
+		    home == NULL ? "." : home, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	/*! [WT_FILE_SYSTEM register] */
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
+		fprintf(stderr, "WT_CONNECTION.open_session: %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	uri = "table:fs";
+	if ((ret = session->create(
+	    session, uri, "key_format=S,value_format=S")) != 0) {
+		fprintf(stderr, "WT_SESSION.create: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	/* Insert the records. */
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, NULL, &cursor)) != 0) {
+		fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	for (i = 0; i < 1000; ++i) {
+		(void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+		cursor->set_key(cursor, kbuf);
+		cursor->set_value(cursor, "--- VALUE ---");
+		if ((ret = cursor->insert(cursor)) != 0) {
+			fprintf(stderr, "WT_CURSOR.insert: %s: %s\n",
+			    kbuf, wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+	}
+	if ((ret = cursor->close(cursor)) != 0) {
+		fprintf(stderr, "WT_CURSOR.close: %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	/* Read the records back, in key order, and verify each key. */
+	if ((ret = session->open_cursor(
+	    session, uri, NULL, NULL, &cursor)) != 0) {
+		fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
+		    uri, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+	for (i = 0; i < 1000; ++i) {
+		/* Build the expected key first so errors can report it. */
+		(void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+		if ((ret = cursor->next(cursor)) != 0) {
+			fprintf(stderr, "WT_CURSOR.next: %s: %s\n",
+			    kbuf, wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+		if ((ret = cursor->get_key(cursor, &key)) != 0) {
+			fprintf(stderr, "WT_CURSOR.get_key: %s\n",
+			    wiredtiger_strerror(ret));
+			return (EXIT_FAILURE);
+		}
+		if (strcmp(kbuf, key) != 0) {
+			fprintf(stderr, "Key mismatch: %s, %s\n", kbuf, key);
+			return (EXIT_FAILURE);
+		}
+	}
+
+	/* There must be no records after the last one inserted. */
+	if ((ret = cursor->next(cursor)) != WT_NOTFOUND) {
+		fprintf(stderr,
+		    "WT_CURSOR.next: expected WT_NOTFOUND, got %s\n",
+		    wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	/* Note: closing the connection implicitly closes open session(s). */
+	if ((ret = conn->close(conn, NULL)) != 0) {
+		fprintf(stderr, "Error closing connection to %s: %s\n",
+		    home == NULL ? "." : home, wiredtiger_strerror(ret));
+		return (EXIT_FAILURE);
+	}
+
+	return (EXIT_SUCCESS);
+}
diff --git a/examples/c/ex_hello.c b/examples/c/ex_hello.c
index 345e434741f..99534ee8868 100644
--- a/examples/c/ex_hello.c
+++ b/examples/c/ex_hello.c
@@ -56,21 +56,27 @@ main(void)
home = NULL;
/* Open a connection to the database, creating it if necessary. */
- if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0)
+ if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/* Open a session for the current thread's work. */
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
fprintf(stderr, "Error opening a session on %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/* Do some work... */
/* Note: closing the connection implicitly closes open session(s). */
- if ((ret = conn->close(conn, NULL)) != 0)
+ if ((ret = conn->close(conn, NULL)) != 0) {
fprintf(stderr, "Error closing %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
- return (ret);
+ return (EXIT_SUCCESS);
}
diff --git a/examples/c/ex_log.c b/examples/c/ex_log.c
index 78bd7e683cf..fdbc39412ae 100644
--- a/examples/c/ex_log.c
+++ b/examples/c/ex_log.c
@@ -295,12 +295,12 @@ main(void)
home1, home2, home1, home2);
if ((ret = system(cmd_buf)) != 0) {
fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret);
- return (ret);
+ return (EXIT_FAILURE);
}
if ((ret = wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
home1, wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
ret = wt_conn->open_session(wt_conn, NULL, NULL, &session);
@@ -348,12 +348,13 @@ main(void)
if ((ret = wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
home1, wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
ret = wt_conn->open_session(wt_conn, NULL, NULL, &session);
ret = simple_walk_log(session, count_min);
ret = walk_log(session);
ret = wt_conn->close(wt_conn, NULL);
- return (ret);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_pack.c b/examples/c/ex_pack.c
index 43b57880674..86725123f55 100644
--- a/examples/c/ex_pack.c
+++ b/examples/c/ex_pack.c
@@ -55,14 +55,18 @@ main(void)
home = NULL;
/* Open a connection to the database, creating it if necessary. */
- if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0)
+ if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/* Open a session for the current thread's work. */
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
fprintf(stderr, "Error opening a session on %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
{
/*! [packing] */
@@ -81,9 +85,11 @@ main(void)
}
/* Note: closing the connection implicitly closes open session(s). */
- if ((ret = conn->close(conn, NULL)) != 0)
+ if ((ret = conn->close(conn, NULL)) != 0) {
fprintf(stderr, "Error closing %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
- return (ret);
+ return (EXIT_SUCCESS);
}
diff --git a/examples/c/ex_process.c b/examples/c/ex_process.c
index 19f395dddaf..217730c4288 100644
--- a/examples/c/ex_process.c
+++ b/examples/c/ex_process.c
@@ -58,22 +58,28 @@ main(void)
/*! [processes] */
/* Open a connection to the database, creating it if necessary. */
if ((ret =
- wiredtiger_open(home, NULL, "create,multiprocess", &conn)) != 0)
+ wiredtiger_open(home, NULL, "create,multiprocess", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/* Open a session for the current thread's work. */
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
fprintf(stderr, "Error opening a session on %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/* XXX Do some work... */
/* Note: closing the connection implicitly closes open session(s). */
- if ((ret = conn->close(conn, NULL)) != 0)
+ if ((ret = conn->close(conn, NULL)) != 0) {
fprintf(stderr, "Error closing %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
/*! [processes] */
- return (ret);
+ return (EXIT_SUCCESS);
}
diff --git a/examples/c/ex_schema.c b/examples/c/ex_schema.c
index 70fc7eb2e62..a59d9480780 100644
--- a/examples/c/ex_schema.c
+++ b/examples/c/ex_schema.c
@@ -69,7 +69,8 @@ main(void)
{
POP_RECORD *p;
WT_CONNECTION *conn;
- WT_CURSOR *cursor, *cursor2, *join_cursor, *stat_cursor;
+ WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor,
+ *stat_cursor, *subjoin_cursor, *year_cursor;
WT_SESSION *session;
const char *country;
uint64_t recno, population;
@@ -89,8 +90,8 @@ main(void)
if ((ret = wiredtiger_open(
home, NULL, "create,statistics=(fast)", &conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
- return (ret);
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
}
/* Note: error checking omitted for clarity. */
@@ -336,18 +337,18 @@ main(void)
ret = session->open_cursor(session,
"join:table:poptable", NULL, NULL, &join_cursor);
ret = session->open_cursor(session,
- "index:poptable:country", NULL, NULL, &cursor);
+ "index:poptable:country", NULL, NULL, &country_cursor);
ret = session->open_cursor(session,
- "index:poptable:immutable_year", NULL, NULL, &cursor2);
+ "index:poptable:immutable_year", NULL, NULL, &year_cursor);
/* select values WHERE country == "AU" AND year > 1900 */
- cursor->set_key(cursor, "AU\0\0\0");
- ret = cursor->search(cursor);
- ret = session->join(session, join_cursor, cursor,
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ ret = country_cursor->search(country_cursor);
+ ret = session->join(session, join_cursor, country_cursor,
"compare=eq,count=10");
- cursor2->set_key(cursor2, (uint16_t)1900);
- ret = cursor2->search(cursor2);
- ret = session->join(session, join_cursor, cursor2,
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ ret = year_cursor->search(year_cursor);
+ ret = session->join(session, join_cursor, year_cursor,
"compare=gt,count=10,strategy=bloom");
/* List the values that are joined */
@@ -370,10 +371,63 @@ main(void)
ret = stat_cursor->close(stat_cursor);
ret = join_cursor->close(join_cursor);
- ret = cursor2->close(cursor2);
- ret = cursor->close(cursor);
+ ret = year_cursor->close(year_cursor);
+ ret = country_cursor->close(country_cursor);
+
+ /*! [Complex join cursors] */
+ /* Open cursors needed by the join. */
+ ret = session->open_cursor(session,
+ "join:table:poptable", NULL, NULL, &join_cursor);
+ ret = session->open_cursor(session,
+ "join:table:poptable", NULL, NULL, &subjoin_cursor);
+ ret = session->open_cursor(session,
+ "index:poptable:country", NULL, NULL, &country_cursor);
+ ret = session->open_cursor(session,
+ "index:poptable:country", NULL, NULL, &country_cursor2);
+ ret = session->open_cursor(session,
+ "index:poptable:immutable_year", NULL, NULL, &year_cursor);
+
+ /*
+ * select values WHERE (country == "AU" OR country == "UK")
+ * AND year > 1900
+ *
+ * First, set up the join representing the country clause.
+ */
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ ret = country_cursor->search(country_cursor);
+ ret = session->join(session, subjoin_cursor, country_cursor,
+ "operation=or,compare=eq,count=10");
+ country_cursor2->set_key(country_cursor2, "UK\0\0\0");
+ ret = country_cursor2->search(country_cursor2);
+ ret = session->join(session, subjoin_cursor, country_cursor2,
+ "operation=or,compare=eq,count=10");
+
+ /* Join that to the top join, and add the year clause */
+ ret = session->join(session, join_cursor, subjoin_cursor, NULL);
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ ret = year_cursor->search(year_cursor);
+ ret = session->join(session, join_cursor, year_cursor,
+ "compare=gt,count=10,strategy=bloom");
+
+ /* List the values that are joined */
+ while ((ret = join_cursor->next(join_cursor)) == 0) {
+ ret = join_cursor->get_key(join_cursor, &recno);
+ ret = join_cursor->get_value(join_cursor, &country, &year,
+ &population);
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
+ country, year, population);
+ }
+ /*! [Complex join cursors] */
+
+ ret = join_cursor->close(join_cursor);
+ ret = subjoin_cursor->close(subjoin_cursor);
+ ret = country_cursor->close(country_cursor);
+ ret = country_cursor2->close(country_cursor2);
+ ret = year_cursor->close(year_cursor);
ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_scope.c b/examples/c/ex_scope.c
index 93878ec7e3d..795ad85d57b 100644
--- a/examples/c/ex_scope.c
+++ b/examples/c/ex_scope.c
@@ -106,10 +106,12 @@ cursor_scope_ops(WT_CURSOR *cursor)
* memory, but as it does not position the cursor, it
* doesn't reference memory owned by the cursor, either.
*/
+ printf("ex_scope: "
+ "expect two WiredTiger error messages:\n");
if ((ret = cursor->get_key(cursor, &key)) == 0 ||
(ret = cursor->get_value(cursor, &value)) == 0) {
fprintf(stderr,
- "%s: error in s get_key/value: %s\n",
+ "%s: error in get_key/value: %s\n",
op->op, session->strerror(session, ret));
return (ret);
}
@@ -122,6 +124,8 @@ cursor_scope_ops(WT_CURSOR *cursor)
* reference key memory owned by the cursor, but has no
* value.
*/
+ printf("ex_scope: "
+ "expect one WiredTiger error message:\n");
if ((ret = cursor->get_key(cursor, &key)) != 0 ||
(ret = cursor->get_value(cursor, &value)) == 0) {
fprintf(stderr,
@@ -178,7 +182,7 @@ main(void)
WT_CONNECTION *conn;
WT_CURSOR *cursor;
WT_SESSION *session;
- int ret, tret;
+ int ret;
/*
* Create a clean test directory for this run of the test program if the
@@ -194,8 +198,8 @@ main(void)
if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 ||
(ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
- return (ret);
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
}
ret = session->create(session,
@@ -207,8 +211,7 @@ main(void)
ret = cursor_scope_ops(cursor);
/* Close the connection and clean up. */
- if ((tret = conn->close(conn, NULL)) != 0 && ret == 0)
- ret = tret;
+ ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_stat.c b/examples/c/ex_stat.c
index 6c5c15aacc6..ba473d6be04 100644
--- a/examples/c/ex_stat.c
+++ b/examples/c/ex_stat.c
@@ -235,9 +235,8 @@ main(void)
ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn);
ret = conn->open_session(conn, NULL, NULL, &session);
- ret = session->create(
- session, "table:access",
- "key_format=S,value_format=S,columns=(k,v)");
+ ret = session->create(session,
+ "table:access", "key_format=S,value_format=S,columns=(k,v)");
ret = session->open_cursor(
session, "table:access", NULL, NULL, &cursor);
@@ -258,5 +257,7 @@ main(void)
ret = print_derived_stats(session);
- return (conn->close(conn, NULL) == 0 ? ret : EXIT_FAILURE);
+ ret = conn->close(conn, NULL);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_sync.c b/examples/c/ex_sync.c
index 8c3a6463a82..2c610b1e570 100644
--- a/examples/c/ex_sync.c
+++ b/examples/c/ex_sync.c
@@ -63,12 +63,12 @@ main(void)
home, home);
if ((ret = system(cmd_buf)) != 0) {
fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret);
- return (ret);
+ return (EXIT_FAILURE);
}
if ((ret = wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn)) != 0) {
fprintf(stderr, "Error connecting to %s: %s\n",
home, wiredtiger_strerror(ret));
- return (ret);
+ return (EXIT_FAILURE);
}
ret = wt_conn->open_session(wt_conn, NULL, NULL, &session);
@@ -149,5 +149,6 @@ main(void)
ret = session->log_flush(session, "sync=on");
ret = wt_conn->close(wt_conn, NULL);
- return (ret);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
diff --git a/examples/c/ex_thread.c b/examples/c/ex_thread.c
index a72211b6243..7c52d3b8189 100644
--- a/examples/c/ex_thread.c
+++ b/examples/c/ex_thread.c
@@ -101,7 +101,7 @@ main(void)
if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0)
fprintf(stderr, "Error connecting to %s: %s\n",
- home, wiredtiger_strerror(ret));
+ home == NULL ? "." : home, wiredtiger_strerror(ret));
/* Note: further error checking omitted for clarity. */
ret = conn->open_session(conn, NULL, NULL, &session);
@@ -122,6 +122,6 @@ main(void)
ret = conn->close(conn, NULL);
- return (ret);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
/*! [thread main] */
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 5fe767d49bf..48e85c9fade 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -878,6 +878,18 @@ backup(Session session)
": backup failed: " + ex.toString());
}
/*! [backup]*/
+ try {
+ /*! [incremental backup]*/
+ /* Open the backup data source for incremental backup. */
+ cursor = session.open_cursor("backup:", null, "target=(\"log:\")");
+ /*! [incremental backup]*/
+
+ ret = cursor.close();
+ }
+ catch (Exception ex) {
+ System.err.println(progname +
+ ": incremental backup failed: " + ex.toString());
+ }
/*! [backup of a checkpoint]*/
ret = session.checkpoint("drop=(from=June01),name=June01");
diff --git a/examples/java/com/wiredtiger/examples/ex_schema.java b/examples/java/com/wiredtiger/examples/ex_schema.java
index 7cc26acb479..76bff66a688 100644
--- a/examples/java/com/wiredtiger/examples/ex_schema.java
+++ b/examples/java/com/wiredtiger/examples/ex_schema.java
@@ -76,7 +76,8 @@ public class ex_schema {
throws WiredTigerException
{
Connection conn;
- Cursor cursor, cursor2, join_cursor, stat_cursor;
+ Cursor country_cursor, country_cursor2, cursor, join_cursor,
+ stat_cursor, subjoin_cursor, year_cursor;
Session session;
String country;
long recno, population;
@@ -343,18 +344,18 @@ public class ex_schema {
/* Open cursors needed by the join. */
join_cursor = session.open_cursor(
"join:table:poptable", null, null);
- cursor = session.open_cursor(
+ country_cursor = session.open_cursor(
"index:poptable:country", null, null);
- cursor2 = session.open_cursor(
+ year_cursor = session.open_cursor(
"index:poptable:immutable_year", null, null);
/* select values WHERE country == "AU" AND year > 1900 */
- cursor.putKeyString("AU");
- ret = cursor.search();
- session.join(join_cursor, cursor, "compare=eq,count=10");
- cursor2.putKeyShort((short)1900);
- ret = cursor2.search();
- session.join(join_cursor, cursor2,
+ country_cursor.putKeyString("AU");
+ ret = country_cursor.search();
+ session.join(join_cursor, country_cursor, "compare=eq,count=10");
+ year_cursor.putKeyShort((short)1900);
+ ret = year_cursor.search();
+ session.join(join_cursor, year_cursor,
"compare=gt,count=10,strategy=bloom");
/* List the values that are joined */
@@ -376,8 +377,61 @@ public class ex_schema {
ret = stat_cursor.close();
ret = join_cursor.close();
- ret = cursor2.close();
- ret = cursor.close();
+ ret = year_cursor.close();
+ ret = country_cursor.close();
+
+ /*! [Complex join cursors] */
+ /* Open cursors needed by the join. */
+ join_cursor = session.open_cursor(
+ "join:table:poptable", null, null);
+ subjoin_cursor = session.open_cursor(
+ "join:table:poptable", null, null);
+ country_cursor = session.open_cursor(
+ "index:poptable:country", null, null);
+ country_cursor2 = session.open_cursor(
+ "index:poptable:country", null, null);
+ year_cursor = session.open_cursor(
+ "index:poptable:immutable_year", null, null);
+
+ /*
+ * select values WHERE (country == "AU" OR country == "UK")
+ * AND year > 1900
+ *
+ * First, set up the join representing the country clause.
+ */
+ country_cursor.putKeyString("AU");
+ ret = country_cursor.search();
+ ret = session.join(subjoin_cursor, country_cursor,
+ "operation=or,compare=eq,count=10");
+ country_cursor2.putKeyString("UK");
+ ret = country_cursor2.search();
+ ret = session.join(subjoin_cursor, country_cursor2,
+ "operation=or,compare=eq,count=10");
+
+ /* Join that to the top join, and add the year clause */
+ ret = session.join(join_cursor, subjoin_cursor, null);
+ year_cursor.putKeyShort((short)1900);
+ ret = year_cursor.search();
+ ret = session.join(join_cursor, year_cursor,
+ "compare=gt,count=10,strategy=bloom");
+
+ /* List the values that are joined */
+ while ((ret = join_cursor.next()) == 0) {
+ recno = join_cursor.getKeyRecord();
+ country = join_cursor.getValueString();
+ year = join_cursor.getValueShort();
+ population = join_cursor.getValueLong();
+ System.out.print("ID " + recno);
+ System.out.println( ": country " + country + ", year " + year +
+ ", population " + population);
+ }
+ /*! [Complex join cursors] */
+
+ ret = join_cursor.close();
+ ret = subjoin_cursor.close();
+ ret = year_cursor.close();
+ ret = country_cursor.close();
+ ret = country_cursor2.close();
ret = conn.close(null);
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 4ff0d8576eb..9aede2ed907 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -307,17 +307,9 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
/*
* If there's more compression to do, save a snapshot and keep
* going, otherwise, use the current compression.
- *
- * Don't let the compression ratio become insanely good (which
- * can happen with synthetic workloads). Once we hit a limit,
- * stop so the in-memory size of pages isn't hugely larger than
- * the on-disk size, otherwise we can get into trouble where
- * every update to a page results in forced eviction based on
- * the in-memory size, even though the data fits into a single
- * on-disk block.
*/
last_slot = curr_slot;
- if (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) {
+ if (zs.avail_out > 0) {
if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
return (zlib_error(
compressor, session, "deflateCopy", ret));
diff --git a/ext/datasources/helium/helium.c b/ext/datasources/helium/helium.c
index 0350f9a2752..2a66c9a0ca7 100644
--- a/ext/datasources/helium/helium.c
+++ b/ext/datasources/helium/helium.c
@@ -2196,8 +2196,8 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
EMSG_ERR(wtext, session, ret,
"value_format configuration: %s",
wtext->strerror(wtext, session, ret));
- ws->config_bitfield =
- v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't';
+ ws->config_bitfield = v.len == 2 &&
+ isdigit((u_char)v.str[0]) && v.str[1] == 't';
if ((ret = config_parser->get(
config_parser, "helium_o_compress", &v)) != 0)
diff --git a/ext/test/kvs_bdb/kvs_bdb.c b/ext/test/kvs_bdb/kvs_bdb.c
index 05c522ff41f..3d78bca1d1b 100644
--- a/ext/test/kvs_bdb/kvs_bdb.c
+++ b/ext/test/kvs_bdb/kvs_bdb.c
@@ -831,7 +831,7 @@ kvs_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session,
goto err;
}
cursor->config_bitfield =
- v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't';
+ v.len == 2 && isdigit((u_char)v.str[0]) && v.str[1] == 't';
if ((ret = writelock(wtext, session, &ds->rwlock)) != 0)
goto err;
diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i
index 450cb1d5ab2..2264cb31ef1 100644
--- a/lang/java/java_doc.i
+++ b/lang/java/java_doc.i
@@ -63,6 +63,7 @@ COPYDOC(__wt_connection, WT_CONNECTION, add_collator)
COPYDOC(__wt_connection, WT_CONNECTION, add_compressor)
COPYDOC(__wt_connection, WT_CONNECTION, add_encryptor)
COPYDOC(__wt_connection, WT_CONNECTION, add_extractor)
+COPYDOC(__wt_connection, WT_CONNECTION, set_file_system)
COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, close)
COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, next)
COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, get)
diff --git a/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java b/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
index 02639bfe77a..4f05e153607 100644
--- a/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
+++ b/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
@@ -40,6 +40,7 @@ import com.wiredtiger.db.WiredTigerPackingException;
public class PackFormatInputStream {
protected String format;
+ protected boolean isRaw;
protected int formatOff;
protected int formatRepeatCount;
@@ -48,8 +49,9 @@ public class PackFormatInputStream {
*
* \param format the encoded format backing string.
*/
- protected PackFormatInputStream(String format) {
+ protected PackFormatInputStream(String format, boolean isRaw) {
this.format = format;
+ this.isRaw = isRaw;
formatOff = 0;
formatRepeatCount = 0;
}
@@ -114,6 +116,9 @@ public class PackFormatInputStream {
throws WiredTigerPackingException {
char expected = getType();
+ if (isRaw)
+ throw new WiredTigerPackingException(
+ "Format mismatch for raw mode");
if (Character.toLowerCase(expected) != Character.toLowerCase(asking))
throw new WiredTigerPackingException(
"Format mismatch. Wanted: " + asking + ", got: " + expected);
diff --git a/lang/java/src/com/wiredtiger/db/PackInputStream.java b/lang/java/src/com/wiredtiger/db/PackInputStream.java
index f265d041d94..732bf450acd 100644
--- a/lang/java/src/com/wiredtiger/db/PackInputStream.java
+++ b/lang/java/src/com/wiredtiger/db/PackInputStream.java
@@ -43,6 +43,7 @@ public class PackInputStream {
protected byte[] value;
protected int valueOff;
protected int valueLen;
+ protected boolean isRaw;
/**
* Constructor.
@@ -52,7 +53,7 @@ public class PackInputStream {
* \param value The raw bytes that back the stream.
*/
public PackInputStream(String format, byte[] value) {
- this(format, value, 0, value.length);
+ this(format, value, false, 0, value.length);
}
/**
@@ -61,14 +62,29 @@ public class PackInputStream {
* \param format A String that contains the WiredTiger format that
* defines the layout of this packed value.
* \param value The raw bytes that back the stream.
+ * \param isRaw The stream is opened raw.
+ */
+ public PackInputStream(String format, byte[] value, boolean isRaw) {
+ this(format, value, isRaw, 0, value.length);
+ }
+
+ /**
+ * Constructor.
+ *
+ * \param format A String that contains the WiredTiger format that
+ * defines the layout of this packed value.
+ * \param value The raw bytes that back the stream.
+ * \param isRaw The stream is opened raw.
* \param off Offset into the value array at which the stream begins.
* \param len Length of the value array that forms the stream.
*/
- public PackInputStream(String format, byte[] value, int off, int len) {
- this.format = new PackFormatInputStream(format);
+ public PackInputStream(
+ String format, byte[] value, boolean isRaw, int off, int len) {
+ this.format = new PackFormatInputStream(format, isRaw);
this.value = value;
this.valueOff = off;
this.valueLen = len;
+ this.isRaw = isRaw;
}
/**
@@ -117,7 +133,9 @@ public class PackInputStream {
*/
public void getByteArray(byte[] dest, int off, int len)
throws WiredTigerPackingException {
- format.checkType('U', false);
+ if (!isRaw) {
+ format.checkType('U', false);
+ }
getByteArrayInternal(getByteArrayLength(), dest, off, len);
}
@@ -127,7 +145,9 @@ public class PackInputStream {
*/
public byte[] getByteArray()
throws WiredTigerPackingException {
- format.checkType('U', false);
+ if (!isRaw) {
+ format.checkType('U', false);
+ }
int itemLen = getByteArrayLength();
byte[] unpacked = new byte[itemLen];
getByteArrayInternal(itemLen, unpacked, 0, itemLen);
@@ -142,7 +162,10 @@ public class PackInputStream {
throws WiredTigerPackingException {
int itemLen = 0;
- if (format.hasLength()) {
+ if (isRaw) {
+ // The rest of the buffer is a byte array.
+ itemLen = valueLen - valueOff;
+ } else if (format.hasLength()) {
// If the format has a length, it's always used.
itemLen = format.getLengthFromFormat(true);
} else if (format.getType() == 'U') {
diff --git a/lang/java/src/com/wiredtiger/db/PackOutputStream.java b/lang/java/src/com/wiredtiger/db/PackOutputStream.java
index 805e34f6ca8..46b3aef0974 100644
--- a/lang/java/src/com/wiredtiger/db/PackOutputStream.java
+++ b/lang/java/src/com/wiredtiger/db/PackOutputStream.java
@@ -50,7 +50,7 @@ public class PackOutputStream {
* defines the layout of this packed value.
*/
public PackOutputStream(String format) {
- this.format = new PackFormatInputStream(format);
+ this.format = new PackFormatInputStream(format, false);
intBuf = new byte[MAX_INT_BYTES];
packed = new ByteArrayOutputStream(100);
}
diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i
index ce013a1939c..c04bae63cbc 100644
--- a/lang/java/wiredtiger.i
+++ b/lang/java/wiredtiger.i
@@ -80,6 +80,7 @@ typedef struct {
JavaVM *javavm; /* Used in async threads to craft a jnienv */
JNIEnv *jnienv; /* jni env that created the Session/Cursor */
WT_SESSION_IMPL *session; /* session used for alloc/free */
+ bool cursor_raw; /* is the cursor opened raw? */
jobject jobj; /* the java Session/Cursor/AsyncOp object */
jobject jcallback; /* callback object for async ops */
jfieldID cptr_fid; /* cached Cursor.swigCPtr field id in session */
@@ -576,8 +577,15 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
return $self->update($self);
}
- %javamethodmodifiers java_init "protected";
- int java_init(jobject jasyncop) {
+ %javamethodmodifiers _java_raw "protected";
+ bool _java_raw(JNIEnv *jenv) {
+ (void)jenv;
+ JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->c.lang_private;
+ return jcb->cursor_raw;
+ }
+
+ %javamethodmodifiers _java_init "protected";
+ int _java_init(jobject jasyncop) {
JAVA_CALLBACK *jcb =
(JAVA_CALLBACK *)$self->c.lang_private;
jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jasyncop);
@@ -604,7 +612,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
valueFormat = getValue_format();
keyPacker = new PackOutputStream(keyFormat);
valuePacker = new PackOutputStream(valueFormat);
- wiredtigerJNI.AsyncOp_java_init(swigCPtr, this, this);
+ wiredtigerJNI.AsyncOp__java_init(swigCPtr, this, this);
}
protected static long getCPtr($javaclassname obj) {
@@ -1090,7 +1098,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
throws WiredTigerPackingException {
if (keyUnpacker == null)
keyUnpacker =
- new PackInputStream(keyFormat, get_key_wrap());
+ new PackInputStream(keyFormat, get_key_wrap(),
+ _java_raw());
return keyUnpacker;
}
@@ -1103,7 +1112,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
throws WiredTigerPackingException {
if (valueUnpacker == null)
valueUnpacker =
- new PackInputStream(valueFormat, get_value_wrap());
+ new PackInputStream(valueFormat, get_value_wrap(),
+ _java_raw());
return valueUnpacker;
}
@@ -1175,6 +1185,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
return $self->update($self);
}
+ %javamethodmodifiers compare_wrap "protected";
int compare_wrap(JNIEnv *jenv, WT_CURSOR *other) {
int cmp, ret = $self->compare($self, other, &cmp);
if (ret != 0)
@@ -1182,6 +1193,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
return cmp;
}
+ %javamethodmodifiers equals_wrap "protected";
int equals_wrap(JNIEnv *jenv, WT_CURSOR *other) {
int cmp, ret = $self->equals($self, other, &cmp);
if (ret != 0)
@@ -1189,8 +1201,15 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
return cmp;
}
- %javamethodmodifiers java_init "protected";
- int java_init(jobject jcursor) {
+ %javamethodmodifiers _java_raw "protected";
+ bool _java_raw(JNIEnv *jenv) {
+ (void)jenv;
+ JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->lang_private;
+ return jcb->cursor_raw;
+ }
+
+ %javamethodmodifiers _java_init "protected";
+ int _java_init(jobject jcursor) {
JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)$self->lang_private;
jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jcursor);
JCALL1(DeleteLocalRef, jcb->jnienv, jcursor);
@@ -1216,7 +1235,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
valueFormat = getValue_format();
keyPacker = new PackOutputStream(keyFormat);
valuePacker = new PackOutputStream(valueFormat);
- wiredtigerJNI.Cursor_java_init(swigCPtr, this, this);
+ wiredtigerJNI.Cursor__java_init(swigCPtr, this, this);
}
protected static long getCPtr($javaclassname obj) {
@@ -1773,7 +1792,8 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
if (!success || keyFormat.equals(""))
return null;
else
- return new PackInputStream(keyFormat, get_key_wrap());
+ return new PackInputStream(keyFormat,
+ get_key_wrap(), _java_raw());
}
/**
@@ -1789,7 +1809,7 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
return null;
else
return new PackInputStream(valueFormat,
- get_value_wrap());
+ get_value_wrap(), _java_raw());
}
%}
@@ -1799,20 +1819,22 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
*/
%javaexception("com.wiredtiger.db.WiredTigerException") { $action; }
%javaexception("") wiredtiger_strerror { $action; }
+%javaexception("") __wt_async_op::_java_raw { $action; }
%javaexception("") __wt_async_op::connection { $action; }
%javaexception("") __wt_async_op::get_type { $action; }
%javaexception("") __wt_async_op::get_id { $action; }
%javaexception("") __wt_async_op::key_format { $action; }
%javaexception("") __wt_async_op::value_format { $action; }
+%javaexception("") __wt_connection::_java_init { $action; }
%javaexception("") __wt_connection::get_home { $action; }
%javaexception("") __wt_connection::is_new { $action; }
-%javaexception("") __wt_connection::java_init { $action; }
+%javaexception("") __wt_cursor::_java_raw { $action; }
%javaexception("") __wt_cursor::key_format { $action; }
%javaexception("") __wt_cursor::session { $action; }
%javaexception("") __wt_cursor::uri { $action; }
%javaexception("") __wt_cursor::value_format { $action; }
+%javaexception("") __wt_session::_java_init { $action; }
%javaexception("") __wt_session::connection { $action; }
-%javaexception("") __wt_session::java_init { $action; }
/* Remove / rename parts of the C API that we don't want in Java. */
%immutable __wt_cursor::session;
@@ -1832,6 +1854,9 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
%ignore __wt_event_handler;
%ignore __wt_extractor;
%ignore __wt_connection::add_extractor;
+%ignore __wt_file_system;
+%ignore __wt_file_handle;
+%ignore __wt_connection::set_file_system;
%ignore __wt_item;
%ignore __wt_lsn;
%ignore __wt_session::msg_printf;
@@ -1890,8 +1915,8 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId)
%}
%extend ctypename {
- %javamethodmodifiers java_init "protected";
- int java_init(jobject jsess) {
+ %javamethodmodifiers _java_init "protected";
+ int _java_init(jobject jsess) {
implclass *session = (implclass *)$self;
JAVA_CALLBACK *jcb = (JAVA_CALLBACK *)session->lang_private;
jcb->jobj = JCALL1(NewGlobalRef, jcb->jnienv, jsess);
@@ -1901,8 +1926,8 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId)
}
%enddef
-TRACKED_CLASS(Session, __wt_session, wiredtigerJNI.Session_java_init, WT_SESSION_IMPL)
-TRACKED_CLASS(Connection, __wt_connection, wiredtigerJNI.Connection_java_init, WT_CONNECTION_IMPL)
+TRACKED_CLASS(Session, __wt_session, wiredtigerJNI.Session__java_init, WT_SESSION_IMPL)
+TRACKED_CLASS(Connection, __wt_connection, wiredtigerJNI.Connection__java_init, WT_CONNECTION_IMPL)
/* Note: Cursor incorporates the elements of TRACKED_CLASS into its
* custom constructor and %extend clause.
*/
@@ -1996,13 +2021,15 @@ err: if (ret != 0)
if ((ret = $self->open_cursor($self, uri, to_dup, config, &cursor)) != 0)
goto err;
- if ((cursor->flags & WT_CURSTD_DUMP_JSON) == 0)
- cursor->flags |= WT_CURSTD_RAW;
-
if ((ret = __wt_calloc_def((WT_SESSION_IMPL *)cursor->session,
1, &jcb)) != 0)
goto err;
+ if ((cursor->flags & WT_CURSTD_RAW) != 0)
+ jcb->cursor_raw = true;
+ if ((cursor->flags & WT_CURSTD_DUMP_JSON) == 0)
+ cursor->flags |= WT_CURSTD_RAW;
+
jcb->jnienv = jenv;
jcb->session = (WT_SESSION_IMPL *)cursor->session;
cursor->lang_private = jcb;
diff --git a/src/async/async_worker.c b/src/async/async_worker.c
index e692bc619a9..90dac557e36 100644
--- a/src/async/async_worker.c
+++ b/src/async/async_worker.c
@@ -216,9 +216,8 @@ __async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
__wt_cursor_set_raw_value(&asyncop->c, &val);
break;
case WT_AOP_NONE:
- default:
- WT_RET_MSG(session, EINVAL, "Unknown async optype %d\n",
- op->optype);
+ WT_RET_MSG(session, EINVAL,
+ "Unknown async optype %d\n", op->optype);
}
return (0);
}
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index a861a21876b..b9f0ec25d53 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -63,6 +63,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
*/
*root_addr_sizep = 0;
+#ifdef HAVE_VERBOSE
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
if (addr != NULL) {
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
@@ -72,6 +73,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
"%s: load-checkpoint: %s", block->name,
addr == NULL ? "[Empty]" : (const char *)tmp->data));
}
+#endif
/*
* There's a single checkpoint in the file that can be written, all of
@@ -140,12 +142,10 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
* will unnecessarily allocate buffer space.
*/
if (!checkpoint && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
- /*
- * The truncate might fail if there's a file mapping (if there's
- * an open checkpoint on the file), that's OK.
- */
WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT,
"truncate file to %" PRIuMAX, (uintmax_t)ci->file_size));
+
+ /* The truncate might fail, and that's OK. */
WT_ERR_BUSY_OK(
__wt_block_truncate(session, block, ci->file_size));
}
@@ -190,10 +190,7 @@ __wt_block_checkpoint_unload(
* checkpoints.
*/
if (!checkpoint) {
- /*
- * The truncate might fail if there's a file mapping (if there's
- * an open checkpoint on the file), that's OK.
- */
+ /* The truncate might fail, and that's OK. */
WT_TRET_BUSY_OK(
__wt_block_truncate(session, block, block->size));
@@ -512,6 +509,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
!F_ISSET(ckpt, WT_CKPT_DELETE))
continue;
+#ifdef HAVE_VERBOSE
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
if (tmp == NULL)
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
@@ -521,7 +519,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
"%s: delete-checkpoint: %s: %s",
block->name, ckpt->name, (const char *)tmp->data));
}
-
+#endif
/*
* Find the checkpoint into which we'll roll this checkpoint's
* blocks: it's the next real checkpoint in the list, and it
diff --git a/src/block/block_compact.c b/src/block/block_compact.c
index 24ca6632311..02862ea842f 100644
--- a/src/block/block_compact.c
+++ b/src/block/block_compact.c
@@ -39,12 +39,14 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
/* Restore the original allocation plan. */
__wt_block_configure_first_fit(block, false);
+#ifdef HAVE_VERBOSE
/* Dump the results of the compaction pass. */
if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) {
__wt_spin_lock(session, &block->live_lock);
ret = __block_dump_avail(session, block, false);
__wt_spin_unlock(session, &block->live_lock);
}
+#endif
return (ret);
}
@@ -188,6 +190,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
}
__wt_spin_unlock(session, &block->live_lock);
+#ifdef HAVE_VERBOSE
if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) {
++block->compact_pages_reviewed;
if (*skipp)
@@ -195,6 +198,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
else
++block->compact_pages_written;
}
+#endif
return (ret);
}
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index caafcc77c48..0d3e7b54f17 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -24,7 +24,7 @@ static int __block_append(WT_SESSION_IMPL *,
static int __block_ext_overlap(WT_SESSION_IMPL *,
WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **);
static int __block_extlist_dump(
- WT_SESSION_IMPL *, const char *, WT_EXTLIST *, bool);
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, const char *);
static int __block_merge(WT_SESSION_IMPL *,
WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
@@ -1227,8 +1227,7 @@ corrupted: __wt_scr_free(session, &tmp);
WT_ERR(func(session, block, el, off, size));
}
- if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
- WT_ERR(__block_extlist_dump(session, "read extlist", el, 0));
+ WT_ERR(__block_extlist_dump(session, block, el, "read"));
err: __wt_scr_free(session, &tmp);
return (ret);
@@ -1250,8 +1249,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
uint32_t entries;
uint8_t *p;
- if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
- WT_RET(__block_extlist_dump(session, "write extlist", el, 0));
+ WT_RET(__block_extlist_dump(session, block, el, "write"));
/*
* Figure out how many entries we're writing -- if there aren't any
@@ -1362,9 +1360,8 @@ __wt_block_extlist_truncate(
block->size = size;
/*
- * Truncate the file. The truncate might fail if there's a file mapping
- * (if there's an open checkpoint on the file), that's OK, we'll ignore
- * those blocks.
+ * Truncate the file. The truncate might fail, and that's OK, we simply
+ * ignore those blocks.
*/
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"truncate file from %" PRIdMAX " to %" PRIdMAX,
@@ -1428,38 +1425,62 @@ __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el)
*/
static int
__block_extlist_dump(
- WT_SESSION_IMPL *session, const char *tag, WT_EXTLIST *el, bool show_size)
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, const char *tag)
{
+ WT_DECL_ITEM(t1);
+ WT_DECL_ITEM(t2);
+ WT_DECL_RET;
WT_EXT *ext;
- WT_SIZE *szp;
+ uint64_t pow, sizes[64];
+ u_int i;
+ const char *sep;
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "%s: %s: %" PRIu64 " bytes, by offset:%s",
- tag, el->name, el->bytes, el->entries == 0 ? " [Empty]" : ""));
- if (el->entries == 0)
+ if (!block->verify_layout && !WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
return (0);
- WT_EXT_FOREACH(ext, el->off)
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "\t{%" PRIuMAX "/%" PRIuMAX "}",
- (uintmax_t)ext->off, (uintmax_t)ext->size));
+ WT_ERR(__wt_scr_alloc(session, 0, &t1));
+ if (block->verify_layout)
+ WT_ERR(__wt_msg(session,
+ "%s extent list %s, %" PRIu32 " entries, %s bytes",
+ tag, el->name, el->entries,
+ __wt_buf_set_size(session, el->bytes, true, t1)));
+ else
+ WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
+ "%s extent list %s, %" PRIu32 " entries, %s bytes",
+ tag, el->name, el->entries,
+ __wt_buf_set_size(session, el->bytes, true, t1)));
- if (!show_size)
- return (0);
+ if (ret != 0 || el->entries == 0)
+ goto done;
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "%s: %s: by size:%s",
- tag, el->name, el->entries == 0 ? " [Empty]" : ""));
- if (el->entries == 0)
- return (0);
+ memset(sizes, 0, sizeof(sizes));
+ WT_EXT_FOREACH(ext, el->off)
+ for (i = 9, pow = 512;; ++i, pow *= 2)
+ if (ext->size <= (wt_off_t)pow) {
+ ++sizes[i];
+ break;
+ }
+ sep = "extents by bucket:";
+ t1->size = 0;
+ WT_ERR(__wt_scr_alloc(session, 0, &t2));
+ for (i = 9, pow = 512; i < WT_ELEMENTS(sizes); ++i, pow *= 2)
+ if (sizes[i] != 0) {
+ WT_ERR(__wt_buf_catfmt(session, t1,
+ "%s {%s: %" PRIu64 "}",
+ sep,
+ __wt_buf_set_size(session, pow, false, t2),
+ sizes[i]));
+ sep = ",";
+ }
- WT_EXT_FOREACH(szp, el->sz) {
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "\t{%" PRIuMAX "}", (uintmax_t)szp->size));
- WT_EXT_FOREACH_OFF(ext, szp->off)
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "\t\t{%" PRIuMAX "/%" PRIuMAX "}",
- (uintmax_t)ext->off, (uintmax_t)ext->size));
- }
- return (0);
+ if (block->verify_layout)
+ WT_ERR(__wt_msg(session, "%s", (char *)t1->data));
+ else
+ WT_ERR(__wt_verbose(
+ session, WT_VERB_BLOCK, "%s", (char *)t1->data));
+
+done: err:
+ __wt_scr_free(session, &t1);
+ __wt_scr_free(session, &t2);
+ return (ret);
}
diff --git a/src/block/block_map.c b/src/block/block_map.c
index b16fe7f8423..d2c70fb4c49 100644
--- a/src/block/block_map.c
+++ b/src/block/block_map.c
@@ -13,24 +13,16 @@
* Map a segment of the file in, if possible.
*/
int
-__wt_block_map(
- WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp,
- void **mappingcookie)
+__wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ void *mapped_regionp, size_t *lengthp, void *mapped_cookiep)
{
WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
- *(void **)mapp = NULL;
- *maplenp = 0;
+ *(void **)mapped_regionp = NULL;
+ *lengthp = 0;
+ *(void **)mapped_cookiep = NULL;
-#ifdef WORDS_BIGENDIAN
- /*
- * The underlying objects are little-endian, mapping objects isn't
- * currently supported on big-endian systems.
- */
- WT_UNUSED(session);
- WT_UNUSED(block);
- WT_UNUSED(mappingcookie);
-#else
/* Map support is configurable. */
if (!S2C(session)->mmap)
return (0);
@@ -51,15 +43,23 @@ __wt_block_map(
return (0);
/*
+ * There may be no underlying functionality.
+ */
+ handle = block->fh->handle;
+ if (handle->fh_map == NULL)
+ return (0);
+
+ /*
* Map the file into memory.
* Ignore not-supported errors, we'll read the file through the cache
* if map fails.
*/
- ret = block->fh->fh_map(
- session, block->fh, mapp, maplenp, mappingcookie);
- if (ret == ENOTSUP)
+ ret = handle->fh_map(handle,
+ (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep);
+ if (ret == ENOTSUP) {
+ *(void **)mapped_regionp = NULL;
ret = 0;
-#endif
+ }
return (ret);
}
@@ -69,11 +69,13 @@ __wt_block_map(
* Unmap any mapped-in segment of the file.
*/
int
-__wt_block_unmap(
- WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen,
- void **mappingcookie)
+__wt_block_unmap(WT_SESSION_IMPL *session,
+ WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie)
{
+ WT_FILE_HANDLE *handle;
+
/* Unmap the file from memory. */
- return (block->fh->fh_map_unmap(
- session, block->fh, map, maplen, mappingcookie));
+ handle = block->fh->handle;
+ return (handle->fh_unmap(handle,
+ (WT_SESSION *)session, mapped_region, length, mapped_cookie));
}
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 06150a0f062..971fe713f83 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -103,7 +103,7 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session,
* of being read into cache buffers.
*/
WT_RET(__wt_block_map(session,
- bm->block, &bm->map, &bm->maplen, &bm->mappingcookie));
+ bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie));
/*
* If this handle is for a checkpoint, that is, read-only, there
@@ -149,7 +149,7 @@ __bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session)
/* Unmap any mapped segment. */
if (bm->map != NULL)
WT_TRET(__wt_block_unmap(session,
- bm->block, bm->map, bm->maplen, &bm->mappingcookie));
+ bm->block, bm->map, bm->maplen, &bm->mapped_cookie));
/* Unload the checkpoint. */
WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live));
@@ -302,6 +302,20 @@ __bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_map_discard --
+ * Discard a mapped segment.
+ */
+static int
+__bm_map_discard(WT_BM *bm, WT_SESSION_IMPL *session, void *map, size_t len)
+{
+ WT_FILE_HANDLE *handle;
+
+ handle = bm->block->fh->handle;
+ return (handle->fh_map_discard(
+ handle, (WT_SESSION *)session, map, len, bm->mapped_cookie));
+}
+
+/*
* __bm_salvage_end --
* End a block manager salvage.
*/
@@ -413,19 +427,7 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats)
static int
__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block)
{
- WT_DECL_RET;
-
- if (!block && !bm->block->nowait_sync_available)
- return (0);
-
- if ((ret = __wt_fsync(session, bm->block->fh, block)) == 0)
- return (0);
-
- /* Ignore ENOTSUP, but don't try again. */
- if (ret != ENOTSUP)
- return (ret);
- bm->block->nowait_sync_available = false;
- return (0);
+ return (__wt_fsync(session, bm->block->fh, block));
}
/*
@@ -544,6 +546,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_start = __bm_compact_start;
bm->free = __bm_free;
bm->is_mapped = __bm_is_mapped;
+ bm->map_discard = __bm_map_discard;
bm->preload = __wt_bm_preload;
bm->read = __wt_bm_read;
bm->salvage_end = __bm_salvage_end;
diff --git a/src/block/block_open.c b/src/block/block_open.c
index f4da5ca7c05..1603b1574e7 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -33,7 +33,6 @@ __wt_block_manager_create(
WT_FH *fh;
int suffix;
bool exists;
- char *path;
/*
* Create the underlying file and open a handle.
@@ -44,7 +43,7 @@ __wt_block_manager_create(
* in our space. Move any existing files out of the way and complain.
*/
for (;;) {
- if ((ret = __wt_open(session, filename, WT_FILE_TYPE_DATA,
+ if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA,
WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0)
break;
WT_ERR_TEST(ret != EEXIST, ret);
@@ -54,13 +53,13 @@ __wt_block_manager_create(
for (suffix = 1;; ++suffix) {
WT_ERR(__wt_buf_fmt(
session, tmp, "%s.%d", filename, suffix));
- WT_ERR(__wt_exist(session, tmp->data, &exists));
+ WT_ERR(__wt_fs_exist(session, tmp->data, &exists));
if (!exists) {
- WT_ERR(
- __wt_rename(session, filename, tmp->data));
+ WT_ERR(__wt_fs_rename(
+ session, filename, tmp->data));
WT_ERR(__wt_msg(session,
"unexpected file %s found, renamed to %s",
- filename, (char *)tmp->data));
+ filename, (const char *)tmp->data));
break;
}
}
@@ -82,14 +81,12 @@ __wt_block_manager_create(
* Some filesystems require that we sync the directory to be confident
* that the file will appear.
*/
- if (ret == 0 && (ret = __wt_filename(session, filename, &path)) == 0) {
- ret = __wt_directory_sync(session, path);
- __wt_free(session, path);
- }
+ if (ret == 0)
+ WT_TRET(__wt_fs_directory_sync(session, filename));
/* Undo any create on error. */
if (ret != 0)
- WT_TRET(__wt_remove(session, filename));
+ WT_TRET(__wt_fs_remove(session, filename));
err: __wt_scr_free(session, &tmp);
@@ -156,8 +153,7 @@ __wt_block_open(WT_SESSION_IMPL *session,
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
uint64_t bucket, hash;
-
- WT_UNUSED(readonly);
+ uint32_t flags;
WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));
@@ -204,12 +200,18 @@ __wt_block_open(WT_SESSION_IMPL *session,
/* Set the file extension information. */
block->extend_len = conn->data_extend_len;
- /* Set the asynchronous flush, preload availability. */
- block->nowait_sync_available = true;
- block->preload_available = true;
-
- /* Open the underlying file handle. */
- WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh));
+ /*
+ * Open the underlying file handle.
+ *
+ * "direct_io=checkpoint" configures direct I/O for readonly data files.
+ */
+ flags = 0;
+ if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT))
+ LF_SET(WT_OPEN_DIRECTIO);
+ if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA))
+ LF_SET(WT_OPEN_DIRECTIO);
+ WT_ERR(__wt_open(
+ session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh));
/* Set the file's size. */
WT_ERR(__wt_filesize(session, block->fh, &block->size));
@@ -422,5 +424,5 @@ int
__wt_block_manager_named_size(
WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
{
- return (__wt_filesize_name(session, name, false, sizep));
+ return (__wt_fs_size(session, name, sizep));
}
diff --git a/src/block/block_read.c b/src/block/block_read.c
index 6f0c41c1b5c..97157e4a0f1 100644
--- a/src/block/block_read.c
+++ b/src/block/block_read.c
@@ -19,44 +19,32 @@ __wt_bm_preload(
WT_BLOCK *block;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
wt_off_t offset;
uint32_t cksum, size;
bool mapped;
WT_UNUSED(addr_size);
+
block = bm->block;
WT_STAT_FAST_CONN_INCR(session, block_preload);
- /* Preload the block. */
- if (block->preload_available) {
- /* Crack the cookie. */
- WT_RET(__wt_block_buffer_to_addr(
- block, addr, &offset, &size, &cksum));
-
- mapped = bm->map != NULL &&
- offset + size <= (wt_off_t)bm->maplen;
- if (mapped)
- ret = block->fh->fh_map_preload(session,
- block->fh, (uint8_t *)bm->map + offset, size);
- else
- ret = block->fh->fh_advise(session,
- block->fh, (wt_off_t)offset,
- (wt_off_t)size, POSIX_FADV_WILLNEED);
- if (ret == 0)
- return (0);
-
- /* Ignore ENOTSUP, but don't try again. */
- if (ret != ENOTSUP)
- return (ret);
- block->preload_available = false;
- }
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));
- /*
- * If preload isn't supported, do it the slow way; don't call the
- * underlying read routine directly, we don't know for certain if
- * this is a mapped range.
- */
+ handle = block->fh->handle;
+ mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
+ if (mapped && handle->fh_map_preload != NULL)
+ ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
+ (uint8_t *)bm->map + offset, size, bm->mapped_cookie);
+ if (!mapped && handle->fh_advise != NULL)
+ ret = handle->fh_advise(handle, (WT_SESSION *)session,
+ (wt_off_t)offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED);
+ if (ret != EBUSY && ret != ENOTSUP)
+ return (ret);
+
+ /* If preload isn't supported, do it the slow way. */
WT_RET(__wt_scr_alloc(session, 0, &tmp));
ret = __wt_bm_read(bm, session, tmp, addr, addr_size);
__wt_scr_free(session, &tmp);
@@ -74,6 +62,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
{
WT_BLOCK *block;
WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
wt_off_t offset;
uint32_t cksum, size;
bool mapped;
@@ -87,23 +76,17 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
/*
* Map the block if it's possible.
*/
+ handle = block->fh->handle;
mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
- if (mapped) {
+ if (mapped && handle->fh_map_preload != NULL) {
buf->data = (uint8_t *)bm->map + offset;
buf->size = size;
- if (block->preload_available) {
- ret = block->fh->fh_map_preload(
- session, block->fh, buf->data, buf->size);
-
- /* Ignore ENOTSUP, but don't try again. */
- if (ret != ENOTSUP)
- return (ret);
- block->preload_available = false;
- }
+ ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
+ buf->data, buf->size,bm->mapped_cookie);
WT_STAT_FAST_CONN_INCR(session, block_map_read);
WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size);
- return (0);
+ return (ret);
}
#ifdef HAVE_DIAGNOSTIC
diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c
index 6570184ca10..af58864b9dc 100644
--- a/src/block/block_vrfy.c
+++ b/src/block/block_vrfy.c
@@ -15,13 +15,15 @@ static int __verify_filefrag_add(
WT_SESSION_IMPL *, WT_BLOCK *, const char *, wt_off_t, wt_off_t, bool);
static int __verify_filefrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
static int __verify_last_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
-static int __verify_last_truncate(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
+static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
/* The bit list ignores the first block: convert to/from a frag/offset. */
#define WT_wt_off_TO_FRAG(block, off) \
((off) / (block)->allocsize - 1)
+#ifdef HAVE_VERBOSE
#define WT_FRAG_TO_OFF(block, frag) \
(((wt_off_t)(frag + 1)) * (block)->allocsize)
+#endif
/*
* __wt_block_verify_start --
@@ -35,6 +37,14 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
WT_CONFIG_ITEM cval;
wt_off_t size;
+ /* Configuration: strict behavior on any error. */
+ WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
+ block->verify_strict = cval.val != 0;
+
+ /* Configuration: dump the file's layout. */
+ WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
+ block->verify_layout = cval.val != 0;
+
/*
* Find the last checkpoint in the list: if there are none, or the only
* checkpoint we have is fake, there's no work to do. Don't complain,
@@ -49,8 +59,8 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
return (0);
}
- /* Truncate the file to the size of the last checkpoint. */
- WT_RET(__verify_last_truncate(session, block, ckpt));
+ /* Set the size of the file to the size of the last checkpoint. */
+ WT_RET(__verify_set_file_size(session, block, ckpt));
/*
* We're done if the file has no data pages (this happens if we verify
@@ -105,9 +115,6 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
*/
WT_RET(__verify_last_avail(session, block, ckpt));
- /* Configuration: strict behavior on any error. */
- WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
- block->verify_strict = cval.val != 0;
return (0);
}
@@ -144,21 +151,40 @@ err: __wt_block_ckpt_destroy(session, ci);
}
/*
- * __verify_last_truncate --
- * Truncate the file to the last checkpoint's size.
+ * __verify_set_file_size --
+ * Set the file size to the last checkpoint's size.
*/
static int
-__verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
+__verify_set_file_size(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
{
WT_BLOCK_CKPT *ci, _ci;
WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
ci = &_ci;
WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
- WT_ERR_BUSY_OK(__wt_block_truncate(session, block, ci->file_size));
+
+ if (block->verify_layout) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_msg(session, "%s: physical size %s", block->name,
+ __wt_buf_set_size(
+ session, (uint64_t)block->size, true, tmp)));
+ WT_ERR(
+ __wt_msg(session, "%s: correcting to %s checkpoint size %s",
+ block->name, ckpt->name, __wt_buf_set_size(
+ session, (uint64_t)ci->file_size, true, tmp)));
+ }
+
+ /*
+ * Verify is read-only. Set the block's file size information as if we
+ * truncated the file during checkpoint load, so references to blocks
+ * after last checkpoint's file size fail.
+ */
+ block->size = block->extend_size = ci->file_size;
err: __wt_block_ckpt_destroy(session, ci);
+ __wt_scr_free(session, &tmp);
return (ret);
}
@@ -247,9 +273,9 @@ __wt_verify_ckpt_load(
}
/*
- * We don't need to list of blocks on a checkpoint's avail list, but we
- * read it to ensure it wasn't corrupted. We could confirm correctness
- * of intermediate avail lists (that is, if they're logically the result
+ * We don't need the blocks on a checkpoint's avail list, but we read it
+ * to ensure it wasn't corrupted. We could confirm correctness of the
+ * intermediate avail lists (that is, if they're logically the result
* of the allocations and discards to this point). We don't because the
* only avail list ever used is the one for the last checkpoint, which
* is separately verified by checking it against all of the blocks found
@@ -437,6 +463,7 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
__bit_set(block->fragfile, last);
}
+#ifdef HAVE_VERBOSE
if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
continue;
@@ -444,6 +471,7 @@ __verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
"file range %" PRIuMAX "-%" PRIuMAX " never verified",
(uintmax_t)WT_FRAG_TO_OFF(block, first),
(uintmax_t)WT_FRAG_TO_OFF(block, last));
+#endif
}
if (count == 0)
return (0);
@@ -528,6 +556,7 @@ __verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
__bit_clear(block->fragckpt, last);
}
+#ifdef HAVE_VERBOSE
if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
continue;
@@ -535,6 +564,7 @@ __verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
"checkpoint range %" PRIuMAX "-%" PRIuMAX " never verified",
(uintmax_t)WT_FRAG_TO_OFF(block, first),
(uintmax_t)WT_FRAG_TO_OFF(block, last));
+#endif
}
if (count == 0)
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 134272b52f9..1fefeee09da 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -15,6 +15,24 @@
int
__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
{
+ /*
+ * Backups are done by copying files outside of WiredTiger, potentially
+ * by system utilities. We cannot truncate the file during the backup
+ * window, we might surprise an application.
+ *
+ * Stop block truncation. This affects files that aren't involved in the
+ * backup (for example, doing incremental backups, which only copies log
+ * files, or targeted backups, stops all truncation). We may want a more
+ * targeted solution at some point.
+ */
+ if (S2C(session)->hot_backup)
+ return (EBUSY);
+
+ /*
+ * Additionally, the truncate might fail if there's a file mapping (if
+ * there's an open checkpoint on the file), in which case the underlying
+ * function returns EBUSY.
+ */
WT_RET(__wt_ftruncate(session, block->fh, len));
block->size = block->extend_size = len;
@@ -30,27 +48,28 @@ int
__wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size)
{
WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+ /* The file may not support this call. */
+ handle = block->fh->handle;
+ if (handle->fh_advise == NULL)
+ return (0);
+
+ /* The call may not be configured. */
if (block->os_cache_max == 0)
return (0);
/*
* We're racing on the addition, but I'm not willing to serialize on it
- * in the standard read path with more evidence it's needed.
+ * in the standard read path without evidence it's needed.
*/
if ((block->os_cache += added_size) <= block->os_cache_max)
return (0);
block->os_cache = 0;
- WT_ERR(block->fh->fh_advise(session,
- block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED));
- return (0);
-
-err: /* Ignore ENOTSUP, but don't try again. */
- if (ret != ENOTSUP)
- return (ret);
- block->os_cache_max = 0;
- return (0);
+ ret = handle->fh_advise(handle, (WT_SESSION *)session,
+ (wt_off_t)0, (wt_off_t)0, WT_FILE_HANDLE_DONTNEED);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
@@ -62,6 +81,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_FH *fh, wt_off_t offset, size_t align_size, bool *release_lockp)
{
WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
bool locked;
/*
@@ -107,7 +127,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* based on the filesystem type, fall back to ftruncate in that case,
* and remember that ftruncate requires locking.
*/
- if (fh->fallocate_available != WT_FALLOCATE_NOT_AVAILABLE) {
+ handle = fh->handle;
+ if (handle->fh_allocate != NULL ||
+ handle->fh_allocate_nolock != NULL) {
/*
* Release any locally acquired lock if not needed to extend the
* file, extending the file may require updating on-disk file's
@@ -115,7 +137,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* configure for file extension on systems that require locking
* over the extend call.)
*/
- if (!fh->fallocate_requires_locking && *release_lockp) {
+ if (handle->fh_allocate_nolock != NULL && *release_lockp) {
*release_lockp = locked = false;
__wt_spin_unlock(session, &block->live_lock);
}
@@ -131,8 +153,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
if ((ret = __wt_fallocate(
session, fh, block->size, block->extend_len * 2)) == 0)
return (0);
- if (ret != ENOTSUP)
- return (ret);
+ WT_RET_ERROR_OK(ret, ENOTSUP);
}
/*
@@ -155,9 +176,8 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* The truncate might fail if there's a mapped file (in other words, if
* there's an open checkpoint on the file), that's OK.
*/
- if ((ret = __wt_ftruncate(session, fh, block->extend_size)) == EBUSY)
- ret = 0;
- return (ret);
+ WT_RET_BUSY_OK(__wt_ftruncate(session, fh, block->extend_size));
+ return (0);
}
/*
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c
index 505630f12cf..e32544d5521 100644
--- a/src/bloom/bloom.c
+++ b/src/bloom/bloom.c
@@ -295,7 +295,7 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
err: /* Don't return WT_NOTFOUND from a failed search. */
if (ret == WT_NOTFOUND)
ret = WT_ERROR;
- __wt_err(bloom->session, ret, "Failed lookup in bloom filter.");
+ __wt_err(bloom->session, ret, "Failed lookup in bloom filter");
return (ret);
}
diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c
index 63b2e2abebc..70b3ba56e31 100644
--- a/src/btree/bt_curnext.c
+++ b/src/btree/bt_curnext.c
@@ -86,10 +86,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
- __cursor_set_recno(cbt, page->pg_fix_recno);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
goto new_page;
}
@@ -107,7 +107,7 @@ new_page:
cbt->ins = NULL;
upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd == NULL) {
- cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt);
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
val->data = &cbt->v;
} else
val->data = WT_UPDATE_DATA(upd);
@@ -179,10 +179,10 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_var_last_recno(page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
- __cursor_set_recno(cbt, page->pg_var_recno);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
goto new_page;
}
@@ -194,7 +194,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
new_page: /* Find the matching WT_COL slot. */
if ((cip =
- __col_var_search(page, cbt->recno, &rle_start)) == NULL)
+ __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
@@ -558,7 +558,8 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
* page.
*/
cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page);
+ __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref);
/* If we're traversing the append list, set the reference. */
if (cbt->ins_head != NULL &&
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index 7475c0f1312..872f648446c 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -128,12 +128,10 @@ static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
WT_ITEM *val;
- WT_PAGE *page;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
val = &cbt->iface.value;
if (newpage) {
@@ -176,8 +174,8 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* to a record number matching the first record on the page.
*/
if (cbt->ins == NULL &&
- (cbt->recno == page->pg_fix_recno ||
- __col_fix_last_recno(page) != 0))
+ (cbt->recno == cbt->ref->ref_recno ||
+ __col_fix_last_recno(cbt->ref) != 0))
return (WT_NOTFOUND);
}
@@ -234,7 +232,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -242,7 +240,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage)
}
/* Move to the previous entry and return the item. */
- if (cbt->recno == page->pg_fix_recno)
+ if (cbt->recno == cbt->ref->ref_recno)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->recno - 1);
@@ -255,7 +253,7 @@ new_page:
cbt->ins = NULL;
upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd == NULL) {
- cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt);
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
val->data = &cbt->v;
} else
val->data = WT_UPDATE_DATA(upd);
@@ -327,7 +325,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_var_last_recno(page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -338,12 +336,12 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
for (;;) {
__cursor_set_recno(cbt, cbt->recno - 1);
-new_page: if (cbt->recno < page->pg_var_recno)
+new_page: if (cbt->recno < cbt->ref->ref_recno)
return (WT_NOTFOUND);
/* Find the matching WT_COL slot. */
if ((cip =
- __col_var_search(page, cbt->recno, &rle_start)) == NULL)
+ __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index 018eb6ed73a..9a57759570a 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -164,12 +164,12 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* column-store pages don't have slots, but map one-to-one to
* keys, check for retrieval past the end of the page.
*/
- if (cbt->recno >= page->pg_fix_recno + page->pg_fix_entries)
+ if (cbt->recno >= cbt->ref->ref_recno + page->pg_fix_entries)
return (false);
/*
- * Updates aren't stored on the page, an update would have
- * appeared as an "insert" object; no further checks to do.
+ * An update would have appeared as an "insert" object; no
+ * further checks to do.
*/
break;
case BTREE_COL_VAR:
@@ -179,19 +179,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
WT_ASSERT(session, cbt->slot < page->pg_var_entries);
/*
- * Column-store updates aren't stored on the page, instead they
- * are stored as "insert" objects. If search returned an insert
- * object we can't return, the returned on-page object must be
- * checked for a match.
+ * Column-store updates are stored as "insert" objects. If
+ * search returned an insert object we can't return, the
+ * returned on-page object must be checked for a match.
*/
if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
return (false);
/*
- * Updates aren't stored on the page, an update would have
- * appeared as an "insert" object; however, variable-length
- * column store deletes are written into the backing store,
- * check the cell for a record already deleted when read.
+ * Although updates would have appeared as "insert" objects,
+ * variable-length column store deletes are written into the
+ * backing store; check the cell for a record already deleted
+ * when read.
*/
cip = &page->pg_var_d[cbt->slot];
if ((cell = WT_COL_PTR(page, cip)) == NULL ||
@@ -211,9 +210,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
if (cbt->ins != NULL)
return (false);
- /* Updates are stored on the page, check for a delete. */
- if (page->pg_row_upd != NULL && (upd = __wt_txn_read(
- session, page->pg_row_upd[cbt->slot])) != NULL) {
+ /* Check for an update. */
+ if (page->modify != NULL &&
+ page->modify->mod_row_update != NULL &&
+ (upd = __wt_txn_read(session,
+ page->modify->mod_row_update[cbt->slot])) != NULL) {
if (WT_UPDATE_DELETED_ISSET(upd))
return (false);
if (updp != NULL)
@@ -558,7 +559,6 @@ retry: WT_RET(__cursor_func_init(cbt, true));
ret = __cursor_row_modify(session, cbt, false);
break;
- WT_ILLEGAL_VALUE_ERR(session);
}
err: if (ret == WT_RESTART) {
@@ -596,9 +596,12 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt)
return (0);
if (cbt->ins != NULL)
return (__wt_txn_update_check(session, cbt->ins->upd));
- if (btree->type == BTREE_ROW && cbt->ref->page->pg_row_upd != NULL)
- return (__wt_txn_update_check(
- session, cbt->ref->page->pg_row_upd[cbt->slot]));
+
+ if (btree->type == BTREE_ROW &&
+ cbt->ref->page->modify != NULL &&
+ cbt->ref->page->modify->mod_row_update != NULL)
+ return (__wt_txn_update_check(session,
+ cbt->ref->page->modify->mod_row_update[cbt->slot]));
return (0);
}
@@ -636,7 +639,8 @@ retry: WT_RET(__cursor_func_init(cbt, true));
break;
case BTREE_COL_FIX:
case BTREE_COL_VAR:
- WT_ILLEGAL_VALUE_ERR(session);
+ WT_ERR(__wt_illegal_value(session, NULL));
+ break;
}
err: if (ret == WT_RESTART) {
@@ -714,7 +718,6 @@ retry: WT_RET(__cursor_func_init(cbt, true));
ret = __cursor_row_modify(session, cbt, true);
break;
- WT_ILLEGAL_VALUE_ERR(session);
}
err: if (ret == WT_RESTART) {
@@ -805,7 +808,6 @@ retry: WT_RET(__cursor_func_init(cbt, true));
}
ret = __cursor_row_modify(session, cbt, false);
break;
- WT_ILLEGAL_VALUE_ERR(session);
}
err: if (ret == WT_RESTART) {
@@ -972,7 +974,6 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
WT_RET(__wt_compare(
session, a_arg->btree->collator, &a->key, &b->key, cmpp));
break;
- WT_ILLEGAL_VALUE(session);
}
return (0);
}
@@ -1023,6 +1024,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
a = (WT_CURSOR *)a_arg;
b = (WT_CURSOR *)b_arg;
+ cmp = 0;
session = (WT_SESSION_IMPL *)a->session;
/* Confirm both cursors reference the same object. */
@@ -1110,7 +1112,7 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session,
int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool))
{
WT_DECL_RET;
- uint8_t *value;
+ const uint8_t *value;
/*
* Handle fixed-length column-store objects separately: for row-store
@@ -1139,7 +1141,7 @@ retry: WT_RET(__wt_btcur_remove(start));
if ((ret = __wt_btcur_next(start, true)) != 0)
break;
start->compare = 0; /* Exact match */
- value = (uint8_t *)start->iface.value.data;
+ value = (const uint8_t *)start->iface.value.data;
if (*value != 0 &&
(ret = rmfunc(session, start, 1)) != 0)
break;
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index 8ce1463a0db..b1579d25dc6 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -19,7 +19,7 @@ typedef struct {
* When using the standard event handlers, the debugging output has to
* do its own message handling because its output isn't line-oriented.
*/
- WT_FH *fh; /* Output file stream */
+ FILE *fp;
WT_ITEM *msg; /* Buffered message */
WT_ITEM *tmp; /* Temporary space */
@@ -36,17 +36,17 @@ static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *);
static void __debug_dsk_col_fix(WT_DBG *, const WT_PAGE_HEADER *);
static void __debug_item(WT_DBG *, const char *, const void *, size_t);
-static int __debug_page(WT_DBG *, WT_PAGE *, uint32_t);
-static void __debug_page_col_fix(WT_DBG *, WT_PAGE *);
+static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
+static void __debug_page_col_fix(WT_DBG *, WT_REF *);
static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t);
-static int __debug_page_col_var(WT_DBG *, WT_PAGE *);
-static int __debug_page_metadata(WT_DBG *, WT_PAGE *);
+static int __debug_page_col_var(WT_DBG *, WT_REF *);
+static int __debug_page_metadata(WT_DBG *, WT_REF *);
static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t);
static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *);
static void __debug_ref(WT_DBG *, WT_REF *);
static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
static int __debug_tree(
- WT_SESSION_IMPL *, WT_BTREE *, WT_PAGE *, const char *, uint32_t);
+ WT_SESSION_IMPL *, WT_BTREE *, WT_REF *, const char *, uint32_t);
static void __debug_update(WT_DBG *, WT_UPDATE *, bool);
static void __dmsg(WT_DBG *, const char *, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
@@ -97,8 +97,11 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
if (ofile == NULL)
return (__wt_scr_alloc(session, 512, &ds->msg));
- return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh));
+ if ((ds->fp = fopen(ofile, "w")) == NULL)
+ return (EIO);
+ __wt_stream_set_line_buffer(ds->fp);
+
+ return (0);
}
/*
@@ -127,7 +130,8 @@ __dmsg_wrapup(WT_DBG *ds)
}
/* Close any file we opened. */
- (void)__wt_close(session, &ds->fh);
+ if (ds->fp != NULL)
+ (void)fclose(ds->fp);
}
/*
@@ -152,7 +156,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
* the output chunk, and pass it to the event handler once we see a
* terminating newline.
*/
- if (ds->fh == NULL) {
+ if (ds->fp == NULL) {
msg = ds->msg;
for (;;) {
p = (char *)msg->mem + msg->size;
@@ -184,7 +188,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
}
} else {
va_start(ap, fmt);
- (void)__wt_vfprintf(session, ds->fh, fmt, ap);
+ (void)vfprintf(ds->fp, fmt, ap);
va_end(ap);
}
}
@@ -427,12 +431,12 @@ __debug_tree_shape_info(WT_PAGE *page)
v = page->memory_footprint;
if (v >= WT_GIGABYTE)
snprintf(buf, sizeof(buf),
- "(%p %" PRIu64 "G)", page, v / WT_GIGABYTE);
+ "(%p %" PRIu64 "G)", (void *)page, v / WT_GIGABYTE);
else if (v >= WT_MEGABYTE)
snprintf(buf, sizeof(buf),
- "(%p %" PRIu64 "M)", page, v / WT_MEGABYTE);
+ "(%p %" PRIu64 "M)", (void *)page, v / WT_MEGABYTE);
else
- snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", page, v);
+ snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", (void *)page, v);
return (buf);
}
@@ -498,10 +502,10 @@ __wt_debug_tree_shape(
*/
int
__wt_debug_tree_all(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile)
+ WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
return (__debug_tree(session,
- btree, page, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
+ btree, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
}
/*
@@ -513,9 +517,9 @@ __wt_debug_tree_all(
*/
int
__wt_debug_tree(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile)
+ WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- return (__debug_tree(session, btree, page, ofile, WT_DEBUG_TREE_WALK));
+ return (__debug_tree(session, btree, ref, ofile, WT_DEBUG_TREE_WALK));
}
/*
@@ -523,7 +527,7 @@ __wt_debug_tree(
* Dump the in-memory information for a page.
*/
int
-__wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
+__wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -533,7 +537,7 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
ds = &_ds;
WT_RET(__debug_config(session, ds, ofile));
- ret = __debug_page(ds, page, WT_DEBUG_TREE_LEAF);
+ ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF);
__dmsg_wrapup(ds);
@@ -549,9 +553,8 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
* in this function
*/
static int
-__debug_tree(
- WT_SESSION_IMPL *session, WT_BTREE *btree,
- WT_PAGE *page, const char *ofile, uint32_t flags)
+__debug_tree(WT_SESSION_IMPL *session,
+ WT_BTREE *btree, WT_REF *ref, const char *ofile, uint32_t flags)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -560,10 +563,10 @@ __debug_tree(
WT_RET(__debug_config(session, ds, ofile));
/* A NULL page starts at the top of the tree -- it's a convenience. */
- if (page == NULL)
- page = btree->root.page;
+ if (ref == NULL)
+ ref = &btree->root;
- WT_WITH_BTREE(session, btree, ret = __debug_page(ds, page, flags));
+ WT_WITH_BTREE(session, btree, ret = __debug_page(ds, ref, flags));
__dmsg_wrapup(ds);
@@ -575,7 +578,7 @@ __debug_tree(
* Dump the in-memory information for an in-memory page.
*/
static int
-__debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
+__debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -583,32 +586,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
/* Dump the page metadata. */
- WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, page));
+ WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
WT_RET(ret);
/* Dump the page. */
- switch (page->type) {
+ switch (ref->page->type) {
case WT_PAGE_COL_FIX:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- __debug_page_col_fix(ds, page);
+ __debug_page_col_fix(ds, ref);
break;
case WT_PAGE_COL_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_col_int(ds, page, flags));
+ ret = __debug_page_col_int(ds, ref->page, flags));
WT_RET(ret);
break;
case WT_PAGE_COL_VAR:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_var(ds, page));
+ WT_RET(__debug_page_col_var(ds, ref));
break;
case WT_PAGE_ROW_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_row_int(ds, page, flags));
+ ret = __debug_page_row_int(ds, ref->page, flags));
WT_RET(ret);
break;
case WT_PAGE_ROW_LEAF:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_row_leaf(ds, page));
+ WT_RET(__debug_page_row_leaf(ds, ref->page));
break;
WT_ILLEGAL_VALUE(session);
}
@@ -621,30 +624,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory page's metadata.
*/
static int
-__debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
+__debug_page_metadata(WT_DBG *ds, WT_REF *ref)
{
+ WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_PAGE_MODIFY *mod;
WT_SESSION_IMPL *session;
uint32_t entries;
session = ds->session;
+ page = ref->page;
mod = page->modify;
- __dmsg(ds, "%p", page);
+ __dmsg(ds, "%p", (void *)page);
switch (page->type) {
case WT_PAGE_COL_INT:
- __dmsg(ds, " recno %" PRIu64, page->pg_intl_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
break;
case WT_PAGE_COL_FIX:
- __dmsg(ds, " recno %" PRIu64, page->pg_fix_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
entries = page->pg_fix_entries;
break;
case WT_PAGE_COL_VAR:
- __dmsg(ds, " recno %" PRIu64, page->pg_var_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
entries = page->pg_var_entries;
break;
case WT_PAGE_ROW_INT:
@@ -658,7 +663,8 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
}
__dmsg(ds, ": %s\n", __wt_page_type_string(page->type));
- __dmsg(ds, "\t" "disk %p, entries %" PRIu32, page->dsk, entries);
+ __dmsg(ds,
+ "\t" "disk %p, entries %" PRIu32, (void *)page->dsk, entries);
__dmsg(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean");
__dmsg(ds, ", %s", __wt_fair_islocked(
session, &page->page_lock) ? "locked" : "unlocked");
@@ -707,10 +713,11 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
* Dump an in-memory WT_PAGE_COL_FIX page.
*/
static void
-__debug_page_col_fix(WT_DBG *ds, WT_PAGE *page)
+__debug_page_col_fix(WT_DBG *ds, WT_REF *ref)
{
WT_BTREE *btree;
WT_INSERT *ins;
+ WT_PAGE *page;
const WT_PAGE_HEADER *dsk;
WT_SESSION_IMPL *session;
uint64_t recno;
@@ -721,8 +728,9 @@ __debug_page_col_fix(WT_DBG *ds, WT_PAGE *page)
session = ds->session;
btree = S2BT(session);
+ page = ref->page;
dsk = page->dsk;
- recno = page->pg_fix_recno;
+ recno = ref->ref_recno;
if (dsk != NULL) {
ins = WT_SKIP_FIRST(WT_COL_UPDATE_SINGLE(page));
@@ -767,7 +775,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
WT_INTL_FOREACH_BEGIN(session, page, ref) {
- __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno);
+ __dmsg(ds, "\trecno %" PRIu64 "\n", ref->ref_recno);
__debug_ref(ds, ref);
} WT_INTL_FOREACH_END;
@@ -775,7 +783,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
- WT_RET(__debug_page(ds, ref->page, flags));
+ WT_RET(__debug_page(ds, ref, flags));
}
} WT_INTL_FOREACH_END;
@@ -787,18 +795,20 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory WT_PAGE_COL_VAR page.
*/
static int
-__debug_page_col_var(WT_DBG *ds, WT_PAGE *page)
+__debug_page_col_var(WT_DBG *ds, WT_REF *ref)
{
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
WT_COL *cip;
WT_INSERT_HEAD *update;
+ WT_PAGE *page;
uint64_t recno, rle;
uint32_t i;
char tag[64];
unpack = &_unpack;
- recno = page->pg_var_recno;
+ page = ref->page;
+ recno = ref->ref_recno;
WT_COL_FOREACH(page, cip, i) {
if ((cell = WT_COL_PTR(page, cip)) == NULL) {
@@ -849,7 +859,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
- WT_RET(__debug_page(ds, ref->page, flags));
+ WT_RET(__debug_page(ds, ref, flags));
}
} WT_INTL_FOREACH_END;
return (0);
@@ -885,7 +895,7 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
/* Dump the page's K/V pairs. */
WT_ROW_FOREACH(page, rip, i) {
- WT_RET(__wt_row_leaf_key(session, page, rip, key, false));
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
__debug_item(ds, "K", key->data, key->size);
if ((cell = __wt_row_leaf_value_cell(page, rip, NULL)) == NULL)
@@ -952,8 +962,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
__dmsg(ds, "\tvalue {deleted}\n");
else if (hexbyte) {
__dmsg(ds, "\t{");
- __debug_hex_byte(ds,
- ((uint8_t *)WT_UPDATE_DATA(upd))[0]);
+ __debug_hex_byte(ds, *(uint8_t *)WT_UPDATE_DATA(upd));
__dmsg(ds, "}\n");
} else
__debug_item(ds,
@@ -982,10 +991,10 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
__dmsg(ds, "deleted");
break;
case WT_REF_LOCKED:
- __dmsg(ds, "locked %p", ref->page);
+ __dmsg(ds, "locked %p", (void *)ref->page);
break;
case WT_REF_MEM:
- __dmsg(ds, "memory %p", ref->page);
+ __dmsg(ds, "memory %p", (void *)ref->page);
break;
case WT_REF_READING:
__dmsg(ds, "reading");
@@ -1147,14 +1156,14 @@ static void
__debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
size_t i;
- int ch;
+ u_char ch;
const uint8_t *data;
__dmsg(ds, "\t%s%s{", tag == NULL ? "" : tag, tag == NULL ? "" : " ");
for (data = data_arg, i = 0; i < size; ++i, ++data) {
ch = data[0];
- if (isprint(ch))
- __dmsg(ds, "%c", ch);
+ if (__wt_isprint(ch))
+ __dmsg(ds, "%c", (int)ch);
else
__debug_hex_byte(ds, data[0]);
}
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index ba16dd204e8..54b7fedb31d 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -288,10 +288,9 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* read-only or if the application never modifies the tree, we're not
* able to do so.)
*/
- if (btree->modified) {
- WT_RET(__wt_page_modify_init(session, page));
+ WT_RET(__wt_page_modify_init(session, page));
+ if (btree->modified)
__wt_page_modify_set(session, page);
- }
/*
* An operation is accessing a "deleted" page, and we're building an
@@ -326,7 +325,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
/* Allocate the per-page update array. */
WT_ERR(__wt_calloc_def(session, page->pg_row_entries, &upd_array));
- page->pg_row_upd = upd_array;
+ page->modify->mod_row_update = upd_array;
/*
* Fill in the per-reference update array with references to update
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 1181d92609f..a00bb7dc2b5 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -40,7 +40,6 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
void
__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
- WT_FH *fh;
WT_PAGE *page;
WT_PAGE_HEADER *dsk;
WT_PAGE_MODIFY *mod;
@@ -83,7 +82,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
if (hp != NULL)
__wt_errx(session,
"discarded page has hazard pointer: (%p: %s, line %d)",
- hp->page, hp->file, hp->line);
+ (void *)hp->page, hp->file, hp->line);
WT_ASSERT(session, hp == NULL);
}
#endif
@@ -134,10 +133,11 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
dsk = (WT_PAGE_HEADER *)page->dsk;
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) {
- fh = S2BT(session)->bm->block->fh;
- (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size);
- }
+
+ /* Discard any mapped image. */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
+ (void)S2BT(session)->bm->map_discard(
+ S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size);
__wt_overwrite_and_free(session, page);
}
@@ -194,16 +194,33 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
__free_skip_list(
session, WT_SKIP_FIRST(append), update_ignore);
__wt_free(session, append);
- __wt_free(session, mod->mod_append);
+ __wt_free(session, mod->mod_col_append);
}
/* Free the insert/update array. */
- if (mod->mod_update != NULL)
- __free_skip_array(session, mod->mod_update,
+ if (mod->mod_col_update != NULL)
+ __free_skip_array(session, mod->mod_col_update,
page->type ==
WT_PAGE_COL_FIX ? 1 : page->pg_var_entries,
update_ignore);
break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * Free the insert array.
+ *
+ * Row-store tables have one additional slot in the insert array
+ * (the insert array has an extra slot to hold keys that sort
+ * before keys found on the original page).
+ */
+ if (mod->mod_row_insert != NULL)
+ __free_skip_array(session, mod->mod_row_insert,
+ page->pg_row_entries + 1, update_ignore);
+
+ /* Free the update array. */
+ if (mod->mod_row_update != NULL)
+ __free_update(session, mod->mod_row_update,
+ page->pg_row_entries, update_ignore);
+ break;
}
/* Free the overflow on-page, reuse and transaction-cache skiplists. */
@@ -324,10 +341,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_ROW *rip;
uint32_t i;
void *copy;
- bool update_ignore;
-
- /* In some failed-split cases, we can't discard updates. */
- update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);
/*
* Free the in-memory index array.
@@ -342,22 +355,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
page, copy, &ikey, NULL, NULL, NULL);
__wt_free(session, ikey);
}
-
- /*
- * Free the insert array.
- *
- * Row-store tables have one additional slot in the insert array (the
- * insert array has an extra slot to hold keys that sort before keys
- * found on the original page).
- */
- if (page->pg_row_ins != NULL)
- __free_skip_array(session,
- page->pg_row_ins, page->pg_row_entries + 1, update_ignore);
-
- /* Free the update array. */
- if (page->pg_row_upd != NULL)
- __free_update(session,
- page->pg_row_upd, page->pg_row_entries, update_ignore);
}
/*
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 02eea9c2f0c..c97e05d74a7 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -371,7 +371,7 @@ __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno)
root_ref->page = root;
root_ref->state = WT_REF_MEM;
- root_ref->key.recno = is_recno ? 1 : WT_RECNO_OOB;
+ root_ref->ref_recno = is_recno ? 1 : WT_RECNO_OOB;
root->pg_intl_parent_ref = root_ref;
}
@@ -495,7 +495,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
case BTREE_COL_FIX:
case BTREE_COL_VAR:
WT_ERR(__wt_page_alloc(
- session, WT_PAGE_COL_INT, 1, 1, true, &root));
+ session, WT_PAGE_COL_INT, 1, true, &root));
root->pg_intl_parent_ref = &btree->root;
pindex = WT_INTL_INDEX_GET_SAFE(root);
@@ -504,11 +504,11 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
ref->page = NULL;
ref->addr = NULL;
ref->state = WT_REF_DELETED;
- ref->key.recno = 1;
+ ref->ref_recno = 1;
break;
case BTREE_ROW:
WT_ERR(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, 0, 1, true, &root));
+ session, WT_PAGE_ROW_INT, 1, true, &root));
root->pg_intl_parent_ref = &btree->root;
pindex = WT_INTL_INDEX_GET_SAFE(root);
@@ -519,12 +519,11 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
ref->state = WT_REF_DELETED;
WT_ERR(__wt_row_ikey_incr(session, root, 0, "", 1, ref));
break;
- WT_ILLEGAL_VALUE_ERR(session);
}
/* Bulk loads require a leaf page for reconciliation: create it now. */
if (F_ISSET(btree, WT_BTREE_BULK)) {
- WT_ERR(__wt_btree_new_leaf_page(session, 1, &leaf));
+ WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
ref->page = leaf;
ref->state = WT_REF_MEM;
WT_ERR(__wt_page_modify_init(session, leaf));
@@ -548,8 +547,7 @@ err: if (leaf != NULL)
* Create an empty leaf page.
*/
int
-__wt_btree_new_leaf_page(
- WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep)
+__wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
WT_BTREE *btree;
@@ -558,17 +556,16 @@ __wt_btree_new_leaf_page(
switch (btree->type) {
case BTREE_COL_FIX:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_FIX, recno, 0, false, pagep));
+ session, WT_PAGE_COL_FIX, 0, false, pagep));
break;
case BTREE_COL_VAR:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_VAR, recno, 0, false, pagep));
+ session, WT_PAGE_COL_VAR, 0, false, pagep));
break;
case BTREE_ROW:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_LEAF, WT_RECNO_OOB, 0, false, pagep));
+ session, WT_PAGE_ROW_LEAF, 0, false, pagep));
break;
- WT_ILLEGAL_VALUE(session);
}
return (0);
}
@@ -639,7 +636,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session)
page = next_walk->page;
btree->last_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page);
+ __col_var_last_recno(next_walk) : __col_fix_last_recno(next_walk);
return (__wt_page_release(session, next_walk, 0));
}
@@ -690,22 +687,19 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
"size (%" PRIu32 "B)", btree->allocsize);
/*
- * When a page is forced to split, we want at least 50 entries on its
- * parent.
- *
- * Don't let pages grow larger than a quarter of the cache, with too-
- * small caches, we can end up in a situation where nothing can be
- * evicted. Take care getting the cache size: with a shared cache,
- * it may not have been set.
+ * Don't let pages grow large compared to the cache size or we can end
+ * up in a situation where nothing can be evicted. Take care getting
+ * the cache size: with a shared cache, it may not have been set.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
- btree->maxmempage =
- WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage);
+ btree->maxmempage = (uint64_t)cval.val;
if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) {
if ((cache_size = conn->cache_size) > 0)
btree->maxmempage =
- WT_MIN(btree->maxmempage, cache_size / 4);
+ WT_MIN(btree->maxmempage, cache_size / 10);
}
+ /* Enforce a lower bound of a single disk leaf page. */
+ btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage);
/*
* Try in-memory splits once we hit 80% of the maximum in-memory page
diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c
index a1aaf2c7ea0..9e9d69c342e 100644
--- a/src/btree/bt_huffman.c
+++ b/src/btree/bt_huffman.c
@@ -133,10 +133,10 @@ static int __wt_huffman_read(WT_SESSION_IMPL *,
* Check for a Huffman configuration file and return the file name.
*/
static int
-__huffman_confchk_file(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp)
+__huffman_confchk_file(WT_SESSION_IMPL *session,
+ WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FSTREAM **fsp)
{
- WT_FH *fh;
+ WT_FSTREAM *fs;
WT_DECL_RET;
size_t len;
char *fname;
@@ -157,14 +157,13 @@ __huffman_confchk_file(
/* Check the file exists. */
WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR,
- WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh));
+ WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs));
/* Optionally return the file handle. */
- if (fhp == NULL)
- (void)__wt_close(session, &fh);
+ if (fsp == NULL)
+ (void)__wt_fclose(session, &fs);
else
- *fhp = fh;
+ *fsp = fs;
err: __wt_free(session, fname);
@@ -300,7 +299,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
struct __wt_huffman_table *table, *tp;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_FH *fh;
+ WT_FSTREAM *fs;
int64_t symbol, frequency;
u_int entries, lineno;
int n;
@@ -309,13 +308,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
*tablep = NULL;
*entriesp = *numbytesp = 0;
- fh = NULL;
+ fs = NULL;
table = NULL;
/*
* Try and open the backing file.
*/
- WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh));
+ WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs));
/*
* UTF-8 table is 256 bytes, with a range of 0-255.
@@ -333,7 +332,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
for (tp = table, lineno = 1;; ++tp, ++lineno) {
- WT_ERR(__wt_getline(session, tmp, fh));
+ WT_ERR(__wt_getline(session, fs, tmp));
if (tmp->size == 0)
break;
n = sscanf(
@@ -378,7 +377,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
if (0) {
err: __wt_free(session, table);
}
- (void)__wt_close(session, &fh);
+ (void)__wt_fclose(session, &fs);
__wt_scr_free(session, &tmp);
return (ret);
diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c
index aaf906ca785..4339de6f25c 100644
--- a/src/btree/bt_io.c
+++ b/src/btree/bt_io.c
@@ -343,6 +343,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
* Checksum the data if the buffer isn't compressed or checksums are
* configured.
*/
+ data_cksum = true; /* -Werror=maybe-uninitialized */
switch (btree->checksum) {
case CKSUM_ON:
data_cksum = true;
@@ -351,7 +352,6 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
data_cksum = false;
break;
case CKSUM_UNCOMPRESSED:
- default:
data_cksum = !compressed;
break;
}
diff --git a/src/btree/bt_misc.c b/src/btree/bt_misc.c
index 7f188502a0a..b6e2cc07f5a 100644
--- a/src/btree/bt_misc.c
+++ b/src/btree/bt_misc.c
@@ -129,19 +129,3 @@ __wt_addr_string(WT_SESSION_IMPL *session,
}
return (buf->data);
}
-
-/*
- * __wt_buf_set_printable --
- * Set the contents of the buffer to a printable representation of a
- * byte string.
- */
-const char *
-__wt_buf_set_printable(
- WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf)
-{
- if (__wt_raw_to_esc_hex(session, p, size, buf)) {
- buf->data = "[Error]";
- buf->size = strlen("[Error]");
- }
- return (buf->data);
-}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index 9fa0145bbdd..00ec8aa4494 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -10,7 +10,7 @@
static void __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *);
static void __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *);
-static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
+static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, uint64_t, size_t *);
static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *);
static int __inmem_row_leaf_entries(
@@ -21,8 +21,8 @@ static int __inmem_row_leaf_entries(
* Create or read a page into the cache.
*/
int
-__wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
- uint64_t recno, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
+__wt_page_alloc(WT_SESSION_IMPL *session,
+ uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
{
WT_CACHE *cache;
WT_DECL_RET;
@@ -67,13 +67,10 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
switch (type) {
case WT_PAGE_COL_FIX:
- page->pg_fix_recno = recno;
page->pg_fix_entries = alloc_entries;
break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
- page->pg_intl_recno = recno;
-
/*
* Internal pages have an array of references to objects so they
* can split. Allocate the array of references and optionally,
@@ -105,7 +102,6 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
}
break;
case WT_PAGE_COL_VAR:
- page->pg_var_recno = recno;
page->pg_var_d = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
page->pg_var_entries = alloc_entries;
break;
@@ -191,8 +187,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
}
/* Allocate and initialize a new WT_PAGE. */
- WT_RET(__wt_page_alloc(
- session, dsk->type, dsk->recno, alloc_entries, true, &page));
+ WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, true, &page));
page->dsk = dsk;
F_SET_ATOMIC(page, flags);
@@ -211,7 +206,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
__inmem_col_int(session, page);
break;
case WT_PAGE_COL_VAR:
- WT_ERR(__inmem_col_var(session, page, &size));
+ WT_ERR(__inmem_col_var(session, page, dsk->recno, &size));
break;
case WT_PAGE_ROW_INT:
WT_ERR(__inmem_row_int(session, page, &size));
@@ -292,7 +287,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
__wt_cell_unpack(cell, unpack);
ref->addr = cell;
- ref->key.recno = unpack->v;
+ ref->ref_recno = unpack->v;
}
}
@@ -329,7 +324,8 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
* column-store trees.
*/
static int
-__inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
+__inmem_col_var(
+ WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep)
{
WT_BTREE *btree;
WT_COL *cip;
@@ -337,13 +333,12 @@ __inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
const WT_PAGE_HEADER *dsk;
- uint64_t recno, rle;
+ uint64_t rle;
size_t bytes_allocated;
uint32_t i, indx, n, repeat_off;
btree = S2BT(session);
dsk = page->dsk;
- recno = page->pg_var_recno;
repeats = NULL;
repeat_off = 0;
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index df5f5cc2df8..086500c8b2f 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -377,9 +377,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
if (addr == NULL) {
WT_ASSERT(session, previous_state == WT_REF_DELETED);
- WT_ERR(__wt_btree_new_leaf_page(session,
- btree->type == BTREE_ROW ? WT_RECNO_OOB : ref->key.recno,
- &page));
+ WT_ERR(__wt_btree_new_leaf_page(session, &page));
ref->page = page;
goto done;
}
@@ -463,6 +461,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
btree = S2BT(session);
+ WT_STAT_FAST_CONN_INCR(session, cache_pages_requested);
+ WT_STAT_FAST_DATA_INCR(session, cache_pages_requested);
for (evict_soon = stalled = false,
force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
switch (ref->state) {
diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c
index d94eb2ddd80..de54e8433a8 100644
--- a/src/btree/bt_rebalance.c
+++ b/src/btree/bt_rebalance.c
@@ -90,7 +90,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session,
if (recno == WT_RECNO_OOB)
WT_RET(__wt_row_ikey(session, 0, key, key_len, copy));
else
- copy->key.recno = recno;
+ copy->ref_recno = recno;
copy->page_del = NULL;
return (0);
@@ -147,8 +147,7 @@ __rebalance_internal(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
leaf_next = (uint32_t)rs->leaf_next;
/* Allocate a row-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(session, rs->type,
- rs->type == WT_PAGE_COL_INT ? 1 : 0, leaf_next, false, &page));
+ WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page));
page->pg_intl_parent_ref = &btree->root;
WT_ERR(__wt_page_modify_init(session, page));
__wt_page_modify_set(session, page);
diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c
index ebc0499f6a2..8ef2db67e7b 100644
--- a/src/btree/bt_ret.c
+++ b/src/btree/bt_ret.c
@@ -46,7 +46,7 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
}
/* Take the value from the original page. */
- v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt);
+ v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt);
return (__wt_buf_set(session, &cursor->value, &v, 1));
case WT_PAGE_COL_VAR:
/*
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 0e064d306b6..9b5e4daf74a 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -116,8 +116,8 @@ struct __wt_track {
static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *);
-static int __slvg_col_ovfl(
- WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint64_t, uint64_t);
+static int __slvg_col_ovfl(WT_SESSION_IMPL *,
+ WT_TRACK *, WT_PAGE *, uint64_t, uint64_t, uint64_t);
static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_range_missing(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_range_overlap(
@@ -166,11 +166,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
WT_DECL_RET;
WT_STUFF *ss, stuff;
uint32_t i, leaf_cnt;
+ bool evict_reset;
WT_UNUSED(cfg);
btree = S2BT(session);
bm = btree->bm;
+ evict_reset = false;
WT_CLEAR(stuff);
ss = &stuff;
@@ -182,6 +184,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2));
/*
+ * Salvage handles its own page eviction; get exclusive access to the
+ * file, have eviction ignore the tree entirely.
+ */
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ evict_reset = true;
+
+ /*
* Step 1:
* Inform the underlying block manager that we're salvaging the file.
*/
@@ -295,13 +304,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
case WT_PAGE_COL_VAR:
WT_WITH_PAGE_INDEX(session,
ret = __slvg_col_build_internal(
- session, leaf_cnt, ss));
+ session, leaf_cnt, ss));
WT_ERR(ret);
break;
case WT_PAGE_ROW_LEAF:
WT_WITH_PAGE_INDEX(session,
ret = __slvg_row_build_internal(
- session, leaf_cnt, ss));
+ session, leaf_cnt, ss));
WT_ERR(ret);
break;
}
@@ -341,6 +350,9 @@ err: WT_TRET(bm->salvage_end(bm, session));
if (ss->root_ref.page != NULL)
__wt_ref_out(session, &ss->root_ref);
+ if (evict_reset)
+ __wt_evict_file_exclusive_off(session);
+
/* Discard the leaf and overflow page memory. */
WT_TRET(__slvg_cleanup(session, ss));
@@ -1159,7 +1171,7 @@ __slvg_col_build_internal(
/* Allocate a column-store root (internal) page and fill it in. */
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_INT, 1, leaf_cnt, true, &page));
+ session, WT_PAGE_COL_INT, leaf_cnt, true, &page));
WT_ERR(__slvg_modify_init(session, page));
pindex = WT_INTL_INDEX_GET_SAFE(page);
@@ -1180,7 +1192,7 @@ __slvg_col_build_internal(
ref->addr = addr;
addr = NULL;
- ref->key.recno = trk->col_start;
+ ref->ref_recno = trk->col_start;
ref->state = WT_REF_DISK;
/*
@@ -1223,7 +1235,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
WT_DECL_RET;
WT_PAGE *page;
WT_SALVAGE_COOKIE *cookie, _cookie;
- uint64_t skip, take;
+ uint64_t recno, skip, take;
uint32_t *entriesp, save_entries;
cookie = &_cookie;
@@ -1243,7 +1255,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* Calculate the number of K/V entries we are going to skip, and
* the total number of K/V entries we'll take from this page.
*/
- cookie->skip = skip = trk->col_start - page->pg_var_recno;
+ recno = page->dsk->recno;
+ cookie->skip = skip = trk->col_start - recno;
cookie->take = take = (trk->col_stop - trk->col_start) + 1;
WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1255,7 +1268,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
/* Set the referenced flag on overflow pages we're using. */
if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0)
- WT_ERR(__slvg_col_ovfl(session, trk, page, skip, take));
+ WT_ERR(__slvg_col_ovfl(session, trk, page, recno, skip, take));
/*
* If we're missing some part of the range, the real start range is in
@@ -1263,9 +1276,9 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* reference as well as the page itself.
*/
if (trk->col_missing == 0)
- page->pg_var_recno = trk->col_start;
+ ref->ref_recno = trk->col_start;
else {
- page->pg_var_recno = trk->col_missing;
+ ref->ref_recno = trk->col_missing;
cookie->missing = trk->col_start - trk->col_missing;
WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1274,7 +1287,6 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
cookie->missing));
}
- ref->key.recno = page->pg_var_recno;
/*
* We can't discard the original blocks associated with this page now.
@@ -1338,21 +1350,20 @@ __slvg_col_ovfl_single(
* Mark overflow items referenced by the merged page.
*/
static int
-__slvg_col_ovfl(WT_SESSION_IMPL *session,
- WT_TRACK *trk, WT_PAGE *page, uint64_t skip, uint64_t take)
+__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk,
+ WT_PAGE *page, uint64_t recno, uint64_t skip, uint64_t take)
{
WT_CELL_UNPACK unpack;
WT_CELL *cell;
WT_COL *cip;
WT_DECL_RET;
- uint64_t recno, start, stop;
+ uint64_t start, stop;
uint32_t i;
/*
* Merging a variable-length column-store page, and we took some number
* of records, figure out which (if any) overflow records we used.
*/
- recno = page->pg_var_recno;
start = recno + skip;
stop = (recno + skip + take) - 1;
@@ -1816,7 +1827,7 @@ __slvg_row_build_internal(
/* Allocate a row-store root (internal) page and fill it in. */
WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, WT_RECNO_OOB, leaf_cnt, true, &page));
+ session, WT_PAGE_ROW_INT, leaf_cnt, true, &page));
WT_ERR(__slvg_modify_init(session, page));
pindex = WT_INTL_INDEX_GET_SAFE(page);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 00bea5a6773..7a05a883f83 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -207,8 +207,8 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
WT_ASSERT(session, ref->home == page);
- WT_ASSERT(session, ref->key.recno > recno);
- recno = ref->key.recno;
+ WT_ASSERT(session, ref->ref_recno > recno);
+ recno = ref->ref_recno;
} WT_INTL_FOREACH_END;
break;
case WT_PAGE_ROW_INT:
@@ -298,7 +298,7 @@ static int
__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp)
{
- WT_ADDR *addr, *ref_addr;
+ WT_ADDR *addr;
WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -335,7 +335,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) {
__wt_ref_key(from_home, ref, &key, &size);
WT_RET(__wt_row_ikey(session, 0, key, size, ref));
- ikey = ref->key.ikey;
+ ikey = ref->ref_ikey;
} else {
WT_RET(
__split_ovfl_key_cleanup(session, from_home, ref));
@@ -345,18 +345,13 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
}
/*
- * If there's no address at all (the page has never been written), or
- * the address has already been instantiated, there's no work to do.
- * Otherwise, the address still references a split page on-page cell,
- * instantiate it. We can race with reconciliation and/or eviction of
- * the child pages, be cautious: read the address and verify it, and
- * only update it if the value is unchanged from the original. In the
- * case of a race, the address must no longer reference the split page,
- * we're done.
+ * If there's no address (the page has never been written), or the
+ * address has been instantiated, there's no work to do. Otherwise,
+ * instantiate the address in-memory, from the on-page cell.
*/
- WT_ORDERED_READ(ref_addr, ref->addr);
- if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
- __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
+ addr = ref->addr;
+ if (addr != NULL && !__wt_off_page(from_home, addr)) {
+ __wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
session, unpack.data, unpack.size, &addr->addr)) != 0) {
@@ -376,10 +371,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
break;
WT_ILLEGAL_VALUE(session);
}
- if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) {
- __wt_free(session, addr->addr);
- __wt_free(session, addr);
- }
+ ref->addr = addr;
}
/* And finally, copy the WT_REF pointer itself. */
@@ -537,7 +529,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
WT_REF **child_refp, *ref, **root_refp;
WT_SPLIT_ERROR_PHASE complete;
size_t child_incr, root_decr, root_incr, size;
- uint64_t recno, split_gen;
+ uint64_t split_gen;
uint32_t children, chunk, i, j, remain;
uint32_t slots;
void *p;
@@ -601,10 +593,8 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
alloc_refp = alloc_index->index, i = 0; i < children; ++i) {
slots = i == children - 1 ? remain : chunk;
- recno = root->type == WT_PAGE_COL_INT ?
- (*root_refp)->key.recno : WT_RECNO_OOB;
WT_ERR(__wt_page_alloc(
- session, root->type, recno, slots, false, &child));
+ session, root->type, slots, false, &child));
/*
* Initialize the page's child reference; we need a copy of the
@@ -619,7 +609,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
root_incr += sizeof(WT_IKEY) + size;
} else
- ref->key.recno = recno;
+ ref->ref_recno = (*root_refp)->ref_recno;
ref->state = WT_REF_MEM;
/* Initialize the child page. */
@@ -745,7 +735,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
alloc_index = pindex = NULL;
parent_decr = 0;
- parent_entries = 0;
empty_parent = false;
complete = WT_ERR_RETURN;
@@ -1022,7 +1011,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_REF **child_refp, *page_ref, **page_refp, *ref;
WT_SPLIT_ERROR_PHASE complete;
size_t child_incr, page_decr, page_incr, parent_incr, size;
- uint64_t recno, split_gen;
+ uint64_t split_gen;
uint32_t children, chunk, i, j, remain;
uint32_t slots;
void *p;
@@ -1107,10 +1096,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
for (alloc_refp = alloc_index->index + 1, i = 1; i < children; ++i) {
slots = i == children - 1 ? remain : chunk;
- recno = page->type == WT_PAGE_COL_INT ?
- (*page_refp)->key.recno : WT_RECNO_OOB;
WT_ERR(__wt_page_alloc(
- session, page->type, recno, slots, false, &child));
+ session, page->type, slots, false, &child));
/*
* Initialize the page's child reference; we need a copy of the
@@ -1125,7 +1112,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
parent_incr += sizeof(WT_IKEY) + size;
} else
- ref->key.recno = recno;
+ ref->ref_recno = (*page_refp)->ref_recno;
ref->state = WT_REF_MEM;
/* Initialize the child page. */
@@ -1491,6 +1478,15 @@ __split_multi_inmem(
uint32_t i, slot;
/*
+ * In 04/2016, we removed column-store record numbers from the WT_PAGE
+ * structure, leading to hard-to-debug problems because we corrupt the
+ * page if we search it using the wrong initial record number. For now,
+ * assert the record number is set.
+ */
+ WT_ASSERT(session,
+ orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
+
+ /*
* This code re-creates an in-memory page that is part of a set created
* while evicting a large page, and adds references to any unresolved
* update chains to the new page. We get here due to choosing to keep
@@ -1533,7 +1529,7 @@ __split_multi_inmem(
/* Build a key. */
if (supd->ins == NULL) {
slot = WT_ROW_SLOT(orig, supd->rip);
- upd = orig->pg_row_upd[slot];
+ upd = orig->modify->mod_row_update[slot];
WT_ERR(__wt_row_leaf_key(
session, orig, supd->rip, key, false));
@@ -1596,7 +1592,7 @@ __split_multi_inmem_final(WT_PAGE *orig, WT_MULTI *multi)
case WT_PAGE_ROW_LEAF:
if (supd->ins == NULL) {
slot = WT_ROW_SLOT(orig, supd->rip);
- orig->pg_row_upd[slot] = NULL;
+ orig->modify->mod_row_update[slot] = NULL;
} else
supd->ins->upd = NULL;
break;
@@ -1613,11 +1609,16 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
/*
* We failed creating new in-memory pages. For error-handling reasons,
* we've left the update chains referenced by both the original and
- * new pages. Discard the new pages, setting a flag so the discard code
- * doesn't discard the updates on the page.
+ * new pages. Discard the newly allocated WT_REF structures and their
+ * pages (setting a flag so the discard code doesn't discard the updates
+ * on the page).
+ *
+ * Our callers allocate WT_REF arrays, then individual WT_REFs; check
+ * for uninitialized information.
*/
- if (ref->page != NULL) {
- F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
+ if (ref != NULL) {
+ if (ref->page != NULL)
+ F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
__wt_free_ref(session, ref, orig->type, true);
}
}
@@ -1635,7 +1636,6 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_REF *ref;
size_t incr;
- addr = NULL;
incr = 0;
/* Allocate an underlying WT_REF. */
@@ -1643,9 +1643,24 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ref = *refp;
incr += sizeof(WT_REF);
- /* Any parent reference is filled in by our caller. */
- ref->home = NULL;
+ /*
+ * Set the WT_REF key before (optionally) building the page; underlying
+ * column-store functions need the page's key space to search it.
+ */
+ switch (page->type) {
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ ikey = multi->key.ikey;
+ WT_RET(__wt_row_ikey(
+ session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
+ incr += sizeof(WT_IKEY) + ikey->size;
+ break;
+ default:
+ ref->ref_recno = multi->key.recno;
+ break;
+ }
+ /* If there's a disk image, build a page, otherwise set the address. */
if (multi->disk_image == NULL) {
/*
* Copy the address: we could simply take the buffer, but that
@@ -1659,28 +1674,13 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
addr->type = multi->addr.type;
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
- } else
+ ref->state = WT_REF_DISK;
+ } else {
WT_RET(__split_multi_inmem(session, page, ref, multi));
-
- switch (page->type) {
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- ikey = multi->key.ikey;
- WT_RET(__wt_row_ikey(
- session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- incr += sizeof(WT_IKEY) + ikey->size;
- break;
- default:
- ref->key.recno = multi->key.recno;
- break;
+ ref->state = WT_REF_MEM;
}
- ref->state = addr != NULL ? WT_REF_DISK : WT_REF_MEM;
-
- /*
- * If our caller wants to track the memory allocations, we have a return
- * reference.
- */
+ /* Optionally return changes in the memory footprint. */
if (incrp != NULL)
*incrp += incr;
return (0);
@@ -1781,17 +1781,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
parent_incr += sizeof(WT_IKEY) + key->size;
__wt_scr_free(session, &key);
} else
- child->key.recno = ref->key.recno;
+ child->ref_recno = ref->ref_recno;
/*
* The second page in the split is a new WT_REF/page pair.
*/
- if (type == WT_PAGE_ROW_LEAF)
- WT_ERR(__wt_page_alloc(session,
- type, WT_RECNO_OOB, 0, false, &right));
- else
- WT_ERR(__wt_page_alloc(session,
- type, WT_INSERT_RECNO(moved_ins), 0, false, &right));
+ WT_ERR(__wt_page_alloc(session, type, 0, false, &right));
/*
* The new page is dirty by definition, plus column-store splits update
@@ -1801,11 +1796,15 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_modify_set(session, right);
if (type == WT_PAGE_ROW_LEAF) {
- WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
- WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_row_insert));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_row_insert[0]));
} else {
- WT_ERR(__wt_calloc_one(session, &right->modify->mod_append));
- WT_ERR(__wt_calloc_one(session, &right->modify->mod_append[0]));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_col_append));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_col_append[0]));
}
right_incr += sizeof(WT_INSERT_HEAD);
right_incr += sizeof(WT_INSERT_HEAD *);
@@ -1822,7 +1821,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
child));
parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
} else
- child->key.recno = WT_INSERT_RECNO(moved_ins);
+ child->ref_recno = WT_INSERT_RECNO(moved_ins);
/*
* Allocation operations completed, we're going to split.
@@ -1831,8 +1830,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
*/
if (type != WT_PAGE_ROW_LEAF) {
WT_ASSERT(session,
- page->modify->mod_split_recno == WT_RECNO_OOB);
- page->modify->mod_split_recno = child->key.recno;
+ page->modify->mod_col_split_recno == WT_RECNO_OOB);
+ page->modify->mod_col_split_recno = child->ref_recno;
}
/*
@@ -1842,8 +1841,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
*/
for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
;
- WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
- (size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins));
+ WT_MEM_TRANSFER(page_decr, right_incr,
+ sizeof(WT_INSERT) + (size_t)i * sizeof(WT_INSERT *));
+ if (type == WT_PAGE_ROW_LEAF)
+ WT_MEM_TRANSFER(
+ page_decr, right_incr, WT_INSERT_KEY_SIZE(moved_ins));
WT_MEM_TRANSFER(
page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
@@ -1856,7 +1858,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* can be ignored.)
*/
tmp_ins_head = type == WT_PAGE_ROW_LEAF ?
- right->pg_row_ins[0] : right->modify->mod_append[0];
+ right->modify->mod_row_insert[0] : right->modify->mod_col_append[0];
tmp_ins_head->head[0] = tmp_ins_head->tail[0] = moved_ins;
/*
@@ -1952,9 +1954,6 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
/*
* Update the page accounting.
- *
- * XXX
- * If we fail to split the parent, the page's accounting will be wrong.
*/
__wt_cache_page_inmem_decr(session, page, page_decr);
__wt_cache_page_inmem_incr(session, right, right_incr);
@@ -1978,7 +1977,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* Reset the split column-store page record.
*/
if (type != WT_PAGE_ROW_LEAF)
- page->modify->mod_split_recno = WT_RECNO_OOB;
+ page->modify->mod_col_split_recno = WT_RECNO_OOB;
/*
* Clear the allocated page's reference to the moved insert list element
@@ -1991,15 +1990,18 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* lists have.
*/
if (type == WT_PAGE_ROW_LEAF)
- right->pg_row_ins[0]->head[0] =
- right->pg_row_ins[0]->tail[0] = NULL;
+ right->modify->mod_row_insert[0]->head[0] =
+ right->modify->mod_row_insert[0]->tail[0] = NULL;
else
- right->modify->mod_append[0]->head[0] =
- right->modify->mod_append[0]->tail[0] = NULL;
+ right->modify->mod_col_append[0]->head[0] =
+ right->modify->mod_col_append[0]->tail[0] = NULL;
ins_head->tail[0]->next[0] = moved_ins;
ins_head->tail[0] = moved_ins;
+ /* Fix up accounting for the page size. */
+ __wt_cache_page_inmem_incr(session, page, page_decr);
+
err: if (split_ref[0] != NULL) {
/*
* The address was moved to the replacement WT_REF, restore it.
@@ -2007,12 +2009,12 @@ err: if (split_ref[0] != NULL) {
ref->addr = split_ref[0]->addr;
if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[0]->key.ikey);
+ __wt_free(session, split_ref[0]->ref_ikey);
__wt_free(session, split_ref[0]);
}
if (split_ref[1] != NULL) {
if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[1]->key.ikey);
+ __wt_free(session, split_ref[1]->ref_ikey);
__wt_free(session, split_ref[1]);
}
if (right != NULL) {
@@ -2178,7 +2180,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- WT_REF new;
+ WT_REF *new;
page = ref->page;
mod = page->modify;
@@ -2195,9 +2197,15 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
* exactly what we want to do.
*
* Build the new page.
+ *
+ * Allocate a WT_REF because the error path uses routines that will
+ * free memory. The only field we need to set is the record number, as
+ * it's used by the search routines.
*/
- memset(&new, 0, sizeof(new));
- WT_ERR(__split_multi_inmem(session, page, &new, &mod->mod_multi[0]));
+ WT_RET(__wt_calloc_one(session, &new));
+ new->ref_recno = ref->ref_recno;
+
+ WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0]));
/*
* The rewrite succeeded, we can no longer fail.
@@ -2217,11 +2225,12 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_ref_out(session, ref);
/* Swap the new page into place. */
- ref->page = new.page;
+ ref->page = new->page;
WT_PUBLISH(ref->state, WT_REF_MEM);
+ __wt_free(session, new);
return (0);
-err: __split_multi_inmem_fail(session, page, &new);
+err: __split_multi_inmem_fail(session, page, new);
return (ret);
}
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index f1e3c0b40d5..3d5abf34147 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -41,9 +41,6 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
- WT_STAT_SET(session, stats, cache_bytes_inuse,
- __wt_btree_bytes_inuse(session));
-
/* Everything else is really, really expensive. */
if (!F_ISSET(cst, WT_CONN_STAT_ALL))
return (0);
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index df4ceea8ffa..da6c53aa316 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -26,14 +26,12 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
uint64_t oldest_id, saved_snap_min;
uint32_t flags;
- u_int saved_evict_walk_period;
conn = S2C(session);
btree = S2BT(session);
walk = NULL;
txn = &session->txn;
saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
- saved_evict_walk_period = btree->evict_walk_period;
flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
@@ -98,8 +96,10 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* snapshot now.
*
* All changes committed up to this point should be included.
- * We don't update the snapshot in between pages because (a)
- * the metadata shouldn't be that big, and (b) if we do ever
+ * We don't update the snapshot in between pages because the
+ * metadata shouldn't have many pages. Instead, read-committed
+ * isolation ensures that all metadata updates completed before
+ * the checkpoint are included.
*/
if (txn->isolation == WT_ISO_READ_COMMITTED)
WT_ERR(__wt_txn_get_snapshot(session));
@@ -188,7 +188,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
break;
case WT_SYNC_CLOSE:
case WT_SYNC_DISCARD:
- WT_ILLEGAL_VALUE_ERR(session);
+ WT_ERR(__wt_illegal_value(session, NULL));
+ break;
}
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
@@ -238,10 +239,10 @@ err: /* On error, clear any left-over tree walk. */
WT_FULL_BARRIER();
/*
- * In case this tree was being skipped by the eviction server
- * during the checkpoint, restore the previous state.
+ * If this tree was being skipped by the eviction server during
+ * the checkpoint, clear the wait.
*/
- btree->evict_walk_period = saved_evict_walk_period;
+ btree->evict_walk_period = 0;
/*
* Wake the eviction server, in case application threads have
@@ -273,6 +274,8 @@ err: /* On error, clear any left-over tree walk. */
int
__wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op)
{
+ WT_DECL_RET;
+
switch (op) {
case WT_SYNC_CHECKPOINT:
case WT_SYNC_CLOSE:
@@ -292,10 +295,12 @@ __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op)
switch (op) {
case WT_SYNC_CHECKPOINT:
case WT_SYNC_WRITE_LEAVES:
- return (__sync_file(session, op));
+ ret = __sync_file(session, op);
+ break;
case WT_SYNC_CLOSE:
case WT_SYNC_DISCARD:
- return (__wt_evict_file(session, op));
- WT_ILLEGAL_VALUE(session);
+ ret = __wt_evict_file(session, op);
+ break;
}
+ return (ret);
}
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 83dc7924312..0a04c387a0f 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -22,13 +22,13 @@ typedef struct {
#define WT_VRFY_DUMP(vs) \
((vs)->dump_address || \
- (vs)->dump_blocks || (vs)->dump_pages || (vs)->dump_shape)
+ (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages)
bool dump_address; /* Configure: dump special */
bool dump_blocks;
+ bool dump_layout;
bool dump_pages;
- bool dump_shape;
-
- u_int depth, depth_internal[100], depth_leaf[100];
+ /* Page layout information */
+ uint64_t depth, depth_internal[100], depth_leaf[100];
WT_ITEM *tmp1, *tmp2, *tmp3, *tmp4; /* Temporary buffers */
} WT_VSTUFF;
@@ -59,12 +59,12 @@ __verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
vs->dump_blocks = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
+ vs->dump_layout = cval.val != 0;
+
WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
vs->dump_pages = cval.val != 0;
- WT_RET(__wt_config_gets(session, cfg, "dump_shape", &cval));
- vs->dump_shape = cval.val != 0;
-
#if !defined(HAVE_DIAGNOSTIC)
if (vs->dump_blocks || vs->dump_pages)
WT_RET_MSG(session, ENOTSUP,
@@ -112,33 +112,38 @@ __verify_config_offsets(
}
/*
- * __verify_tree_shape --
+ * __verify_layout --
* Dump the tree shape.
*/
static int
-__verify_tree_shape(WT_SESSION_IMPL *session, WT_VSTUFF *vs)
+__verify_layout(WT_SESSION_IMPL *session, WT_VSTUFF *vs)
{
- uint32_t total;
+ uint64_t total;
size_t i;
for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
total += vs->depth_internal[i];
WT_RET(__wt_msg(
- session, "Internal page tree-depth (total %" PRIu32 "):", total));
+ session, "Internal page tree-depth (total %" PRIu64 "):", total));
for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
- if (vs->depth_internal[i] != 0)
+ if (vs->depth_internal[i] != 0) {
WT_RET(__wt_msg(session,
- "\t%03zu: %u", i, vs->depth_internal[i]));
+ "\t%03" WT_SIZET_FMT ": %" PRIu64,
+ i, vs->depth_internal[i]));
+ vs->depth_internal[i] = 0;
+ }
for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
total += vs->depth_leaf[i];
WT_RET(__wt_msg(
- session, "Leaf page tree-depth (total %" PRIu32 "):", total));
+ session, "Leaf page tree-depth (total %" PRIu64 "):", total));
for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
- if (vs->depth_leaf[i] != 0)
+ if (vs->depth_leaf[i] != 0) {
WT_RET(__wt_msg(session,
- "\t%03zu: %u", i, vs->depth_leaf[i]));
-
+ "\t%03" WT_SIZET_FMT ": %" PRIu64,
+ i, vs->depth_leaf[i]));
+ vs->depth_leaf[i] = 0;
+ }
return (0);
}
@@ -200,9 +205,11 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
/* House-keeping between checkpoints. */
__verify_checkpoint_reset(vs);
- if (WT_VRFY_DUMP(vs))
+ if (WT_VRFY_DUMP(vs)) {
+ WT_ERR(__wt_msg(session, "%s", WT_DIVIDER));
WT_ERR(__wt_msg(session, "%s: checkpoint %s",
btree->dhandle->name, ckpt->name));
+ }
/* Load the checkpoint. */
WT_ERR(bm->checkpoint_load(bm, session,
@@ -234,8 +241,8 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(ret);
/* Display the tree shape. */
- if (vs->dump_shape)
- WT_ERR(__verify_tree_shape(session, vs));
+ if (vs->dump_layout)
+ WT_ERR(__verify_layout(session, vs));
}
done:
@@ -355,7 +362,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
if (vs->dump_blocks)
WT_RET(__wt_debug_disk(session, page->dsk, NULL));
if (vs->dump_pages)
- WT_RET(__wt_debug_page(session, page, NULL));
+ WT_RET(__wt_debug_page(session, ref, NULL));
#endif
/*
@@ -364,13 +371,11 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
*/
switch (page->type) {
case WT_PAGE_COL_FIX:
- recno = page->pg_fix_recno;
- goto recno_chk;
case WT_PAGE_COL_INT:
- recno = page->pg_intl_recno;
+ recno = ref->ref_recno;
goto recno_chk;
case WT_PAGE_COL_VAR:
- recno = page->pg_var_recno;
+ recno = ref->ref_recno;
recno_chk: if (recno != vs->record_total + 1)
WT_RET_MSG(session, WT_ERROR,
"page at %s has a starting record of %" PRIu64
@@ -485,7 +490,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
* reviewed to this point.
*/
++entry;
- if (child_ref->key.recno != vs->record_total + 1) {
+ if (child_ref->ref_recno != vs->record_total + 1) {
WT_RET_MSG(session, WT_ERROR,
"the starting record number in entry %"
PRIu32 " of the column internal page at "
@@ -494,7 +499,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
entry,
__wt_page_addr_string(
session, child_ref, vs->tmp1),
- child_ref->key.recno,
+ child_ref->ref_recno,
vs->record_total + 1);
}
diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c
index 5480a25b5ec..3a6fd8261ba 100644
--- a/src/btree/bt_vrfy_dsk.c
+++ b/src/btree/bt_vrfy_dsk.c
@@ -298,14 +298,11 @@ __verify_dsk_row(
case WT_CELL_ADDR_LEAF_NO:
case WT_CELL_KEY_OVFL:
case WT_CELL_VALUE_OVFL:
- ret = bm->addr_invalid(
- bm, session, unpack->data, unpack->size);
- WT_RET_ERROR_OK(ret, EINVAL);
- if (ret == EINVAL) {
+ if ((ret = bm->addr_invalid(
+ bm, session, unpack->data, unpack->size)) == EINVAL)
ret = __err_cell_corrupt_or_eof(
session, cell_num, tag);
- goto err;
- }
+ WT_ERR(ret);
break;
}
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index fd60b12538a..a7920da5267 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -55,7 +55,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
*/
if (recno == WT_RECNO_OOB ||
recno > (btree->type == BTREE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page)))
+ __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref)))
append = true;
}
@@ -107,17 +108,17 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* Allocate the append/update list reference as necessary. */
if (append) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_append, ins_headp, 1);
- ins_headp = &mod->mod_append[0];
+ page, mod->mod_col_append, ins_headp, 1);
+ ins_headp = &mod->mod_col_append[0];
} else if (page->type == WT_PAGE_COL_FIX) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_update, ins_headp, 1);
- ins_headp = &mod->mod_update[0];
+ page, mod->mod_col_update, ins_headp, 1);
+ ins_headp = &mod->mod_col_update[0];
} else {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_update, ins_headp,
+ page, mod->mod_col_update, ins_headp,
page->pg_var_entries);
- ins_headp = &mod->mod_update[cbt->slot];
+ ins_headp = &mod->mod_col_update[cbt->slot];
}
/* Allocate the WT_INSERT_HEAD structure as necessary. */
@@ -142,8 +143,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* it's easy (as opposed to in row-store) and a difficult bug to
* otherwise diagnose.
*/
- WT_ASSERT(session, mod->mod_split_recno == WT_RECNO_OOB ||
- (recno != WT_RECNO_OOB && mod->mod_split_recno > recno));
+ WT_ASSERT(session, mod->mod_col_split_recno == WT_RECNO_OOB ||
+ (recno != WT_RECNO_OOB &&
+ mod->mod_col_split_recno > recno));
if (upd_arg == NULL) {
WT_ERR(
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index 4730267a545..6c96181d3bf 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -30,7 +30,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
* Check if the search key is smaller than the parent's starting key for
* this page.
*/
- if (recno < leaf->key.recno) {
+ if (recno < leaf->ref_recno) {
cbt->compare = 1; /* page keys > search key */
return (0);
}
@@ -48,7 +48,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
WT_INTL_INDEX_GET(session, leaf->home, pindex);
indx = leaf->pindex_hint;
if (indx + 1 < pindex->entries && pindex->index[indx] == leaf)
- if (recno >= pindex->index[indx + 1]->key.recno) {
+ if (recno >= pindex->index[indx + 1]->ref_recno) {
cbt->compare = -1; /* page keys < search key */
return (0);
}
@@ -133,14 +133,12 @@ restart: /*
if (page->type != WT_PAGE_COL_INT)
break;
- WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
-
WT_INTL_INDEX_GET(session, page, pindex);
base = pindex->entries;
descent = pindex->index[base - 1];
/* Fast path appends. */
- if (recno >= descent->key.recno) {
+ if (recno >= descent->ref_recno) {
/*
* If on the last slot (the key is larger than any key
* on the page), check for an internal page split race.
@@ -158,9 +156,9 @@ restart: /*
indx = base + (limit >> 1);
descent = pindex->index[indx];
- if (recno == descent->key.recno)
+ if (recno == descent->ref_recno)
break;
- if (recno < descent->key.recno)
+ if (recno < descent->ref_recno)
continue;
base = indx + 1;
--limit;
@@ -172,7 +170,7 @@ descend: /*
* (last + 1) index. The slot for descent is the one before
* base.
*/
- if (recno != descent->key.recno) {
+ if (recno != descent->ref_recno) {
/*
* We don't have to correct for base == 0 because the
* only way for base to be 0 is if recno is the page's
@@ -237,13 +235,13 @@ leaf_only:
* do in that case, the record may be appended to the page.
*/
if (page->type == WT_PAGE_COL_FIX) {
- if (recno < page->pg_fix_recno) {
- cbt->recno = page->pg_fix_recno;
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
cbt->compare = 1;
return (0);
}
- if (recno >= page->pg_fix_recno + page->pg_fix_entries) {
- cbt->recno = page->pg_fix_recno + page->pg_fix_entries;
+ if (recno >= current->ref_recno + page->pg_fix_entries) {
+ cbt->recno = current->ref_recno + page->pg_fix_entries;
goto past_end;
} else {
cbt->recno = recno;
@@ -251,14 +249,14 @@ leaf_only:
ins_head = WT_COL_UPDATE_SINGLE(page);
}
} else {
- if (recno < page->pg_var_recno) {
- cbt->recno = page->pg_var_recno;
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
cbt->slot = 0;
cbt->compare = 1;
return (0);
}
- if ((cip = __col_var_search(page, recno, NULL)) == NULL) {
- cbt->recno = __col_var_last_recno(page);
+ if ((cip = __col_var_search(current, recno, NULL)) == NULL) {
+ cbt->recno = __col_var_last_recno(current);
cbt->slot = page->pg_var_entries == 0 ?
0 : page->pg_var_entries - 1;
goto past_end;
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index 9fff092d079..83fd2dad9e4 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -517,7 +517,7 @@ __wt_row_ikey(WT_SESSION_IMPL *session,
{
uintptr_t oldv;
- oldv = (uintptr_t)ref->key.ikey;
+ oldv = (uintptr_t)ref->ref_ikey;
WT_DIAGNOSTIC_YIELD;
/*
@@ -527,10 +527,10 @@ __wt_row_ikey(WT_SESSION_IMPL *session,
WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0);
WT_ASSERT(session, ref->state != WT_REF_SPLIT);
WT_ASSERT(session,
- __wt_atomic_cas_ptr(&ref->key.ikey, (WT_IKEY *)oldv, ikey));
+ __wt_atomic_cas_ptr(&ref->ref_ikey, (WT_IKEY *)oldv, ikey));
}
#else
- ref->key.ikey = ikey;
+ ref->ref_ikey = ikey;
#endif
return (0);
}
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index 176016bb340..f0424ff93b4 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -53,6 +53,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
@@ -70,6 +71,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
/*
* Modify: allocate an update array as necessary, build a WT_UPDATE
@@ -83,11 +85,12 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
if (cbt->compare == 0) {
if (cbt->ins == NULL) {
/* Allocate an update array as necessary. */
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->pg_row_upd, upd_entry, page->pg_row_entries);
+ WT_PAGE_ALLOC_AND_SWAP(session,
+ page, mod->mod_row_update,
+ upd_entry, page->pg_row_entries);
/* Set the WT_UPDATE array reference. */
- upd_entry = &page->pg_row_upd[cbt->slot];
+ upd_entry = &mod->mod_row_update[cbt->slot];
} else
upd_entry = &cbt->ins->upd;
@@ -144,11 +147,11 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* slot. That's hard, so we set a flag.
*/
WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->pg_row_ins, ins_headp, page->pg_row_entries + 1);
+ mod->mod_row_insert, ins_headp, page->pg_row_entries + 1);
ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ?
page->pg_row_entries: cbt->slot;
- ins_headp = &page->pg_row_ins[ins_slot];
+ ins_headp = &mod->mod_row_insert[ins_slot];
/* Allocate the WT_INSERT_HEAD structure as necessary. */
WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 6169a0a810a..4afcd74520f 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -634,14 +634,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_INSERT *ins, **start, **stop;
WT_INSERT_HEAD *ins_head;
WT_PAGE *page;
+ uint64_t samples;
uint32_t choice, entries, i;
int level;
page = cbt->ref->page;
-
start = stop = NULL; /* [-Wconditional-uninitialized] */
entries = 0; /* [-Wconditional-uninitialized] */
+ __cursor_pos_clear(cbt);
+
/* If the page has disk-based entries, select from them. */
if (page->pg_row_entries != 0) {
cbt->compare = 0;
@@ -688,7 +690,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Step down the skip list levels, selecting a random chunk of the name
* space at each level.
*/
- while (level > 0) {
+ for (samples = entries; level > 0; samples += entries) {
/*
* There are (entries) or (entries + 1) chunks of the name space
* considered at each level. They are: between start and the 1st
@@ -765,6 +767,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
cbt->ins_head = ins_head;
cbt->compare = 0;
+ /*
+ * Random lookups in newly created collections can be slow if a page
+ * consists of a large skiplist. Schedule the page for eviction if we
+ * encounter a large skiplist. This is worthwhile because applications
+ * that take a sample often take many samples, so the overhead of
+ * traversing the skip list each time accumulates to real time.
+ */
+ if (samples > 5000)
+ __wt_page_evict_soon(page);
+
return (0);
}
@@ -784,8 +796,6 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
btree = S2BT(session);
current = NULL;
- __cursor_pos_clear(cbt);
-
if (0) {
restart: /*
* Discard the currently held page and restart the search from
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index fd541458fa8..27c2900fa98 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -42,6 +42,17 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
v = WT_STAT_READ(dstats, cursor_remove);
WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
+ /*
+ * If we're clearing stats we need to clear the cursor values we just
+ * read. This does not clear the rest of the statistics in the
+ * lookaside data source stat cursor, but we own that namespace so we
+ * don't have to worry about users seeing inconsistent data source
+ * information.
+ */
+ if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR)) {
+ WT_STAT_SET(session, dstats, cursor_insert, 0);
+ WT_STAT_SET(session, dstats, cursor_remove, 0);
+ }
}
/*
diff --git a/src/support/cksum.c b/src/checksum/checksum.c
index 0b086753406..b6a76dacfd8 100644
--- a/src/support/cksum.c
+++ b/src/checksum/checksum.c
@@ -1103,6 +1103,7 @@ static const uint32_t g_crc_slicing[8][256] = {
#endif
};
+#if !defined(__powerpc64__)
/*
* __wt_cksum_sw --
* Return a checksum for a chunk of memory, computed in software.
@@ -1171,6 +1172,7 @@ __wt_cksum_sw(const void *chunk, size_t len)
#endif
return (~crc);
}
+#endif
#if (defined(__amd64) || defined(__x86_64))
/*
diff --git a/src/support/power8/LICENSE.TXT b/src/checksum/power8/LICENSE.TXT
index 2f4bb91f574..2f4bb91f574 100644
--- a/src/support/power8/LICENSE.TXT
+++ b/src/checksum/power8/LICENSE.TXT
diff --git a/src/support/power8/README.md b/src/checksum/power8/README.md
index 3e2976650cd..3e2976650cd 100644
--- a/src/support/power8/README.md
+++ b/src/checksum/power8/README.md
diff --git a/src/support/power8/crc32.S b/src/checksum/power8/crc32.S
index f990acb7b12..0b7870668b5 100644
--- a/src/support/power8/crc32.S
+++ b/src/checksum/power8/crc32.S
@@ -773,6 +773,6 @@ FUNC_END(__crc32_vpmsum)
/*
* Make sure the stack isn't executable with GCC (regardless of platform).
*/
-#ifndef __clang__
+#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif
diff --git a/src/support/power8/crc32_constants.h b/src/checksum/power8/crc32_constants.h
index 02c471d1c56..02c471d1c56 100644
--- a/src/support/power8/crc32_constants.h
+++ b/src/checksum/power8/crc32_constants.h
diff --git a/src/support/power8/crc32_wrapper.c b/src/checksum/power8/crc32_wrapper.c
index 34ac4150338..62bd3e64f5c 100644
--- a/src/support/power8/crc32_wrapper.c
+++ b/src/checksum/power8/crc32_wrapper.c
@@ -2,7 +2,7 @@
#define CRC_TABLE
#include "crc32_constants.h"
-#define VMX_ALIGN 16
+#define VMX_ALIGN 16U
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#ifdef REFLECT
@@ -26,6 +26,9 @@ static unsigned int crc32_align(unsigned int crc, unsigned char *p,
unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p,
unsigned long len);
+/* -Werror=missing-prototypes */
+unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
+ unsigned long len);
unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
unsigned long len)
{
diff --git a/src/support/power8/ppc-opcode.h b/src/checksum/power8/ppc-opcode.h
index b63feea60a0..b63feea60a0 100644
--- a/src/support/power8/ppc-opcode.h
+++ b/src/checksum/power8/ppc-opcode.h
diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c
index 27bd6255a0a..591d22284f5 100644
--- a/src/config/config_collapse.c
+++ b/src/config/config_collapse.c
@@ -38,6 +38,8 @@ __wt_config_collapse(
WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ *config_ret = NULL;
+
WT_RET(__wt_scr_alloc(session, 0, &tmp));
WT_ERR(__wt_config_init(session, &cparser, cfg[0]));
@@ -59,6 +61,8 @@ __wt_config_collapse(
WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,",
(int)k.len, k.str, (int)v.len, v.str));
}
+
+ /* We loop until error, and the expected error is WT_NOTFOUND. */
if (ret != WT_NOTFOUND)
goto err;
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 4b601fbc53a..1b656c5a0aa 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -17,6 +17,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_close[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_load_extension[] = {
{ "config", "string", NULL, NULL, NULL, 0 },
+ { "early_load", "boolean", NULL, NULL, NULL, 0 },
{ "entry", "string", NULL, NULL, NULL, 0 },
{ "terminate", "string", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -305,6 +306,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_join[] = {
NULL, "choices=[\"eq\",\"ge\",\"gt\",\"le\",\"lt\"]",
NULL, 0 },
{ "count", "int", NULL, NULL, NULL, 0 },
+ { "operation", "string",
+ NULL, "choices=[\"and\",\"or\"]",
+ NULL, 0 },
{ "strategy", "string",
NULL, "choices=[\"bloom\",\"default\"]",
NULL, 0 },
@@ -376,9 +380,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = {
static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = {
{ "dump_address", "boolean", NULL, NULL, NULL, 0 },
{ "dump_blocks", "boolean", NULL, NULL, NULL, 0 },
+ { "dump_layout", "boolean", NULL, NULL, NULL, 0 },
{ "dump_offsets", "list", NULL, NULL, NULL, 0 },
{ "dump_pages", "boolean", NULL, NULL, NULL, 0 },
- { "dump_shape", "boolean", NULL, NULL, NULL, 0 },
{ "strict", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -956,9 +960,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_CONNECTION_close, 1
},
{ "WT_CONNECTION.load_extension",
- "config=,entry=wiredtiger_extension_init,"
+ "config=,early_load=0,entry=wiredtiger_extension_init,"
"terminate=wiredtiger_extension_terminate",
- confchk_WT_CONNECTION_load_extension, 3
+ confchk_WT_CONNECTION_load_extension, 4
},
{ "WT_CONNECTION.open_session",
"isolation=read-committed",
@@ -972,7 +976,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
",file_manager=(close_handle_minimum=250,close_idle_time=30,"
"close_scan_interval=10),log=(archive=,compressor=,enabled=0,"
- "file_max=100MB,path=,prealloc=,recover=on,zero_fill=0),"
+ "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0),"
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=0,on_close=0,"
@@ -980,6 +984,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_WT_CONNECTION_reconfigure, 18
},
+ { "WT_CONNECTION.set_file_system",
+ "",
+ NULL, 0
+ },
{ "WT_CURSOR.close",
"",
NULL, 0
@@ -1032,8 +1040,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_SESSION.join",
"bloom_bit_count=16,bloom_hash_count=8,compare=\"eq\",count=,"
- "strategy=",
- confchk_WT_SESSION_join, 5
+ "operation=\"and\",strategy=",
+ confchk_WT_SESSION_join, 6
},
{ "WT_SESSION.log_flush",
"sync=on",
@@ -1094,8 +1102,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL, 0
},
{ "WT_SESSION.verify",
- "dump_address=0,dump_blocks=0,dump_offsets=,dump_pages=0,"
- "dump_shape=0,strict=0",
+ "dump_address=0,dump_blocks=0,dump_layout=0,dump_offsets=,"
+ "dump_pages=0,strict=0",
confchk_WT_SESSION_verify, 6
},
{ "colgroup.meta",
@@ -1169,14 +1177,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),use_environment=,"
- "use_environment_priv=0,verbose=,write_through=",
+ "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
+ ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
+ ",session_max=100,session_scratch_max=2MB,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=0,on_close=0,"
+ "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
+ ",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
+ "write_through=",
confchk_wiredtiger_open, 38
},
{ "wiredtiger_open_all",
@@ -1190,15 +1199,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=,prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),use_environment=,"
- "use_environment_priv=0,verbose=,version=(major=0,minor=0),"
- "write_through=",
+ "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
+ ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
+ ",session_max=100,session_scratch_max=2MB,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=0,on_close=0,"
+ "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
+ ",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
+ "version=(major=0,minor=0),write_through=",
confchk_wiredtiger_open_all, 39
},
{ "wiredtiger_open_basecfg",
@@ -1210,7 +1219,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
"prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
"worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
@@ -1230,7 +1239,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
"prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
"worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
diff --git a/src/conn/api_strerror.c b/src/conn/api_strerror.c
index 87864f7f4b0..edb11957556 100644
--- a/src/conn/api_strerror.c
+++ b/src/conn/api_strerror.c
@@ -40,8 +40,6 @@ __wt_wiredtiger_error(int error)
return ("WT_RUN_RECOVERY: recovery must be run to continue");
case WT_CACHE_FULL:
return ("WT_CACHE_FULL: operation would overflow cache");
- case WT_PERM_DENIED:
- return ("WT_PERM_DENIED: permission denied (internal)");
}
/*
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 9e2f03da21f..98267eeeb2c 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -751,6 +751,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.err_printf = __wt_ext_err_printf;
conn->extension_api.msg_printf = __wt_ext_msg_printf;
conn->extension_api.strerror = __wt_ext_strerror;
+ conn->extension_api.map_windows_error = __wt_ext_map_windows_error;
conn->extension_api.scr_alloc = __wt_ext_scr_alloc;
conn->extension_api.scr_free = __wt_ext_scr_free;
conn->extension_api.collator_config = ext_collator_config;
@@ -806,6 +807,7 @@ static int
__conn_load_default_extensions(WT_CONNECTION_IMPL *conn)
{
WT_UNUSED(conn);
+
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
WT_RET(snappy_extension_init(&conn->iface, NULL));
#endif
@@ -819,18 +821,16 @@ __conn_load_default_extensions(WT_CONNECTION_IMPL *conn)
}
/*
- * __conn_load_extension --
- * WT_CONNECTION->load_extension method.
+ * __conn_load_extension_int --
+ * Internal extension load interface
*/
static int
-__conn_load_extension(
- WT_CONNECTION *wt_conn, const char *path, const char *config)
+__conn_load_extension_int(WT_SESSION_IMPL *session,
+ const char *path, const char *cfg[], bool early_load)
{
WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_DLH *dlh;
- WT_SESSION_IMPL *session;
int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *);
bool is_local;
const char *init_name, *terminate_name;
@@ -839,8 +839,10 @@ __conn_load_extension(
init_name = terminate_name = NULL;
is_local = strcmp(path, "local") == 0;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, load_extension, config, cfg);
+ /* Ensure that the load matches the phase of startup we are in. */
+ WT_ERR(__wt_config_gets(session, cfg, "early_load", &cval));
+ if ((cval.val == 0 && early_load) || (cval.val != 0 && !early_load))
+ return (0);
/*
* This assumes the underlying shared libraries are reference counted,
@@ -865,20 +867,39 @@ __conn_load_extension(
__wt_dlsym(session, dlh, terminate_name, false, &dlh->terminate));
/* Call the load function last, it simplifies error handling. */
- WT_ERR(load(wt_conn, (WT_CONFIG_ARG *)cfg));
+ WT_ERR(load(&S2C(session)->iface, (WT_CONFIG_ARG *)cfg));
/* Link onto the environment's list of open libraries. */
- __wt_spin_lock(session, &conn->api_lock);
- TAILQ_INSERT_TAIL(&conn->dlhqh, dlh, q);
- __wt_spin_unlock(session, &conn->api_lock);
+ __wt_spin_lock(session, &S2C(session)->api_lock);
+ TAILQ_INSERT_TAIL(&S2C(session)->dlhqh, dlh, q);
+ __wt_spin_unlock(session, &S2C(session)->api_lock);
dlh = NULL;
err: if (dlh != NULL)
WT_TRET(__wt_dlclose(session, dlh));
__wt_free(session, init_name);
__wt_free(session, terminate_name);
+ return (ret);
+}
- API_END_RET_NOTFOUND_MAP(session, ret);
+/*
+ * __conn_load_extension --
+ * WT_CONNECTION->load_extension method.
+ *
+ * Enters the connection API with the caller's config string and passes
+ * the cfg made available by CONNECTION_API_CALL, together with path, to
+ * __conn_load_extension_int with early_load set to false. The value
+ * returned by that call is handed to API_END_RET_NOTFOUND_MAP.
+ *
+ * NOTE(review): with early_load false, the internal loader returns 0
+ * without loading anything when the configuration carries a non-zero
+ * "early_load" value, so such a request made through this method looks
+ * like a successful no-op -- confirm that is the intended behavior.
+ */
+static int
+__conn_load_extension(
+ WT_CONNECTION *wt_conn, const char *path, const char *config)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, load_extension, config, cfg);
+
+ ret = __conn_load_extension_int(session, path, cfg, false);
+
+err: API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -886,18 +907,16 @@ err: if (dlh != NULL)
* Load the list of application-configured extensions.
*/
static int
-__conn_load_extensions(WT_SESSION_IMPL *session, const char *cfg[])
+__conn_load_extensions(
+ WT_SESSION_IMPL *session, const char *cfg[], bool early_load)
{
WT_CONFIG subconfig;
WT_CONFIG_ITEM cval, skey, sval;
- WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(exconfig);
WT_DECL_ITEM(expath);
WT_DECL_RET;
-
- conn = S2C(session);
-
- WT_ERR(__conn_load_default_extensions(conn));
+ const char *sub_cfg[] = {
+ WT_CONFIG_BASE(session, WT_CONNECTION_load_extension), NULL, NULL };
WT_ERR(__wt_config_gets(session, cfg, "extensions", &cval));
WT_ERR(__wt_config_subinit(session, &subconfig, &cval));
@@ -912,8 +931,9 @@ __conn_load_extensions(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_buf_fmt(session,
exconfig, "%.*s", (int)sval.len, sval.str));
}
- WT_ERR(conn->iface.load_extension(&conn->iface,
- expath->data, (sval.len > 0) ? exconfig->data : NULL));
+ sub_cfg[1] = sval.len > 0 ? exconfig->data : NULL;
+ WT_ERR(__conn_load_extension_int(
+ session, expath->data, sub_cfg, early_load));
}
WT_ERR_NOTFOUND_OK(ret);
@@ -1192,13 +1212,12 @@ __conn_config_file(WT_SESSION_IMPL *session,
fh = NULL;
/* Configuration files are always optional. */
- WT_RET(__wt_exist(session, filename, &exist));
+ WT_RET(__wt_fs_exist(session, filename, &exist));
if (!exist)
return (0);
/* Open the configuration file. */
- WT_RET(__wt_open(
- session, filename, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &fh));
+ WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh));
WT_ERR(__wt_filesize(session, fh, &size));
if (size == 0)
goto err;
@@ -1280,7 +1299,8 @@ __conn_config_file(WT_SESSION_IMPL *session,
* the next character is a hash mark, skip to the next newline.
*/
for (;;) {
- for (*t++ = ','; --len > 0 && isspace(*++p);)
+ for (*t++ = ',';
+ --len > 0 && __wt_isspace((u_char)*++p);)
;
if (len == 0)
break;
@@ -1489,8 +1509,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
*/
exist = false;
if (!is_create)
- WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist));
- ret = __wt_open(session, WT_SINGLETHREAD, WT_FILE_TYPE_REGULAR,
+ WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist));
+ ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR,
is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh);
/*
@@ -1499,17 +1519,14 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* if the file does not exist. If so, then ignore the error.
* XXX Ignoring the error does allow multiple read-only
* connections to exist at the same time on a read-only directory.
+ *
+ * If we got an expected permission or non-existence error then skip
+ * the byte lock.
*/
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- /*
- * If we got an expected permission or non-existence error
- * then skip the byte lock.
- */
- ret = __wt_map_error_rdonly(ret);
- if (ret == WT_NOTFOUND || ret == WT_PERM_DENIED) {
- bytelock = false;
- ret = 0;
- }
+ if (F_ISSET(conn, WT_CONN_READONLY) &&
+ (ret == EACCES || ret == ENOENT)) {
+ bytelock = false;
+ ret = 0;
}
WT_ERR(ret);
if (bytelock) {
@@ -1546,22 +1563,19 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
/* We own the lock file, optionally create the WiredTiger file. */
ret = __wt_open(session, WT_WIREDTIGER,
- WT_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh);
+ WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh);
/*
- * If we're read-only, check for success as well as handled errors.
- * Even if we're able to open the WiredTiger file successfully, we
- * do not try to lock it. The lock file test above is the only
- * one we do for read-only.
+ * If we're read-only, check for handled errors. Even if able to open
+ * the WiredTiger file successfully, we do not try to lock it. The
+ * lock file test above is the only one we do for read-only.
*/
if (F_ISSET(conn, WT_CONN_READONLY)) {
- ret = __wt_map_error_rdonly(ret);
- if (ret == 0 || ret == WT_NOTFOUND || ret == WT_PERM_DENIED)
+ if (ret == EACCES || ret == ENOENT)
ret = 0;
WT_ERR(ret);
} else {
WT_ERR(ret);
-
/*
* Lock the WiredTiger file (for backward compatibility reasons
* as described above). Immediately release the lock, it's
@@ -1583,13 +1597,14 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* and there's never a database home after that point without a turtle
* file. If the turtle file doesn't exist, it's a create.
*/
- WT_ERR(__wt_exist(session, WT_METADATA_TURTLE, &exist));
+ WT_ERR(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist));
conn->is_new = exist ? 0 : 1;
if (conn->is_new) {
if (F_ISSET(conn, WT_CONN_READONLY))
- WT_ERR_MSG(session, EINVAL, "Creating a new database is"
- " incompatible with read-only configuration.");
+ WT_ERR_MSG(session, EINVAL,
+ "Creating a new database is incompatible with "
+ "read-only configuration");
len = (size_t)snprintf(buf, sizeof(buf),
"%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING);
WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
@@ -1754,14 +1769,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
static int
__conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_FH *fh;
+ WT_FSTREAM *fs;
WT_CONFIG parser;
WT_CONFIG_ITEM cval, k, v;
WT_DECL_RET;
bool exist;
const char *base_config;
- fh = NULL;
+ fs = NULL;
base_config = NULL;
/*
@@ -1789,15 +1804,14 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
* only NOT exist if we crashed before it was created; in other words,
* if the base configuration file exists, we're done.
*/
- WT_RET(__wt_exist(session, WT_BASECONFIG, &exist));
+ WT_RET(__wt_fs_exist(session, WT_BASECONFIG, &exist));
if (exist)
return (0);
- WT_RET(__wt_open(session,
- WT_BASECONFIG_SET, WT_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE | WT_STREAM_WRITE, &fh));
+ WT_RET(__wt_fopen(session, WT_BASECONFIG_SET,
+ WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
- WT_ERR(__wt_fprintf(session, fh, "%s\n\n",
+ WT_ERR(__wt_fprintf(session, fs, "%s\n\n",
"# Do not modify this file.\n"
"#\n"
"# WiredTiger created this file when the database was created,\n"
@@ -1844,18 +1858,18 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
--v.str;
v.len += 2;
}
- WT_ERR(__wt_fprintf(session, fh,
+ WT_ERR(__wt_fprintf(session, fs,
"%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
}
WT_ERR_NOTFOUND_OK(ret);
- /* Flush the handle and rename the file into place. */
- ret = __wt_sync_handle_and_rename(
- session, &fh, WT_BASECONFIG_SET, WT_BASECONFIG);
+ /* Flush the stream and rename the file into place. */
+ ret = __wt_sync_and_rename(
+ session, &fs, WT_BASECONFIG_SET, WT_BASECONFIG);
if (0) {
/* Close open file handle, remove any temporary file. */
-err: WT_TRET(__wt_close(session, &fh));
+err: WT_TRET(__wt_fclose(session, &fs));
WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
}
@@ -1865,6 +1879,57 @@ err: WT_TRET(__wt_close(session, &fh));
}
/*
+ * __conn_set_file_system --
+ * Configure a custom file system implementation on database open.
+ *
+ * Records the caller-supplied WT_FILE_SYSTEM pointer in the connection's
+ * file_system field. The pointer is stored as given (no NULL or method
+ * checks are made here) and no configuration values are read: cfg is
+ * only marked unused. wiredtiger_open installs one of the built-in file
+ * systems only when this field is still NULL after early extensions are
+ * loaded, and then checks the required methods by calling
+ * __conn_chk_file_system.
+ */
+static int
+__conn_set_file_system(
+ WT_CONNECTION *wt_conn, WT_FILE_SYSTEM *file_system, const char *config)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, set_file_system, config, cfg);
+ WT_UNUSED(cfg);
+
+ conn->file_system = file_system;
+
+err: API_END_RET(session, ret);
+}
+
+/*
+ * __conn_chk_file_system --
+ * Check the configured file system.
+ *
+ * Verify that each WT_FILE_SYSTEM method this function treats as
+ * required is non-NULL on conn->file_system. A missing method is
+ * reported through WT_RET_MSG with EINVAL and a message naming it;
+ * when every check passes, 0 is returned. fs_remove and fs_rename are
+ * only required when readonly is false.
+ *
+ * NOTE(review): conn->file_system itself is dereferenced without a NULL
+ * check, so the caller must have set it before calling. The helper macro
+ * defined in the body is not #undef'd in the visible code.
+ */
+static int
+__conn_chk_file_system(WT_SESSION_IMPL *session, bool readonly)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+#define WT_CONN_SET_FILE_SYSTEM_REQ(name) \
+ if (conn->file_system->name == NULL) \
+ WT_RET_MSG(session, EINVAL, \
+ "a WT_FILE_SYSTEM.%s method must be configured", #name)
+
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list_free);
+ /* not required: directory_sync */
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_exist);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_open_file);
+ if (!readonly) {
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_remove);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_rename);
+ }
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_size);
+
+ return (0);
+}
+
+/*
* wiredtiger_open --
* Main library entry point: open a new connection to a WiredTiger
* database.
@@ -1888,12 +1953,13 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
__conn_add_compressor,
__conn_add_encryptor,
__conn_add_extractor,
+ __conn_set_file_system,
__conn_get_extension_api
};
static const WT_NAME_FLAG file_types[] = {
- { "checkpoint", WT_FILE_TYPE_CHECKPOINT },
- { "data", WT_FILE_TYPE_DATA },
- { "log", WT_FILE_TYPE_LOG },
+ { "checkpoint", WT_DIRECT_IO_CHECKPOINT },
+ { "data", WT_DIRECT_IO_DATA },
+ { "log", WT_DIRECT_IO_LOG },
{ NULL, 0 }
};
@@ -1942,6 +2008,14 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__wt_os_stdio(session));
__wt_event_handler_set(session, event_handler);
+ /*
+ * Set the default session's strerror method. If one of the extensions
+ * being loaded reports an error via the WT_EXTENSION_API strerror
+ * method, but doesn't supply that method a WT_SESSION handle, we'll
+ * use the WT_CONNECTION_IMPL's default session and its strerror method.
+ */
+ conn->default_session->iface.strerror = __wt_session_strerror;
+
/* Basic initialization of the connection structure. */
WT_ERR(__wt_connection_init(conn));
@@ -1983,10 +2057,27 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
F_SET(conn, WT_CONN_READONLY);
/*
- * After checking readonly and in-memory, but before we do anything that
- * touches the filesystem, configure the OS layer.
+ * Load early extensions before doing further initialization (one early
+ * extension is to configure a file system).
+ */
+ WT_ERR(__conn_load_extensions(session, cfg, true));
+
+ /*
+ * If the application didn't configure its own file system, configure
+ * one of ours. Check to ensure we have a valid file system.
*/
- WT_ERR(__wt_os_init(session));
+ if (conn->file_system == NULL) {
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_ERR(__wt_os_inmemory(session));
+ else
+#if defined(_MSC_VER)
+ WT_ERR(__wt_os_win(session));
+#else
+ WT_ERR(__wt_os_posix(session));
+#endif
+ }
+ WT_ERR(
+ __conn_chk_file_system(session, F_ISSET(conn, WT_CONN_READONLY)));
/*
* Capture the config_base setting file for later use. Again, if the
@@ -2036,7 +2127,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
__conn_config_append(cfg, version);
/* Ignore the base_config file if config_base_set is false. */
- if (config_base_set || F_ISSET(conn, WT_CONN_READONLY))
+ if (config_base_set)
WT_ERR(
__conn_config_file(session, WT_BASECONFIG, false, cfg, i1));
__conn_config_append(cfg, config);
@@ -2119,8 +2210,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
if (ret == 0) {
if (sval.val)
FLD_SET(conn->direct_io, ft->flag);
- } else if (ret != WT_NOTFOUND)
- goto err;
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
}
WT_ERR(__wt_config_gets(session, cfg, "write_through", &cval));
@@ -2129,8 +2220,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
if (ret == 0) {
if (sval.val)
FLD_SET(conn->write_through, ft->flag);
- } else if (ret != WT_NOTFOUND)
- goto err;
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
}
/*
@@ -2154,15 +2245,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
ret = __wt_config_subgets(session, &cval, ft->name, &sval);
if (ret == 0) {
switch (ft->flag) {
- case WT_FILE_TYPE_DATA:
+ case WT_DIRECT_IO_DATA:
conn->data_extend_len = sval.val;
break;
- case WT_FILE_TYPE_LOG:
+ case WT_DIRECT_IO_LOG:
conn->log_extend_len = sval.val;
break;
}
- } else if (ret != WT_NOTFOUND)
- goto err;
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
}
WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
@@ -2191,7 +2282,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* everything else to be in place, and the extensions call back into the
* library.
*/
- WT_ERR(__conn_load_extensions(session, cfg));
+ WT_ERR(__conn_load_default_extensions(conn));
+ WT_ERR(__conn_load_extensions(session, cfg, false));
/*
* The metadata/log encryptor is configured after extensions, since
@@ -2234,7 +2326,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
*/
WT_ERR(__wt_turtle_init(session));
- __wt_metadata_init(session);
WT_ERR(__wt_metadata_cursor(session, NULL));
/* Start the worker threads and run recovery. */
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 9dfd1cdcbfa..9f15db5382b 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -127,6 +127,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ int i;
conn = S2C(session);
@@ -157,13 +158,23 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
false, 10000, WT_MILLION, &cache->evict_cond));
WT_ERR(__wt_cond_alloc(session,
"eviction waiters", false, &cache->evict_waiter_cond));
- WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction"));
+ WT_ERR(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
+ WT_ERR(__wt_spin_init(session,
+ &cache->evict_queue_lock, "cache eviction queue"));
WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
+ if ((ret = __wt_open_internal_session(conn, "evict pass",
+ false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
+ WT_ERR_MSG(NULL, ret,
+ "Failed to create session for eviction walks");
/* Allocate the LRU eviction queue. */
cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
- WT_ERR(__wt_calloc_def(session,
- cache->evict_slots, &cache->evict_queue));
+ for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
+ WT_ERR(__wt_calloc_def(session,
+ cache->evict_slots, &cache->evict_queues[i].evict_queue));
+ WT_ERR(__wt_spin_init(session,
+ &cache->evict_queues[i].evict_lock, "cache eviction"));
+ }
/*
* We get/set some values in the cache statistics (rather than have
@@ -237,6 +248,8 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_SESSION *wt_session;
+ int i;
conn = S2C(session);
cache = conn->cache;
@@ -262,10 +275,17 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond));
WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond));
- __wt_spin_destroy(session, &cache->evict_lock);
+ __wt_spin_destroy(session, &cache->evict_pass_lock);
+ __wt_spin_destroy(session, &cache->evict_queue_lock);
__wt_spin_destroy(session, &cache->evict_walk_lock);
+ wt_session = &cache->walk_session->iface;
+ if (wt_session != NULL)
+ WT_TRET(wt_session->close(wt_session, NULL));
- __wt_free(session, cache->evict_queue);
+ for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
+ __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock);
+ __wt_free(session, cache->evict_queues[i].evict_queue);
+ }
__wt_free(session, conn->cache);
return (ret);
}
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index 72f23b015b7..75ecb6b3b4a 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -58,7 +58,6 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
created = updating = false;
pool_name = NULL;
cp = NULL;
- size = 0;
if (F_ISSET(conn, WT_CONN_CACHE_POOL))
updating = true;
@@ -310,6 +309,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
if (!F_ISSET(conn, WT_CONN_CACHE_POOL))
return (0);
+ F_CLR(conn, WT_CONN_CACHE_POOL);
__wt_spin_lock(session, &cp->cache_pool_lock);
cp_locked = true;
@@ -572,6 +572,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
cp = __wt_process.cache_pool;
grow = false;
pool_full = cp->currently_used >= cp->size;
+ pct_full = 0;
/* Highest as a percentage, avoid 0 */
highest_percentile = (highest / 100) + 1;
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 5019ab59fe3..08fb2b24468 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -39,6 +39,9 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
+ uint64_t bucket;
+
+ *dhandlep = NULL;
WT_RET(__wt_calloc_one(session, &dhandle));
@@ -57,6 +60,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
__wt_stat_dsrc_init(dhandle);
+ if (strcmp(uri, WT_METAFILE_URI) == 0)
+ F_SET(dhandle, WT_DHANDLE_IS_METADATA);
+
+ /*
+ * Prepend the handle to the connection list, assuming we're likely to
+ * need new files again soon, until they are cached by all sessions.
+ */
+ bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
+
*dhandlep = dhandle;
return (0);
@@ -106,14 +119,6 @@ __wt_conn_dhandle_find(
WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle));
- /*
- * Prepend the handle to the connection list, assuming we're likely to
- * need new files again soon, until they are cached by all sessions.
- * Find the right hash bucket to insert into as well.
- */
- bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket);
-
session->dhandle = dhandle;
return (0);
}
@@ -158,7 +163,8 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
/*
* We may not be holding the schema lock, and threads may be walking
* the list of open handles (for example, checkpoint). Acquire the
- * handle's close lock.
+ * handle's close lock. We don't have the sweep server acquire the
+ * handle's rwlock so we have to prevent races through the close code.
*/
__wt_spin_lock(session, &dhandle->close_lock);
@@ -538,6 +544,7 @@ __wt_conn_dhandle_discard_single(
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
int tret;
+ bool set_pass_intr;
dhandle = session->dhandle;
@@ -556,12 +563,17 @@ __wt_conn_dhandle_discard_single(
* Kludge: interrupt the eviction server in case it is holding the
* handle list lock.
*/
- if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST))
- F_SET(S2C(session)->cache, WT_CACHE_CLEAR_WALKS);
+ set_pass_intr = false;
+ if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
+ set_pass_intr = true;
+ (void)__wt_atomic_add32(&S2C(session)->cache->pass_intr, 1);
+ }
/* Try to remove the handle, protected by the data handle lock. */
WT_WITH_HANDLE_LIST_LOCK(session,
tret = __conn_dhandle_remove(session, final));
+ if (set_pass_intr)
+ (void)__wt_atomic_sub32(&S2C(session)->cache->pass_intr, 1);
WT_TRET(tret);
/*
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 5f4c38e7361..509966793e5 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -149,15 +149,17 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->page_lock[i]);
__wt_free(session, conn->page_lock);
+ /* Destroy the file-system configuration. */
+ if (conn->file_system != NULL && conn->file_system->terminate != NULL)
+ WT_TRET(conn->file_system->terminate(
+ conn->file_system, (WT_SESSION *)session));
+
/* Free allocated memory. */
__wt_free(session, conn->cfg);
__wt_free(session, conn->home);
__wt_free(session, conn->error_prefix);
__wt_free(session, conn->sessions);
- /* Destroy the OS configuration. */
- WT_TRET(__wt_os_cleanup(session));
-
__wt_free(NULL, conn);
return (ret);
}
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index f83430735ef..1ae370ef2fa 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -178,6 +178,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
conn = S2C(session);
log = conn->log;
logcount = 0;
+ locked = false;
logfiles = NULL;
/*
@@ -198,14 +199,14 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
* Main archive code. Get the list of all log files and
* remove any earlier than the minimum log number.
*/
- WT_RET(__wt_dirlist(session, conn->log_path,
- WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount));
+ WT_ERR(__wt_fs_directory_list(
+ session, conn->log_path, WT_LOG_FILENAME, &logfiles, &logcount));
/*
* We can only archive files if a hot backup is not in progress or
* if we are the backup.
*/
- WT_RET(__wt_readlock(session, conn->hot_backup_lock));
+ WT_ERR(__wt_readlock(session, conn->hot_backup_lock));
locked = true;
if (!conn->hot_backup || backup_file != 0) {
for (i = 0; i < logcount; i++) {
@@ -218,9 +219,6 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
}
WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
locked = false;
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
- logcount = 0;
/*
* Indicate what is our new earliest LSN. It is the start
@@ -232,8 +230,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
err: __wt_err(session, ret, "log archive server error");
if (locked)
WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
- if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
return (ret);
}
@@ -259,10 +256,9 @@ __log_prealloc_once(WT_SESSION_IMPL *session)
* Allocate up to the maximum number, accounting for any existing
* files that may not have been used yet.
*/
- WT_ERR(__wt_dirlist(session, conn->log_path,
- WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, &recfiles, &reccount));
- __wt_log_files_free(session, recfiles, reccount);
- recfiles = NULL;
+ WT_ERR(__wt_fs_directory_list(
+ session, conn->log_path, WT_LOG_PREPNAME, &recfiles, &reccount));
+
/*
* Adjust the number of files to pre-allocate if we find that
* the critical path had to allocate them since we last ran.
@@ -292,8 +288,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session)
if (0)
err: __wt_err(session, ret, "log pre-alloc server error");
- if (recfiles != NULL)
- __wt_log_files_free(session, recfiles, reccount);
+ WT_TRET(__wt_fs_directory_list_free(session, &recfiles, reccount));
return (ret);
}
@@ -314,12 +309,15 @@ __wt_log_truncate_files(
WT_UNUSED(cfg);
conn = S2C(session);
- log = conn->log;
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
if (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE))
WT_RET_MSG(session, EINVAL,
"Attempt to archive manually while a server is running");
+ log = conn->log;
+
backup_file = 0;
if (cursor != NULL)
backup_file = WT_CURSOR_BACKUP_ID(cursor);
@@ -327,6 +325,7 @@ __wt_log_truncate_files(
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_truncate_files: Archive once up to %" PRIu32,
backup_file));
+
WT_RET(__wt_writelock(session, log->log_archive_lock));
locked = true;
WT_ERR(__log_archive_once(session, backup_file));
@@ -677,7 +676,6 @@ __log_wrlsn_server(void *arg)
log = conn->log;
yield = 0;
WT_INIT_LSN(&prev);
- did_work = false;
while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
/*
* Write out any log record buffers if anything was done
@@ -692,10 +690,8 @@ __log_wrlsn_server(void *arg)
else
WT_STAT_FAST_CONN_INCR(session, log_write_lsn_skip);
prev = log->alloc_lsn;
- if (yield == 0)
- did_work = true;
- else
- did_work = false;
+ did_work = yield == 0;
+
/*
* If __wt_log_wrlsn did work we want to yield instead of sleep.
*/
@@ -865,9 +861,9 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
"log write LSN"));
WT_RET(__wt_rwlock_alloc(session,
&log->log_archive_lock, "log archive lock"));
- if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_LOG))
- log->allocsize =
- WT_MAX((uint32_t)conn->buffer_alignment, WT_LOG_ALIGN);
+ if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
+ log->allocsize = (uint32_t)
+ WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN);
else
log->allocsize = WT_LOG_ALIGN;
WT_INIT_LSN(&log->alloc_lsn);
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index fccc4786402..855ff57808e 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -209,11 +209,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
}
if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
- WT_ERR(__wt_fprintf(session, conn->stat_fh,
+ WT_ERR(__wt_fprintf(session, conn->stat_fs,
"{\"version\":\"%s\",\"localTime\":\"%s\"",
WIREDTIGER_VERSION_STRING, conn->stat_stamp));
WT_ERR(__wt_fprintf(
- session, conn->stat_fh, ",\"wiredTiger\":{"));
+ session, conn->stat_fs, ",\"wiredTiger\":{"));
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
/* Check if we are starting a new section. */
@@ -225,23 +225,23 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
strncmp(desc, tmp->data, tmp->size) != 0) {
WT_ERR(__wt_buf_set(
session, tmp, desc, prefixlen));
- WT_ERR(__wt_fprintf(session, conn->stat_fh,
+ WT_ERR(__wt_fprintf(session, conn->stat_fs,
"%s\"%.*s\":{", first ? "" : "},",
(int)prefixlen, desc));
first = false;
groupfirst = true;
}
- WT_ERR(__wt_fprintf(session, conn->stat_fh,
+ WT_ERR(__wt_fprintf(session, conn->stat_fs,
"%s\"%s\":%" PRId64,
groupfirst ? "" : ",", endprefix + 2, val));
groupfirst = false;
}
WT_ERR_NOTFOUND_OK(ret);
- WT_ERR(__wt_fprintf(session, conn->stat_fh, "}}}\n"));
+ WT_ERR(__wt_fprintf(session, conn->stat_fs, "}}}\n"));
} else {
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
- WT_ERR(__wt_fprintf(session, conn->stat_fh,
+ WT_ERR(__wt_fprintf(session, conn->stat_fs,
"%s %" PRId64 " %s %s\n",
conn->stat_stamp, val, name, desc));
}
@@ -354,7 +354,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
struct tm *tm, _tm;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_FH *log_file;
+ WT_FSTREAM *log_stream;
conn = S2C(session);
@@ -367,18 +367,16 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
WT_RET_MSG(session, ENOMEM, "strftime path conversion");
/* If the path has changed, cycle the log file. */
- if ((log_file = conn->stat_fh) == NULL ||
+ if ((log_stream = conn->stat_fs) == NULL ||
path == NULL || strcmp(tmp->mem, path->mem) != 0) {
- conn->stat_fh = NULL;
- WT_RET(__wt_close(session, &log_file));
+ WT_RET(__wt_fclose(session, &conn->stat_fs));
if (path != NULL)
(void)strcpy(path->mem, tmp->mem);
- WT_RET(__wt_open(session, tmp->mem,
- WT_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_OPEN_FIXED | WT_STREAM_APPEND,
- &log_file));
+ WT_RET(__wt_fopen(session, tmp->mem,
+ WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND,
+ &log_stream));
}
- conn->stat_fh = log_file;
+ conn->stat_fs = log_stream;
/* Create the entry prefix for this time of day. */
if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0)
@@ -411,7 +409,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
WT_RET(__statlog_lsm_apply(session));
/* Flush. */
- return (__wt_fsync(session, conn->stat_fh, true));
+ return (__wt_fflush(session, conn->stat_fs));
}
/*
@@ -597,7 +595,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
conn->stat_session = NULL;
conn->stat_tid_set = false;
conn->stat_format = NULL;
- WT_TRET(__wt_close(session, &conn->stat_fh));
+ WT_TRET(__wt_fclose(session, &conn->stat_fs));
conn->stat_path = NULL;
conn->stat_sources = NULL;
conn->stat_stamp = NULL;
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 5be9b311a79..4ee23008687 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -10,7 +10,6 @@
static int __backup_all(WT_SESSION_IMPL *);
static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
-static int __backup_file_create(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, bool);
static int __backup_list_append(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *);
@@ -178,8 +177,7 @@ __backup_log_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool active)
for (i = 0; i < logcount; i++)
WT_ERR(__backup_list_append(session, cb, logfiles[i]));
}
-err: if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
return (ret);
}
@@ -193,9 +191,13 @@ __backup_start(
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_FSTREAM *srcfs;
+ const char *dest;
bool exist, log_only, target_list;
conn = S2C(session);
+ srcfs = NULL;
+ dest = NULL;
cb->next = 0;
cb->list = NULL;
@@ -224,11 +226,16 @@ __backup_start(
conn->hot_backup = true;
WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock));
- /* Create the hot backup file. */
- WT_ERR(__backup_file_create(session, cb, false));
-
- /* Add log files if logging is enabled. */
-
+ /*
+ * Create a temporary backup file. This must be opened before
+ * generating the list of targets in backup_uri. This file will
+ * later be renamed to the correct name depending on whether or not
+ * we're doing an incremental backup. We need a temp file so that if
+ * we fail or crash while filling it, the existence of a partial file
+ * doesn't confuse restarting in the source database.
+ */
+ WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
+ WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
/*
* If a list of targets was specified, work our way through them.
* Else, generate a list of all database objects.
@@ -248,20 +255,23 @@ __backup_start(
/* Add the hot backup and standard WiredTiger files to the list. */
if (log_only) {
/*
- * Close any hot backup file.
- * We're about to open the incremental backup file.
+ * We also open an incremental backup source file so that we
+ * can detect a crash with an incremental backup existing in
+ * the source directory versus an improper destination.
*/
- WT_TRET(__wt_close(session, &cb->bfh));
- WT_ERR(__backup_file_create(session, cb, log_only));
+ dest = WT_INCREMENTAL_BACKUP;
+ WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC,
+ WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
WT_ERR(__backup_list_append(
session, cb, WT_INCREMENTAL_BACKUP));
} else {
+ dest = WT_METADATA_BACKUP;
WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
- WT_ERR(__wt_exist(session, WT_BASECONFIG, &exist));
+ WT_ERR(__wt_fs_exist(session, WT_BASECONFIG, &exist));
if (exist)
WT_ERR(__backup_list_append(
session, cb, WT_BASECONFIG));
- WT_ERR(__wt_exist(session, WT_USERCONFIG, &exist));
+ WT_ERR(__wt_fs_exist(session, WT_USERCONFIG, &exist));
if (exist)
WT_ERR(__backup_list_append(
session, cb, WT_USERCONFIG));
@@ -269,10 +279,15 @@ __backup_start(
}
err: /* Close the hot backup file. */
- WT_TRET(__wt_close(session, &cb->bfh));
+ WT_TRET(__wt_fclose(session, &cb->bfs));
+ if (srcfs != NULL)
+ WT_TRET(__wt_fclose(session, &srcfs));
if (ret != 0) {
WT_TRET(__backup_cleanup_handles(session, cb));
WT_TRET(__backup_stop(session));
+ } else {
+ WT_ASSERT(session, dest != NULL);
+ WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest));
}
return (ret);
@@ -384,13 +399,23 @@ __backup_uri(WT_SESSION_IMPL *session,
uri);
/*
- * Handle log targets. We do not need to go through the
- * schema worker, just call the function to append them.
- * Set log_only only if it is our only URI target.
+ * Handle log targets. We do not need to go through the schema
+ * worker, just call the function to append them. Set log_only
+ * only if it is our only URI target.
*/
if (WT_PREFIX_MATCH(uri, "log:")) {
+ /*
+ * Log archive cannot mix with incremental backup, don't
+ * let that happen.
+ */
+ if (FLD_ISSET(
+ S2C(session)->log_flags, WT_CONN_LOG_ARCHIVE))
+ WT_ERR_MSG(session, EINVAL,
+ "incremental backup not possible when "
+ "automatic log archival configured");
*log_only = !target_list;
- WT_ERR(__backup_list_uri_append(session, uri, NULL));
+ WT_ERR(__backup_log_append(
+ session, session->bkp_cursor, false));
} else {
*log_only = false;
WT_ERR(__wt_schema_worker(session,
@@ -404,19 +429,6 @@ err: __wt_scr_free(session, &tmp);
}
/*
- * __backup_file_create --
- * Create the meta-data backup file.
- */
-static int
-__backup_file_create(
- WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool incremental)
-{
- return (__wt_open(session,
- incremental ? WT_INCREMENTAL_BACKUP : WT_METADATA_BACKUP,
- WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &cb->bfh));
-}
-
-/*
* __wt_backup_file_remove --
* Remove the incremental and meta-data backup files.
*/
@@ -425,7 +437,15 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
+ /*
+ * Note that order matters for removing the incremental files. We must
+ * remove the backup file before removing the source file so that we
+ * always know we were a source directory while there's any chance of
+ * an incremental backup file existing.
+ */
+ WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP));
WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC));
WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP));
return (ret);
}
@@ -453,11 +473,6 @@ __backup_list_uri_append(
* if there's an entry backed by anything other than a file or lsm
* entry, we're confused.
*/
- if (WT_PREFIX_MATCH(name, "log:")) {
- WT_RET(__backup_log_append(session, cb, false));
- return (0);
- }
-
if (!WT_PREFIX_MATCH(name, "file:") &&
!WT_PREFIX_MATCH(name, "colgroup:") &&
!WT_PREFIX_MATCH(name, "index:") &&
@@ -473,7 +488,7 @@ __backup_list_uri_append(
/* Add the metadata entry to the backup file. */
WT_RET(__wt_metadata_search(session, name, &value));
- ret = __wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value);
+ ret = __wt_fprintf(session, cb->bfs, "%s\n%s\n", name, value);
__wt_free(session, value);
WT_RET(ret);
diff --git a/src/cursor/cur_bulk.c b/src/cursor/cur_bulk.c
index c013383fa61..d1a53057650 100644
--- a/src/cursor/cur_bulk.c
+++ b/src/cursor/cur_bulk.c
@@ -328,7 +328,6 @@ __wt_curbulk_init(WT_SESSION_IMPL *session,
c->insert = skip_sort_check ?
__curbulk_insert_row_skip_check : __curbulk_insert_row;
break;
- WT_ILLEGAL_VALUE(session);
}
cbulk->first_insert = true;
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index a7b1c98871a..595915df7b7 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -128,7 +128,7 @@ str2recno(WT_SESSION_IMPL *session, const char *p, uint64_t *recnop)
* forth -- none of them are OK with us. Check the string starts with
* digit, that turns off the special processing.
*/
- if (!isdigit(p[0]))
+ if (!__wt_isdigit((u_char)p[0]))
goto format;
errno = 0;
@@ -155,7 +155,9 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
WT_SESSION_IMPL *session;
uint64_t recno;
va_list ap;
+ const uint8_t *up;
const char *p;
+ bool json;
cdump = (WT_CURSOR_DUMP *)cursor;
child = cdump->child;
@@ -168,16 +170,23 @@ __curdump_set_key(WT_CURSOR *cursor, ...)
p = va_arg(ap, const char *);
va_end(ap);
+ json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON);
+ if (json)
+ WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
+ (WT_CURSOR_JSON *)cursor->json_private, true,
+ &cursor->key));
+
if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) {
- WT_ERR(str2recno(session, p, &recno));
+ if (json) {
+ up = (const uint8_t *)cursor->key.data;
+ WT_ERR(__wt_vunpack_uint(&up, cursor->key.size,
+ &recno));
+ } else
+ WT_ERR(str2recno(session, p, &recno));
child->set_key(child, recno);
} else {
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
- (WT_CURSOR_JSON *)cursor->json_private, true,
- &cursor->key));
- else
+ if (!json)
WT_ERR(__dump_to_raw(session, p, &cursor->key,
F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index dbe8046ca21..6de68d86778 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -8,20 +8,6 @@
#include "wt_internal.h"
- /*
- * __wt_curindex_joined --
- * Produce an error that this cursor is being used in a join call.
- */
-int
-__wt_curindex_joined(WT_CURSOR *cursor)
-{
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- __wt_errx(session, "index cursor is being used in a join");
- return (ENOTSUP);
-}
-
/*
* __curindex_get_value --
* WT_CURSOR->get_value implementation for index cursors.
@@ -462,7 +448,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session,
if (WT_CURSOR_RECNO(cursor))
WT_ERR_MSG(session, WT_ERROR,
"Column store indexes based on a record number primary "
- "key are not supported.");
+ "key are not supported");
/* Handle projections. */
if (columns != NULL) {
diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c
index 38a83217933..0760a07a3aa 100644
--- a/src/cursor/cur_join.c
+++ b/src/cursor/cur_join.c
@@ -8,159 +8,293 @@
#include "wt_internal.h"
+static int __curjoin_entries_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN *,
+ WT_ITEM *, WT_CURSOR_JOIN_ITER *);
+static int __curjoin_entry_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *,
+ WT_ITEM *, WT_CURSOR_JOIN_ITER *);
+static int __curjoin_entry_member(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *,
+ WT_ITEM *, WT_CURSOR_JOIN_ITER *);
static int __curjoin_insert_endpoint(WT_SESSION_IMPL *,
WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **);
+static int __curjoin_iter_close(WT_CURSOR_JOIN_ITER *);
+static int __curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *);
+static bool __curjoin_iter_ready(WT_CURSOR_JOIN_ITER *);
+static int __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *, u_int);
+static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *,
+ size_t, WT_ITEM *);
+static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *,
+ WT_CURSOR *, WT_CURSOR *, const char *, bool);
+
+#define WT_CURJOIN_ITER_CONSUMED(iter) \
+ ((iter)->entry_pos >= (iter)->entry_count)
/*
- * __curjoin_entry_iter_init --
+ * __wt_curjoin_joined --
+ * Produce an error that this cursor is being used in a join call.
+ */
+int
+__wt_curjoin_joined(WT_CURSOR *cursor)
+{
+	/* Report the misuse on the cursor's session, then fail the call. */
+	__wt_errx((WT_SESSION_IMPL *)cursor->session,
+	    "cursor is being used in a join");
+	return (ENOTSUP);
+}
+
+/*
+ * __curjoin_iter_init --
* Initialize an iteration for the index managed by a join entry.
- *
*/
static int
-__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp)
+__curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ WT_CURSOR_JOIN_ITER **iterp)
{
- WT_CURSOR *to_dup;
- WT_DECL_RET;
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "raw", NULL };
- const char *def_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), NULL };
- const char *urimain, **config;
- char *mainbuf, *uri;
WT_CURSOR_JOIN_ITER *iter;
- size_t size;
-
- iter = NULL;
- mainbuf = uri = NULL;
- to_dup = entry->ends[0].cursor;
-
- if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
- config = &raw_cfg[0];
- else
- config = &def_cfg[0];
-
- size = strlen(to_dup->internal_uri) + 3;
- WT_ERR(__wt_calloc(session, size, 1, &uri));
- snprintf(uri, size, "%s()", to_dup->internal_uri);
- urimain = cjoin->table->name;
- if (cjoin->projection != NULL) {
- size = strlen(urimain) + strlen(cjoin->projection) + 1;
- WT_ERR(__wt_calloc(session, size, 1, &mainbuf));
- snprintf(mainbuf, size, "%s%s", urimain, cjoin->projection);
- urimain = mainbuf;
- }
- WT_ERR(__wt_calloc_one(session, &iter));
- WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config,
- &iter->cursor));
- WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor));
- WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config,
- &iter->main));
+ *iterp = NULL;
+ WT_RET(__wt_calloc_one(session, iterp));
+ iter = *iterp;
iter->cjoin = cjoin;
iter->session = session;
- iter->entry = entry;
- iter->positioned = false;
- iter->isequal = (entry->ends_next == 1 &&
- WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
- *iterp = iter;
+ cjoin->iter = iter;
+ WT_RET(__curjoin_iter_set_entry(iter, 0));
+ return (0);
+}
- if (0) {
-err: __wt_free(session, iter);
- }
- __wt_free(session, mainbuf);
- __wt_free(session, uri);
+/*
+ * __curjoin_iter_close --
+ *	Close the iterator's cursor, if it has one, then free the iterator
+ *	itself; an error from the cursor close is returned to the caller.
+ */
+static int
+__curjoin_iter_close(WT_CURSOR_JOIN_ITER *iter)
+{
+	WT_CURSOR *c;
+	WT_DECL_RET;
+
+	if ((c = iter->cursor) != NULL)
+		WT_TRET(c->close(c));
+
+	/* The iterator is released even if closing its cursor failed. */
+	__wt_free(iter->session, iter);
return (ret);
}
/*
- * __curjoin_pack_recno --
- * Pack the given recno into a buffer; prepare an item referencing it.
- *
+ * __curjoin_iter_close_all --
+ * Free the iterator and all of its children recursively.
+ *
+ * Children are closed first. The parent iterator's child pointer (when
+ * the join has a parent) and the owning join cursor's iter pointer are
+ * cleared before the iterator itself is closed, so neither is left
+ * referring to the freed iterator.
*/
static int
-__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf,
- size_t bufsize, WT_ITEM *item)
+__curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *iter)
{
- WT_SESSION *wtsession;
- size_t sz;
+ WT_CURSOR_JOIN *parent;
+ WT_DECL_RET;
- wtsession = (WT_SESSION *)session;
- WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
- WT_ASSERT(session, sz < bufsize);
- WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
- item->size = sz;
- item->data = buf;
+ if (iter->child) /* depth-first: close nested iterators first */
+ WT_TRET(__curjoin_iter_close_all(iter->child));
+ iter->child = NULL;
+ WT_ASSERT(iter->session, iter->cjoin->parent == NULL ||
+ iter->cjoin->parent->iter->child == iter);
+ if ((parent = iter->cjoin->parent) != NULL)
+ parent->iter->child = NULL; /* unlink from the parent iterator */
+ iter->cjoin->iter = NULL; /* unlink from the owning join cursor */
+ WT_TRET(__curjoin_iter_close(iter));
+ return (ret);
+}
+
+/*
+ * __curjoin_iter_reset --
+ * Reset an iteration to the starting point.
+ *
+ * Any child iterator tree is closed, the iterator is moved back to
+ * entry 0 and it is marked as not positioned.
+ */
+static int
+__curjoin_iter_reset(WT_CURSOR_JOIN_ITER *iter)
+{
+ if (iter->child != NULL)
+ WT_RET(__curjoin_iter_close_all(iter->child));
+ WT_RET(__curjoin_iter_set_entry(iter, 0)); /* back to the first entry */
+ iter->positioned = false;
return (0);
}
/*
- * __curjoin_split_key --
- * Copy the primary key from a cursor (either main table or index)
- * to another cursor. When copying from an index file, the index
- * key is also returned.
- *
+ * __curjoin_iter_ready --
+ * Check the positioned flag for all nested iterators.
+ *
+ * Walks the chain of child iterators starting at iter and returns true
+ * only if every iterator in the chain is positioned. A NULL iter yields
+ * true because the loop body never runs.
+ */
+static bool
+__curjoin_iter_ready(WT_CURSOR_JOIN_ITER *iter)
+{
+ while (iter != NULL) {
+ if (!iter->positioned)
+ return (false);
+ iter = iter->child; /* descend to the nested iterator, if any */
+ }
+ return (true);
+}
+
+/*
+ * __curjoin_iter_set_entry --
+ * Set the current entry for an iterator.
+ *
+ * Records the entry position, recomputes the is_equal, end_skip,
+ * end_count and entry_count bookkeeping, and zeroes the entry's
+ * "iterated" statistic. For an entry without a subjoin, the iteration
+ * cursor is opened (or reopened when its URI differs) on the first
+ * endpoint's internal URI and positioned as a duplicate of that
+ * endpoint's cursor. For a subjoin entry, any existing iteration cursor
+ * is closed. The temporary URI buffer is freed on every path.
*/
static int
-__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur,
- const char *repack_fmt, bool isindex)
+__curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos)
{
- WT_CURSOR *firstcg_cur;
- WT_CURSOR_INDEX *cindex;
- WT_ITEM *keyp;
- const uint8_t *p;
+ WT_CURSOR *c, *to_dup;
+ WT_CURSOR_JOIN *cjoin, *topjoin;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ const char *raw_cfg[] = { WT_CONFIG_BASE(
+ iter->session, WT_SESSION_open_cursor), "raw", NULL };
+ const char *def_cfg[] = { WT_CONFIG_BASE(
+ iter->session, WT_SESSION_open_cursor), NULL };
+ const char **config;
+ char *uri;
+
+ session = iter->session;
+ cjoin = iter->cjoin;
+ uri = NULL;
+ entry = iter->entry = &cjoin->entries[entry_pos];
+ iter->positioned = false;
+ iter->entry_pos = entry_pos;
+ iter->end_pos = 0;
- if (isindex) {
- cindex = ((WT_CURSOR_INDEX *)fromcur);
- /*
- * Repack tells us where the index key ends; advance past
- * that to get where the raw primary key starts.
- */
- WT_RET(__wt_struct_repack(session, cindex->child->key_format,
- repack_fmt != NULL ? repack_fmt : cindex->iface.key_format,
- &cindex->child->key, idxkey));
- WT_ASSERT(session, cindex->child->key.size > idxkey->size);
- tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
- tocur->key.size = cindex->child->key.size - idxkey->size;
- if (WT_CURSOR_RECNO(tocur)) {
- p = (const uint8_t *)tocur->key.data;
- WT_RET(__wt_vunpack_uint(&p, tocur->key.size,
- &tocur->recno));
- } else
- tocur->recno = 0;
- } else {
- firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0];
- keyp = &firstcg_cur->key;
- if (WT_CURSOR_RECNO(tocur)) {
- WT_ASSERT(session, keyp->size == sizeof(uint64_t));
- tocur->recno = *(uint64_t *)keyp->data;
- WT_RET(__curjoin_pack_recno(session, tocur->recno,
- cjoin->recno_buf, sizeof(cjoin->recno_buf),
- &tocur->key));
- } else {
- WT_ITEM_SET(tocur->key, *keyp);
- tocur->recno = 0;
+ iter->is_equal = (entry->ends_next == 1 &&
+ WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
+ iter->end_skip = (entry->ends_next > 0 &&
+ WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE) ? 1 : 0;
+
+ iter->end_count = WT_MIN(1, entry->ends_next);
+ if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
+ iter->entry_count = cjoin->entries_next; /* every entry is iterated */
+ if (iter->is_equal)
+ iter->end_count = entry->ends_next;
+ } else
+ iter->entry_count = 1; /* only this entry is iterated */
+ WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count);
+
+ entry->stats.iterated = 0;
+
+ if (entry->subjoin == NULL) {
+ for (topjoin = iter->cjoin; topjoin->parent != NULL;
+ topjoin = topjoin->parent) /* find the outermost join cursor */
+ ;
+ to_dup = entry->ends[0].cursor;
+
+ if (F_ISSET((WT_CURSOR *)topjoin, WT_CURSTD_RAW))
+ config = &raw_cfg[0];
+ else
+ config = &def_cfg[0];
+
+ size = strlen(to_dup->internal_uri) + 3; /* "()" plus the NUL */
+ WT_ERR(__wt_calloc(session, size, 1, &uri));
+ snprintf(uri, size, "%s()", to_dup->internal_uri);
+ if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) {
+ iter->cursor = NULL; /* cleared before close so no stale pointer remains */
+ if (c != NULL)
+ WT_ERR(c->close(c));
+ WT_ERR(__wt_open_cursor(session, uri,
+ (WT_CURSOR *)topjoin, config, &iter->cursor));
}
- idxkey->data = NULL;
- idxkey->size = 0;
+ WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor));
+ } else if (iter->cursor != NULL) {
+ WT_ERR(iter->cursor->close(iter->cursor));
+ iter->cursor = NULL;
}
+
+err: __wt_free(session, uri);
+ return (ret);
+}
+
+/*
+ * __curjoin_iter_bump --
+ * Called to advance the iterator to the next endpoint, which may in turn
+ * advance to the next entry.
+ *
+ * The iterator is always left unpositioned. When no endpoint remains in
+ * the current entry, any open subjoin iterator of that entry is closed
+ * and the iterator moves to the next entry; once past the last entry,
+ * iter->entry is set to NULL and 0 is still returned.
+ */
+static int
+__curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter)
+{
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_SESSION_IMPL *session;
+
+ session = iter->session;
+ iter->positioned = false;
+ entry = iter->entry;
+ if (entry->subjoin == NULL && iter->is_equal &&
+ ++iter->end_pos < iter->end_count) {
+ WT_RET(__wt_cursor_dup_position(
+ entry->ends[iter->end_pos].cursor, iter->cursor));
+ return (0); /* stay on this entry, now at the next endpoint */
+ }
+ iter->end_pos = iter->end_count = iter->end_skip = 0;
+ if (entry->subjoin != NULL && entry->subjoin->iter != NULL)
+ WT_RET(__curjoin_iter_close_all(entry->subjoin->iter));
+
+ if (++iter->entry_pos >= iter->entry_count) {
+ iter->entry = NULL; /* no entries left to iterate */
+ return (0);
+ }
+ iter->entry = ++entry;
+ if (entry->subjoin != NULL) {
+ WT_RET(__curjoin_iter_init(session, entry->subjoin,
+ &iter->child));
+ return (0);
+ }
+ WT_RET(__curjoin_iter_set_entry(iter, iter->entry_pos));
return (0);
}
/*
- * __curjoin_entry_iter_next --
+ * __curjoin_iter_next --
* Get the next item in an iteration.
*
*/
static int
-__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor)
+__curjoin_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor)
{
- if (iter->positioned)
- WT_RET(iter->cursor->next(iter->cursor));
- else
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = iter->session;
+
+ if (WT_CURJOIN_ITER_CONSUMED(iter))
+ return (WT_NOTFOUND);
+again:
+ entry = iter->entry;
+ if (entry->subjoin != NULL) {
+ if (iter->child == NULL)
+ WT_RET(__curjoin_iter_init(session,
+ entry->subjoin, &iter->child));
+ ret = __curjoin_iter_next(iter->child, cursor);
+ if (ret == 0) {
+ /* The child did the work, we're done. */
+ iter->curkey = &cursor->key;
+ iter->positioned = true;
+ return (ret);
+ }
+ else if (ret == WT_NOTFOUND) {
+ WT_RET(__curjoin_iter_close_all(iter->child));
+ entry->subjoin->iter = NULL;
+ iter->child = NULL;
+ WT_RET(__curjoin_iter_bump(iter));
+ ret = 0;
+ }
+ } else if (iter->positioned) {
+ ret = iter->cursor->next(iter->cursor);
+ if (ret == WT_NOTFOUND) {
+ WT_RET(__curjoin_iter_bump(iter));
+ ret = 0;
+ } else
+ WT_RET(ret);
+ } else
iter->positioned = true;
+ if (WT_CURJOIN_ITER_CONSUMED(iter))
+ return (WT_NOTFOUND);
+
+ if (!__curjoin_iter_ready(iter))
+ goto again;
+
+ WT_RET(ret);
+
/*
* Set our key to the primary key, we'll also need this
* to check membership.
@@ -169,57 +303,385 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor)
cursor, iter->cursor, iter->entry->repack_format,
iter->entry->index != NULL));
iter->curkey = &cursor->key;
- iter->entry->stats.actual_count++;
- iter->entry->stats.accesses++;
+ iter->entry->stats.iterated++;
return (0);
}
/*
- * __curjoin_entry_iter_reset --
- * Reset an iteration to the starting point.
- *
+ * __curjoin_close --
+ * WT_CURSOR::close for join cursors.
+ *
+ * Releases the table reference and frees the projection and value
+ * format when a projection was configured. For each entry it detaches
+ * any subjoin, closes the entry's main cursor and any Bloom filter the
+ * entry owns, clears the JOINED flag on every endpoint cursor (closing
+ * those the join owns) and frees the endpoint array and repack format.
+ * Finally the iterator tree, the join's main cursor, the entry array and
+ * the cursor itself are closed or freed. Errors are accumulated with
+ * WT_TRET so that cleanup continues after a failure.
*/
static int
-__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter)
+__curjoin_close(WT_CURSOR *cursor)
{
- if (iter->positioned) {
- WT_RET(iter->cursor->reset(iter->cursor));
- WT_RET(iter->main->reset(iter->main));
- WT_RET(__wt_cursor_dup_position(
- iter->cjoin->entries[0].ends[0].cursor, iter->cursor));
- iter->positioned = false;
- iter->entry->stats.actual_count = 0;
+ WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_JOIN_ENDPOINT *end;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ cjoin = (WT_CURSOR_JOIN *)cursor;
+
+ JOINABLE_CURSOR_API_CALL(cursor, session, close, NULL);
+
+ __wt_schema_release_table(session, cjoin->table);
+ /* These are owned by the table */
+ cursor->internal_uri = NULL;
+ cursor->key_format = NULL;
+ if (cjoin->projection != NULL) {
+ __wt_free(session, cjoin->projection);
+ __wt_free(session, cursor->value_format);
+ }
+
+ for (entry = cjoin->entries, i = 0; i < cjoin->entries_next;
+ entry++, i++) {
+ if (entry->subjoin != NULL) {
+ F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED);
+ entry->subjoin->parent = NULL; /* the subjoin itself is not closed here */
+ }
+ if (entry->main != NULL)
+ WT_TRET(entry->main->close(entry->main));
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
+ WT_TRET(__wt_bloom_close(entry->bloom));
+ for (end = &entry->ends[0];
+ end < &entry->ends[entry->ends_next]; end++) {
+ F_CLR(end->cursor, WT_CURSTD_JOINED);
+ if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR))
+ WT_TRET(end->cursor->close(end->cursor));
+ }
+ __wt_free(session, entry->ends);
+ __wt_free(session, entry->repack_format);
+ }
+
+ if (cjoin->iter != NULL)
+ WT_TRET(__curjoin_iter_close_all(cjoin->iter));
+ if (cjoin->main != NULL)
+ WT_TRET(cjoin->main->close(cjoin->main));
+
+ __wt_free(session, cjoin->entries);
+ WT_TRET(__wt_cursor_close(cursor));
+
+err: API_END_RET(session, ret);
+}
+
+/*
+ * __curjoin_endpoint_init_key --
+ * Set the key in the reference endpoint.
+ *
+ * Does nothing when the endpoint has no cursor. For an index entry the
+ * index cursor's child key is repacked into endpoint->key, using the
+ * entry's repack format when one is set. For a table entry the key of
+ * the first column-group cursor is used: record-number keys are packed
+ * into the endpoint's own recno_buf, other keys are copied by
+ * structure assignment.
+ */
+static int
+__curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
+ WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
+{
+ WT_CURSOR *cursor;
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *k;
+ uint64_t r;
+
+ if ((cursor = endpoint->cursor) != NULL) {
+ if (entry->index != NULL) {
+ /* Extract and save the index's logical key. */
+ cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
+ WT_RET(__wt_struct_repack(session,
+ cindex->child->key_format,
+ (entry->repack_format != NULL ?
+ entry->repack_format : cindex->iface.key_format),
+ &cindex->child->key, &endpoint->key));
+ } else {
+ k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
+ if (WT_CURSOR_RECNO(cursor)) {
+ r = *(uint64_t *)k->data; /* NOTE(review): assumes k->data holds a raw uint64_t - confirm */
+ WT_RET(__curjoin_pack_recno(session, r,
+ endpoint->recno_buf,
+ sizeof(endpoint->recno_buf),
+ &endpoint->key));
+ } else
+ endpoint->key = *k; /* struct copy of the WT_ITEM; data is not duplicated here */
+ }
}
return (0);
}
/*
- * __curjoin_entry_iter_ready --
- * The iterator is positioned.
- *
+ * __curjoin_entries_in_range --
+ * Check if a key is in the range specified by the remaining entries,
+ * returning WT_NOTFOUND if not.
+ *
+ * The scan starts at the iterator's current entry, or at entry 0 when
+ * no iterator is passed. For a disjunction the first entry that accepts
+ * the key ends the scan with 0; for a conjunction the first entry that
+ * rejects it ends the scan with WT_NOTFOUND. Any other return value
+ * stops the loop and is passed back to the caller. Only the first entry
+ * checked receives the iterator; later entries are checked with a NULL
+ * iterator.
*/
-static bool
-__curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter)
+static int
+__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg)
{
- return (iter->positioned);
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_CURSOR_JOIN_ITER *iter;
+ WT_DECL_RET;
+ u_int pos;
+ int fastret, slowret;
+
+ iter = iterarg;
+ if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
+ fastret = 0; /* one match is enough */
+ slowret = WT_NOTFOUND; /* keep looking after a miss */
+ } else {
+ fastret = WT_NOTFOUND; /* one miss is enough */
+ slowret = 0; /* keep looking after a match */
+ }
+ pos = iter == NULL ? 0 : iter->entry_pos;
+ for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next;
+ entry++, pos++) {
+ ret = __curjoin_entry_member(session, entry, curkey, iter);
+ if (ret == fastret)
+ return (fastret);
+ if (ret != slowret) /* a real error: stop and report it */
+ break;
+ iter = NULL;
+ }
+
+ return (ret == 0 ? slowret : ret);
}
/*
- * __curjoin_entry_iter_close --
- * Close the iteration, release resources.
- *
+ * __curjoin_entry_in_range --
+ * Check if a key is in the range specified by the entry, returning
+ * WT_NOTFOUND if not.
+ *
+ * Each endpoint from the starting position onward is compared against
+ * curkey with the index collator (or no collator for a non-index
+ * entry). In a conjunction every endpoint must pass; in a disjunction
+ * one passing endpoint is enough. When an iterator is supplied and an
+ * equality or less-than bound fails, the iterator is bumped before
+ * WT_NOTFOUND is returned.
*/
static int
-__curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter)
+__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
+ WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter)
{
+ WT_COLLATOR *collator;
+ WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
+ bool disjunction, passed;
+ u_int pos;
+ int cmp;
+
+ collator = (entry->index != NULL) ? entry->index->collator : NULL;
+ endmax = &entry->ends[entry->ends_next];
+ disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION);
+ passed = false;
+
+ /*
+ * The iterator may have already satisfied some endpoint conditions.
+ * If so and we're a disjunction, we're done. If so and we're a
+ * conjunction, we can start past the satisfied conditions.
+ */
+ if (iter == NULL)
+ pos = 0;
+ else {
+ if (disjunction && iter->end_skip)
+ return (0);
+ pos = iter->end_pos + iter->end_skip;
+ }
+
+ for (end = &entry->ends[pos]; end < endmax; end++) { /* pos is not advanced by this loop */
+ WT_RET(__wt_compare(session, collator, curkey, &end->key,
+ &cmp));
+ switch (WT_CURJOIN_END_RANGE(end)) {
+ case WT_CURJOIN_END_EQ:
+ passed = (cmp == 0);
+ break;
+
+ case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ:
+ passed = (cmp >= 0);
+ WT_ASSERT(session, iter == NULL);
+ break;
+
+ case WT_CURJOIN_END_GT:
+ passed = (cmp > 0);
+ if (passed && iter != NULL && pos == 0)
+ iter->end_skip = 1; /* skip this bound on later checks */
+ break;
+
+ case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ:
+ passed = (cmp <= 0);
+ break;
+
+ case WT_CURJOIN_END_LT:
+ passed = (cmp < 0);
+ break;
+
+ default:
+ WT_RET(__wt_illegal_value(session, NULL));
+ break;
+ }
+
+ if (!passed) {
+ if (iter != NULL &&
+ (iter->is_equal ||
+ F_ISSET(end, WT_CURJOIN_END_LT))) {
+ WT_RET(__curjoin_iter_bump(iter));
+ return (WT_NOTFOUND);
+ }
+ if (!disjunction)
+ return (WT_NOTFOUND);
+ iter = NULL; /* remaining endpoints are checked without the iterator */
+ } else if (disjunction)
+ break;
+ }
+ if (disjunction && end == endmax) /* no endpoint accepted the key */
+ return (WT_NOTFOUND);
+ else
+ return (0);
+}
+
+typedef struct {
+ WT_CURSOR iface; /* cursor interface; first so a WT_CURSOR * can be cast back */
+ WT_CURSOR_JOIN_ENTRY *entry; /* join entry whose range the extracted keys are tested against */
+ bool ismember; /* set once an extracted key falls in the entry's range */
+} WT_CURJOIN_EXTRACTOR;
+
+/*
+ * __curjoin_extract_insert --
+ * Handle a key produced by a custom extractor.
+ *
+ * Installed as the insert method of the temporary extractor cursor.
+ * Each extracted key, minus its trailing padding byte, is tested
+ * against the entry's range; the first key in range sets ismember.
+ * A key that is out of range is not an error, so WT_NOTFOUND is mapped
+ * to 0; any other error is returned to the caller.
+ */
+static int
+__curjoin_extract_insert(WT_CURSOR *cursor)
+{
+ WT_CURJOIN_EXTRACTOR *cextract;
WT_DECL_RET;
+ WT_ITEM ikey;
+ WT_SESSION_IMPL *session;
- if (iter->cursor != NULL)
- WT_TRET(iter->cursor->close(iter->cursor));
- if (iter->main != NULL)
- WT_TRET(iter->main->close(iter->main));
- __wt_free(iter->session, iter);
+ cextract = (WT_CURJOIN_EXTRACTOR *)cursor;
+ /*
+ * This insert method may be called multiple times during a single
+ * extraction. If we already have a definitive answer to the
+ * membership question, exit early.
+ */
+ if (cextract->ismember)
+ return (0);
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_ITEM_SET(ikey, cursor->key);
+ /*
+ * We appended a padding byte to the key to avoid rewriting the last
+ * column. Strip that away here.
+ */
+ WT_ASSERT(session, ikey.size > 0);
+ --ikey.size;
+
+ /* No iterator is involved in an extractor check: pass a NULL pointer. */
+ ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, NULL);
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ else if (ret == 0)
+ cextract->ismember = true;
+
+ return (ret);
+}
+
+/*
+ * __curjoin_entry_member --
+ * Do a membership check for a particular index that was joined,
+ * if not a member, returns WT_NOTFOUND.
+ *
+ * The check is skipped (returning 0) when the iterator has already
+ * covered every endpoint of a non-subjoin entry. Otherwise an owned
+ * Bloom filter is consulted first, a subjoin entry is delegated to
+ * __curjoin_entries_in_range, and an index entry obtains its value
+ * either from the iterator's saved index key or by searching the
+ * entry's main cursor. Indices with a custom extractor run the extractor
+ * into a temporary cursor whose insert method performs the range test.
+ * A WT_NOTFOUND that follows a Bloom filter hit is counted as a Bloom
+ * false positive.
+ */
+static int
+__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
+ WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter)
+{
+ WT_CURJOIN_EXTRACTOR extract_cursor;
+ WT_CURSOR *c;
+ WT_CURSOR_STATIC_INIT(iface,
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_notsup, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curjoin_extract_insert, /* insert */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup); /* close */
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_ITEM v;
+ bool bloom_found;
+
+ if (entry->subjoin == NULL && iter != NULL &&
+ (iter->end_pos + iter->end_skip >= entry->ends_next ||
+ (iter->end_skip > 0 &&
+ F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))))
+ return (0); /* no checks to make */
+
+ entry->stats.membership_check++;
+ bloom_found = false;
+
+ if (entry->bloom != NULL) {
+ /*
+ * If we don't own the Bloom filter, we must be sharing one
+ * in a previous entry. So the shared filter has already
+ * been checked and passed.
+ */
+ if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
+ return (0);
+
+ /*
+ * If the item is not in the Bloom filter, we return
+ * immediately, otherwise, we still need to check the
+ * long way.
+ */
+ WT_ERR(__wt_bloom_inmem_get(entry->bloom, key));
+ bloom_found = true;
+ }
+ if (entry->subjoin != NULL) {
+ WT_ASSERT(session,
+ iter == NULL || entry->subjoin == iter->child->cjoin);
+ ret = __curjoin_entries_in_range(session, entry->subjoin,
+ key, iter == NULL ? NULL : iter->child);
+ if (iter != NULL &&
+ WT_CURJOIN_ITER_CONSUMED(iter->child)) {
+ WT_ERR(__curjoin_iter_bump(iter));
+ ret = WT_NOTFOUND;
+ }
+ return (ret); /* direct return: skips the false-positive accounting at err */
+ }
+ if (entry->index != NULL) {
+ /*
+ * If this entry is used by the iterator, then we already
+ * have the index key, and we won't have to do any
+ * extraction either.
+ */
+ if (iter != NULL && entry == iter->entry)
+ WT_ITEM_SET(v, iter->idxkey);
+ else {
+ memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
+ c = entry->main;
+ c->set_key(c, key);
+ entry->stats.main_access++;
+ if ((ret = c->search(c)) == 0)
+ ret = c->get_value(c, &v);
+ else if (ret == WT_NOTFOUND)
+ WT_ERR_MSG(session, WT_ERROR,
+ "main table for join is missing entry");
+ WT_TRET(c->reset(c));
+ WT_ERR(ret);
+ }
+ } else
+ WT_ITEM_SET(v, *key);
+
+ if ((idx = entry->index) != NULL && idx->extractor != NULL &&
+ (iter == NULL || entry != iter->entry)) {
+ WT_CLEAR(extract_cursor);
+ extract_cursor.iface = iface;
+ extract_cursor.iface.session = &session->iface;
+ extract_cursor.iface.key_format = idx->exkey_format;
+ extract_cursor.ismember = false;
+ extract_cursor.entry = entry;
+ WT_ERR(idx->extractor->extract(idx->extractor,
+ &session->iface, key, &v, &extract_cursor.iface));
+ if (!extract_cursor.ismember)
+ WT_ERR(WT_NOTFOUND);
+ } else
+ WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter));
+
+ if (0) {
+err: if (ret == WT_NOTFOUND && bloom_found)
+ entry->stats.bloom_false_positive++;
+ }
return (ret);
}
@@ -238,10 +700,10 @@ __curjoin_get_key(WT_CURSOR *cursor, ...)
cjoin = (WT_CURSOR_JOIN *)cursor;
va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_key, NULL);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL);
if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) ||
- !__curjoin_entry_iter_ready(cjoin->iter))
+ !cjoin->iter->positioned)
WT_ERR_MSG(session, EINVAL,
"join cursor must be advanced with next()");
WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap));
@@ -258,23 +720,21 @@ static int
__curjoin_get_value(WT_CURSOR *cursor, ...)
{
WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_JOIN_ITER *iter;
WT_DECL_RET;
WT_SESSION_IMPL *session;
va_list ap;
cjoin = (WT_CURSOR_JOIN *)cursor;
- iter = cjoin->iter;
va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_value, NULL);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) ||
- !__curjoin_entry_iter_ready(iter))
+ !cjoin->iter->positioned)
WT_ERR_MSG(session, EINVAL,
"join cursor must be advanced with next()");
- WT_ERR(__wt_curtable_get_valuev(iter->main, ap));
+ WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap));
err: va_end(ap);
API_END_RET(session, ret);
@@ -291,14 +751,15 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_COLLATOR *collator;
WT_CURSOR *c;
WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- WT_DECL_RET;
WT_DECL_ITEM(uribuf);
+ WT_DECL_RET;
WT_ITEM curkey, curvalue;
+ size_t size;
+ u_int skip;
+ int cmp;
+ const char *uri;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
- const char *uri;
- size_t size;
- int cmp, skip;
c = NULL;
skip = 0;
@@ -340,6 +801,7 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
collator = (entry->index == NULL) ? NULL : entry->index->collator;
while (ret == 0) {
WT_ERR(c->get_key(c, &curkey));
+ entry->stats.iterated++;
if (entry->index != NULL) {
/*
* Repack so it's comparable to the
@@ -354,7 +816,34 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
for (end = &entry->ends[skip]; end < endmax; end++) {
WT_ERR(__wt_compare(session, collator, &curkey,
&end->key, &cmp));
- if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
+ /* if condition satisfied, insert immediately */
+ switch (WT_CURJOIN_END_RANGE(end)) {
+ case WT_CURJOIN_END_EQ:
+ if (cmp == 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_GT:
+ if (cmp > 0) {
+ /* skip this check next time */
+ skip = entry->ends_next;
+ goto insert;
+ }
+ break;
+ case WT_CURJOIN_END_GE:
+ if (cmp >= 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_LT:
+ if (cmp < 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_LE:
+ if (cmp <= 0)
+ goto insert;
+ break;
+ }
+ } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
if (cmp < 0 || (cmp == 0 &&
!F_ISSET(end, WT_CURJOIN_END_EQ)))
goto advance;
@@ -370,6 +859,14 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
goto done;
}
}
+ /*
+ * Either it's a disjunction that hasn't satisfied any
+ * condition, or it's a conjunction that has satisfied all
+ * conditions.
+ */
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
+ goto advance;
+insert:
if (entry->index != NULL) {
curvalue.data =
(unsigned char *)curkey.data + curkey.size;
@@ -379,7 +876,7 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
else
WT_ERR(c->get_key(c, &curvalue));
WT_ERR(__wt_bloom_insert(bloom, &curvalue));
- entry->stats.actual_count++;
+ entry->stats.bloom_insert++;
advance:
if ((ret = c->next(c)) == WT_NOTFOUND)
break;
@@ -394,107 +891,86 @@ err: if (c != NULL)
}
/*
- * __curjoin_endpoint_init_key --
- * Set the key in the reference endpoint.
+ * __curjoin_init_next --
+ * Initialize the cursor join when the next function is first called.
*/
static int
-__curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
- WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
-{
- WT_CURSOR *cursor;
- WT_CURSOR_INDEX *cindex;
- WT_ITEM *k;
- uint64_t r;
-
- if ((cursor = endpoint->cursor) != NULL) {
- if (entry->index != NULL) {
- /* Extract and save the index's logical key. */
- cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
- WT_RET(__wt_struct_repack(session,
- cindex->child->key_format,
- (entry->repack_format != NULL ?
- entry->repack_format : cindex->iface.key_format),
- &cindex->child->key, &endpoint->key));
- } else {
- k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
- if (WT_CURSOR_RECNO(cursor)) {
- r = *(uint64_t *)k->data;
- WT_RET(__curjoin_pack_recno(session, r,
- endpoint->recno_buf,
- sizeof(endpoint->recno_buf),
- &endpoint->key));
- }
- else
- endpoint->key = *k;
- }
- }
- return (0);
-}
-
-/*
- * __curjoin_init_iter --
- * Initialize before any iteration.
- */
-static int
-__curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
+__curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ bool iterable)
{
WT_BLOOM *bloom;
- WT_DECL_RET;
WT_CURSOR *origcur;
- WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2;
WT_CURSOR_JOIN_ENDPOINT *end;
+ WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2;
+ WT_DECL_RET;
+ size_t size;
+ uint32_t f, k;
+ char *mainbuf;
const char *def_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), NULL };
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
- uint32_t f, k;
+ const char **config, *proj, *urimain;
+ mainbuf = NULL;
if (cjoin->entries_next == 0)
WT_RET_MSG(session, EINVAL,
"join cursor has not yet been joined with any other "
"cursors");
- je = &cjoin->entries[0];
- jeend = &cjoin->entries[cjoin->entries_next];
-
- /*
- * For a single compare=le endpoint in the first iterated entry,
- * construct a companion compare=ge endpoint that will actually
- * be iterated.
- */
- if (((je = cjoin->entries) != jeend) &&
- je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) {
- origcur = je->ends[0].cursor;
- WT_RET(__curjoin_insert_endpoint(session, je, 0, &end));
- WT_RET(__wt_open_cursor(session, origcur->uri,
- (WT_CURSOR *)cjoin,
- F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg,
- &end->cursor));
- WT_RET(end->cursor->next(end->cursor));
- end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ |
- WT_CURJOIN_END_OWN_CURSOR;
+ if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
+ config = &raw_cfg[0];
+ else
+ config = &def_cfg[0];
+ urimain = cjoin->table->name;
+ if ((proj = cjoin->projection) != NULL) {
+ size = strlen(urimain) + strlen(proj) + 1;
+ WT_ERR(__wt_calloc(session, size, 1, &mainbuf));
+ snprintf(mainbuf, size, "%s%s", urimain, proj);
+ urimain = mainbuf;
}
- WT_RET(__curjoin_entry_iter_init(session, cjoin, je, &cjoin->iter));
+ WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config,
+ &cjoin->main));
+ jeend = &cjoin->entries[cjoin->entries_next];
for (je = cjoin->entries; je < jeend; je++) {
+ if (je->subjoin != NULL) {
+ WT_ERR(__curjoin_init_next(session, je->subjoin,
+ iterable));
+ continue;
+ }
__wt_stat_join_init_single(&je->stats);
+ /*
+ * For a single compare=le/lt endpoint in any entry that may
+ * be iterated, construct a companion compare=ge endpoint
+ * that will actually be iterated.
+ */
+ if (iterable && je->ends_next == 1 &&
+ F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) {
+ origcur = je->ends[0].cursor;
+ WT_ERR(__curjoin_insert_endpoint(session, je, 0, &end));
+ WT_ERR(__wt_open_cursor(session, origcur->uri,
+ (WT_CURSOR *)cjoin,
+ F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg,
+ &end->cursor));
+ end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ |
+ WT_CURJOIN_END_OWN_CURSOR;
+ WT_ERR(end->cursor->next(end->cursor));
+ F_CLR(je, WT_CURJOIN_ENTRY_DISJUNCTION);
+ }
for (end = &je->ends[0]; end < &je->ends[je->ends_next];
end++)
- WT_RET(__curjoin_endpoint_init_key(session, je, end));
+ WT_ERR(__curjoin_endpoint_init_key(session, je, end));
/*
- * The first entry is iterated as the 'outermost' cursor.
- * For the common GE case, we don't have to test against
- * the left reference key, we know it will be true since
- * the btree is ordered.
+ * Do any needed Bloom filter initialization. Ignore Bloom
+ * filters for entries that will be iterated. They won't
+ * help since these entries either don't need an inclusion
+ * check or are doing any needed check during the iteration.
*/
- if (je == cjoin->entries && je->ends[0].flags ==
- (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ))
- F_SET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT);
-
- if (F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) {
+ if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) {
if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED)
- WT_RET_MSG(session, EINVAL,
+ WT_ERR_MSG(session, EINVAL,
"join cursors with Bloom filters cannot be "
"used with read-uncommitted isolation");
if (je->bloom == NULL) {
@@ -516,10 +992,10 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
}
je->bloom_bit_count = f;
je->bloom_hash_count = k;
- WT_RET(__wt_bloom_create(session, NULL,
+ WT_ERR(__wt_bloom_create(session, NULL,
NULL, je->count, f, k, &je->bloom));
F_SET(je, WT_CURJOIN_ENTRY_OWN_BLOOM);
- WT_RET(__curjoin_init_bloom(session, cjoin,
+ WT_ERR(__curjoin_init_bloom(session, cjoin,
je, je->bloom));
/*
* Share the Bloom filter, making all
@@ -541,201 +1017,45 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin)
* merge into the shared one. The Bloom
* parameters of the two filters must match.
*/
- WT_RET(__wt_bloom_create(session, NULL,
+ WT_ERR(__wt_bloom_create(session, NULL,
NULL, je->count, je->bloom_bit_count,
je->bloom_hash_count, &bloom));
- WT_RET(__curjoin_init_bloom(session, cjoin,
+ WT_ERR(__curjoin_init_bloom(session, cjoin,
je, bloom));
- WT_RET(__wt_bloom_intersection(je->bloom,
+ WT_ERR(__wt_bloom_intersection(je->bloom,
bloom));
- WT_RET(__wt_bloom_close(bloom));
+ WT_ERR(__wt_bloom_close(bloom));
}
}
+ if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ iterable = false;
}
-
F_SET(cjoin, WT_CURJOIN_INITIALIZED);
- return (ret);
-}
-
-/*
- * __curjoin_entry_in_range --
- * Check if a key is in the range specified by the entry, returning
- * WT_NOTFOUND if not.
- */
-static int
-__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
- WT_ITEM *curkey, bool skip_left)
-{
- WT_COLLATOR *collator;
- WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- int cmp;
-
- collator = (entry->index != NULL) ? entry->index->collator : NULL;
- endmax = &entry->ends[entry->ends_next];
- for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) {
- WT_RET(__wt_compare(session, collator, curkey, &end->key,
- &cmp));
- if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
- if (cmp < 0 ||
- (cmp == 0 &&
- !F_ISSET(end, WT_CURJOIN_END_EQ)) ||
- (cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT)))
- WT_RET(WT_NOTFOUND);
- } else {
- if (cmp > 0 ||
- (cmp == 0 &&
- !F_ISSET(end, WT_CURJOIN_END_EQ)) ||
- (cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT)))
- WT_RET(WT_NOTFOUND);
- }
- }
- return (0);
-}
-
-typedef struct {
- WT_CURSOR iface;
- WT_CURSOR_JOIN_ENTRY *entry;
- bool ismember;
-} WT_CURJOIN_EXTRACTOR;
-
-/*
- * __curjoin_extract_insert --
- * Handle a key produced by a custom extractor.
- */
-static int
-__curjoin_extract_insert(WT_CURSOR *cursor) {
- WT_CURJOIN_EXTRACTOR *cextract;
- WT_DECL_RET;
- WT_ITEM ikey;
- WT_SESSION_IMPL *session;
-
- cextract = (WT_CURJOIN_EXTRACTOR *)cursor;
- /*
- * This insert method may be called multiple times during a single
- * extraction. If we already have a definitive answer to the
- * membership question, exit early.
- */
- if (cextract->ismember)
- return (0);
-
- session = (WT_SESSION_IMPL *)cursor->session;
-
- WT_ITEM_SET(ikey, cursor->key);
- /*
- * We appended a padding byte to the key to avoid rewriting the last
- * column. Strip that away here.
- */
- WT_ASSERT(session, ikey.size > 0);
- --ikey.size;
-
- ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false);
- if (ret == WT_NOTFOUND)
- ret = 0;
- else if (ret == 0)
- cextract->ismember = true;
+err: __wt_free(session, mainbuf);
return (ret);
}
/*
- * __curjoin_entry_member --
- * Do a membership check for a particular index that was joined,
- * if not a member, returns WT_NOTFOUND.
+ * __curjoin_insert_endpoint --
+ * Insert a new entry into the endpoint array for the join entry.
*/
static int
-__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_CURSOR_JOIN_ENTRY *entry, bool skip_left)
+__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
+ u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp)
{
- WT_CURJOIN_EXTRACTOR extract_cursor;
- WT_CURSOR *c;
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_notsup, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __curjoin_extract_insert, /* insert */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup); /* close */
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_ITEM *key, v;
- bool bloom_found;
-
- if (skip_left && entry->ends_next == 1)
- return (0); /* no checks to make */
- key = cjoin->iter->curkey;
- entry->stats.accesses++;
- bloom_found = false;
-
- if (entry->bloom != NULL) {
- /*
- * If we don't own the Bloom filter, we must be sharing one
- * in a previous entry. So the shared filter has already
- * been checked and passed.
- */
- if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
- return (0);
-
- /*
- * If the item is not in the Bloom filter, we return
- * immediately, otherwise, we still need to check the
- * long way.
- */
- WT_ERR(__wt_bloom_inmem_get(entry->bloom, key));
- bloom_found = true;
- }
- if (entry->index != NULL) {
- /*
- * If this entry is used by the iterator, then we already
- * have the index key, and we won't have to do any extraction
- * either.
- */
- if (entry == cjoin->iter->entry)
- WT_ITEM_SET(v, cjoin->iter->idxkey);
- else {
- memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
- c = entry->main;
- c->set_key(c, key);
- if ((ret = c->search(c)) == 0)
- ret = c->get_value(c, &v);
- else if (ret == WT_NOTFOUND)
- WT_ERR_MSG(session, WT_ERROR,
- "main table for join is missing entry");
- WT_TRET(c->reset(c));
- WT_ERR(ret);
- }
- } else
- WT_ITEM_SET(v, *key);
+ WT_CURSOR_JOIN_ENDPOINT *newend;
- if ((idx = entry->index) != NULL && idx->extractor != NULL &&
- entry != cjoin->iter->entry) {
- WT_CLEAR(extract_cursor);
- extract_cursor.iface = iface;
- extract_cursor.iface.session = &session->iface;
- extract_cursor.iface.key_format = idx->exkey_format;
- extract_cursor.ismember = false;
- extract_cursor.entry = entry;
- WT_ERR(idx->extractor->extract(idx->extractor,
- &session->iface, key, &v, &extract_cursor.iface));
- if (!extract_cursor.ismember)
- WT_ERR(WT_NOTFOUND);
- } else
- WT_ERR(__curjoin_entry_in_range(session, entry, &v, skip_left));
+ WT_RET(__wt_realloc_def(session, &entry->ends_allocated,
+ entry->ends_next + 1, &entry->ends));
+ newend = &entry->ends[pos];
+ memmove(newend + 1, newend,
+ (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ entry->ends_next++;
+ *newendp = newend;
- if (0) {
-err: if (ret == WT_NOTFOUND && bloom_found)
- entry->stats.bloom_false_positive++;
- }
- return (ret);
+ return (0);
}
/*
@@ -750,61 +1070,53 @@ __curjoin_next(WT_CURSOR *cursor)
WT_CURSOR_JOIN_ITER *iter;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- bool skip_left;
- u_int i;
+ int tret;
cjoin = (WT_CURSOR_JOIN *)cursor;
- CURSOR_API_CALL(cursor, session, next, NULL);
+ JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
if (F_ISSET(cjoin, WT_CURJOIN_ERROR))
WT_ERR_MSG(session, WT_ERROR,
"join cursor encountered previous error");
if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
- WT_ERR(__curjoin_init_iter(session, cjoin));
-
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_ERR(__curjoin_init_next(session, cjoin, true));
+ if (cjoin->iter == NULL)
+ WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter));
iter = cjoin->iter;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-nextkey:
- if ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) {
- F_SET(cursor, WT_CURSTD_KEY_EXT);
+ while ((ret = __curjoin_iter_next(iter, cursor)) == 0) {
+ if ((ret = __curjoin_entries_in_range(session, cjoin,
+ iter->curkey, iter)) != WT_NOTFOUND)
+ break;
+ }
+ iter->positioned = (ret == 0);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ WT_ERR(ret);
+ if (ret == 0) {
/*
- * We may have already established membership for the
- * 'left' case for the first entry, since we're
- * using that in our iteration.
+ * Position the 'main' cursor, this will be used to retrieve
+ * values from the cursor join. The key we have is raw, but
+ * the main cursor may not be raw.
*/
- skip_left = F_ISSET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT);
- for (i = 0; i < cjoin->entries_next; i++) {
- ret = __curjoin_entry_member(session, cjoin,
- &cjoin->entries[i], skip_left);
- if (ret == WT_NOTFOUND) {
- /*
- * If this is compare=eq on our outer iterator,
- * and we've moved past it, we're done.
- */
- if (iter->isequal && i == 0)
- break;
- goto nextkey;
- }
- skip_left = false;
- WT_ERR(ret);
- }
- } else if (ret != WT_NOTFOUND)
- WT_ERR(ret);
+ c = cjoin->main;
+ __wt_cursor_set_raw_key(c, iter->curkey);
- if (ret == 0) {
/*
- * Position the 'main' cursor, this will be used to
- * retrieve values from the cursor join.
+ * A failed search is not expected, convert WT_NOTFOUND into a
+ * generic error.
*/
- c = iter->main;
- c->set_key(c, iter->curkey);
- if ((ret = c->search(c)) != 0)
- WT_ERR(c->search(c));
+ iter->entry->stats.main_access++;
+ if ((ret = c->search(c)) == WT_NOTFOUND)
+ ret = WT_ERROR;
+ WT_ERR(ret);
+
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- }
+ } else if (ret == WT_NOTFOUND &&
+ (tret = __curjoin_iter_close_all(iter)) != 0)
+ WT_ERR(tret);
if (0) {
err: F_SET(cjoin, WT_CURJOIN_ERROR);
@@ -813,78 +1125,146 @@ err: F_SET(cjoin, WT_CURJOIN_ERROR);
}
/*
- * __curjoin_reset --
- * WT_CURSOR::reset for join cursors.
+ * __curjoin_open_main --
+ * For the given index, open the main file with a projection
+ * that is the index keys.
*/
static int
-__curjoin_reset(WT_CURSOR *cursor)
+__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ WT_CURSOR_JOIN_ENTRY *entry)
{
- WT_CURSOR_JOIN *cjoin;
WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_INDEX *idx;
+ size_t len, newsize;
+ char *main_uri, *newformat;
+ const char *raw_cfg[] = { WT_CONFIG_BASE(
+ session, WT_SESSION_open_cursor), "raw", NULL };
- cjoin = (WT_CURSOR_JOIN *)cursor;
+ main_uri = NULL;
+ idx = entry->index;
+
+ newsize = strlen(cjoin->table->name) + idx->colconf.len + 1;
+ WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
+ snprintf(main_uri, newsize, "%s%.*s",
+ cjoin->table->name, (int)idx->colconf.len, idx->colconf.str);
+ WT_ERR(__wt_open_cursor(session, main_uri,
+ (WT_CURSOR *)cjoin, raw_cfg, &entry->main));
+ if (idx->extractor == NULL) {
+ /*
+ * Add no-op padding so trailing 'u' formats are not
+ * transformed to 'U'. This matches what happens in
+ * the index. We don't do this when we have an
+ * extractor, extractors already use the padding
+ * byte trick.
+ */
+ len = strlen(entry->main->value_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &newformat));
+ snprintf(newformat, len, "%s0x", entry->main->value_format);
+ __wt_free(session, entry->main->value_format);
+ entry->main->value_format = newformat;
+ }
- CURSOR_API_CALL(cursor, session, reset, NULL);
+err: __wt_free(session, main_uri);
+ return (ret);
+}
- if (F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
- WT_ERR(__curjoin_entry_iter_reset(cjoin->iter));
+/*
+ * __curjoin_pack_recno --
+ * Pack the given recno into a buffer; prepare an item referencing it.
+ *
+ */
+static int
+__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf,
+ size_t bufsize, WT_ITEM *item)
+{
+ WT_SESSION *wtsession;
+ size_t sz;
-err: API_END_RET(session, ret);
+ wtsession = (WT_SESSION *)session;
+ WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
+ WT_ASSERT(session, sz < bufsize);
+ WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
+ item->size = sz;
+ item->data = buf;
+ return (0);
}
/*
- * __curjoin_close --
- * WT_CURSOR::close for join cursors.
+ * __curjoin_reset --
+ * WT_CURSOR::reset for join cursors.
*/
static int
-__curjoin_close(WT_CURSOR *cursor)
+__curjoin_reset(WT_CURSOR *cursor)
{
WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_JOIN_ENDPOINT *end;
- WT_CURSOR_JOIN_ENTRY *entry;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- u_int i;
cjoin = (WT_CURSOR_JOIN *)cursor;
- CURSOR_API_CALL(cursor, session, close, NULL);
-
- __wt_schema_release_table(session, cjoin->table);
- /* These are owned by the table */
- cursor->internal_uri = NULL;
- cursor->key_format = NULL;
- if (cjoin->projection != NULL) {
- __wt_free(session, cjoin->projection);
- __wt_free(session, cursor->value_format);
- }
-
- for (entry = cjoin->entries, i = 0; i < cjoin->entries_next;
- entry++, i++) {
- if (entry->main != NULL)
- WT_TRET(entry->main->close(entry->main));
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
- WT_TRET(__wt_bloom_close(entry->bloom));
- for (end = &entry->ends[0];
- end < &entry->ends[entry->ends_next]; end++) {
- F_CLR(end->cursor, WT_CURSTD_JOINED);
- if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR))
- WT_TRET(end->cursor->close(end->cursor));
- }
- __wt_free(session, entry->ends);
- __wt_free(session, entry->repack_format);
- }
+ JOINABLE_CURSOR_API_CALL(cursor, session, reset, NULL);
if (cjoin->iter != NULL)
- WT_TRET(__curjoin_entry_iter_close(cjoin->iter));
- __wt_free(session, cjoin->entries);
- WT_TRET(__wt_cursor_close(cursor));
+ WT_ERR(__curjoin_iter_reset(cjoin->iter));
err: API_END_RET(session, ret);
}
/*
+ * __curjoin_split_key --
+ * Copy the primary key from a cursor (either main table or index)
+ * to another cursor. When copying from an index file, the index
+ * key is also returned.
+ *
+ */
+static int
+__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
+ WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur,
+ const char *repack_fmt, bool isindex)
+{
+ WT_CURSOR *firstcg_cur;
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *keyp;
+ const uint8_t *p;
+
+ if (isindex) {
+ cindex = ((WT_CURSOR_INDEX *)fromcur);
+ /*
+ * Repack tells us where the index key ends; advance past
+ * that to get where the raw primary key starts.
+ */
+ WT_RET(__wt_struct_repack(session, cindex->child->key_format,
+ repack_fmt != NULL ? repack_fmt : cindex->iface.key_format,
+ &cindex->child->key, idxkey));
+ WT_ASSERT(session, cindex->child->key.size > idxkey->size);
+ tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
+ tocur->key.size = cindex->child->key.size - idxkey->size;
+ if (WT_CURSOR_RECNO(tocur)) {
+ p = (const uint8_t *)tocur->key.data;
+ WT_RET(__wt_vunpack_uint(&p, tocur->key.size,
+ &tocur->recno));
+ } else
+ tocur->recno = 0;
+ } else {
+ firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0];
+ keyp = &firstcg_cur->key;
+ if (WT_CURSOR_RECNO(tocur)) {
+ WT_ASSERT(session, keyp->size == sizeof(uint64_t));
+ tocur->recno = *(uint64_t *)keyp->data;
+ WT_RET(__curjoin_pack_recno(session, tocur->recno,
+ cjoin->recno_buf, sizeof(cjoin->recno_buf),
+ &tocur->key));
+ } else {
+ WT_ITEM_SET(tocur->key, *keyp);
+ tocur->recno = 0;
+ }
+ idxkey->data = NULL;
+ idxkey->size = 0;
+ }
+ return (0);
+}
+
+/*
* __wt_curjoin_open --
* Initialize a join cursor.
*
@@ -977,35 +1357,53 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count)
{
WT_CURSOR_INDEX *cindex;
+ WT_CURSOR_JOIN *child;
WT_CURSOR_JOIN_ENDPOINT *end;
WT_CURSOR_JOIN_ENTRY *entry;
- WT_DECL_RET;
- bool hasins, needbloom, range_eq;
- char *main_uri, *newformat;
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "raw", NULL };
- size_t len, newsize;
+ bool hasins, needbloom, nested, range_eq;
+ size_t len;
u_int i, ins, nonbloom;
+ uint8_t endrange;
entry = NULL;
hasins = needbloom = false;
- ins = 0; /* -Wuninitialized */
- main_uri = NULL;
- nonbloom = 0; /* -Wuninitialized */
+ ins = nonbloom = 0; /* -Wuninitialized */
- for (i = 0; i < cjoin->entries_next; i++) {
- if (cjoin->entries[i].index == idx) {
- entry = &cjoin->entries[i];
- break;
- }
- if (!needbloom && i > 0 &&
- !F_ISSET(&cjoin->entries[i], WT_CURJOIN_ENTRY_BLOOM)) {
- needbloom = true;
- nonbloom = i;
+ if (cjoin->entries_next == 0) {
+ if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION))
+ F_SET(cjoin, WT_CURJOIN_DISJUNCTION);
+ } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) &&
+ !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ WT_RET_MSG(session, EINVAL,
+ "operation=or does not match previous operation=and");
+ else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) &&
+ F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ WT_RET_MSG(session, EINVAL,
+ "operation=and does not match previous operation=or");
+
+ nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:");
+ if (!nested)
+ for (i = 0; i < cjoin->entries_next; i++) {
+ if (cjoin->entries[i].index == idx &&
+ cjoin->entries[i].subjoin == NULL) {
+ entry = &cjoin->entries[i];
+ break;
+ }
+ if (!needbloom && i > 0 &&
+ !F_ISSET(&cjoin->entries[i],
+ WT_CURJOIN_ENTRY_BLOOM)) {
+ needbloom = true;
+ nonbloom = i;
+ }
}
+ else {
+ if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM))
+ WT_RET_MSG(session, EINVAL,
+ "Bloom filters cannot be used with subjoins");
}
+
if (entry == NULL) {
- WT_ERR(__wt_realloc_def(session, &cjoin->entries_allocated,
+ WT_RET(__wt_realloc_def(session, &cjoin->entries_allocated,
cjoin->entries_next + 1, &cjoin->entries));
if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && needbloom) {
/*
@@ -1034,13 +1432,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
} else {
/* Merge the join into an existing entry for this index */
if (count != 0 && entry->count != 0 && entry->count != count)
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"count=%" PRIu64 " does not match "
"previous count=%" PRIu64 " for this index",
count, entry->count);
if (LF_MASK(WT_CURJOIN_ENTRY_BLOOM) !=
F_MASK(entry, WT_CURJOIN_ENTRY_BLOOM))
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"join has incompatible strategy "
"values for the same index");
@@ -1063,19 +1461,20 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
for (i = 0; i < entry->ends_next; i++) {
end = &entry->ends[i];
range_eq = (range == WT_CURJOIN_END_EQ);
+ endrange = WT_CURJOIN_END_RANGE(end);
if ((F_ISSET(end, WT_CURJOIN_END_GT) &&
((range & WT_CURJOIN_END_GT) != 0 || range_eq)) ||
(F_ISSET(end, WT_CURJOIN_END_LT) &&
((range & WT_CURJOIN_END_LT) != 0 || range_eq)) ||
- (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
+ (endrange == WT_CURJOIN_END_EQ &&
(range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT))
!= 0))
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"join has overlapping ranges");
if (range == WT_CURJOIN_END_EQ &&
- WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
+ endrange == WT_CURJOIN_END_EQ &&
!F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"compare=eq can only be combined "
"using operation=or");
@@ -1086,6 +1485,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
if (!hasins &&
((range & WT_CURJOIN_END_GT) != 0 ||
(range == WT_CURJOIN_END_EQ &&
+ endrange != WT_CURJOIN_END_EQ &&
!F_ISSET(end, WT_CURJOIN_END_GT)))) {
ins = i;
hasins = true;
@@ -1098,70 +1498,35 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
entry->bloom_hash_count =
WT_MAX(entry->bloom_hash_count, bloom_hash_count);
}
- WT_ERR(__curjoin_insert_endpoint(session, entry,
- hasins ? ins : entry->ends_next, &end));
- end->cursor = ref_cursor;
- F_SET(end, range);
-
- /* Open the main file with a projection of the indexed columns. */
- if (entry->main == NULL && idx != NULL) {
- newsize = strlen(cjoin->table->name) + idx->colconf.len + 1;
- WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
- snprintf(main_uri, newsize, "%s%.*s",
- cjoin->table->name, (int)idx->colconf.len,
- idx->colconf.str);
- WT_ERR(__wt_open_cursor(session, main_uri,
- (WT_CURSOR *)cjoin, raw_cfg, &entry->main));
- if (idx->extractor == NULL) {
+ if (nested) {
+ child = (WT_CURSOR_JOIN *)ref_cursor;
+ entry->subjoin = child;
+ child->parent = cjoin;
+ } else {
+ WT_RET(__curjoin_insert_endpoint(session, entry,
+ hasins ? ins : entry->ends_next, &end));
+ end->cursor = ref_cursor;
+ F_SET(end, range);
+
+ if (entry->main == NULL && idx != NULL) {
/*
- * Add no-op padding so trailing 'u' formats are not
- * transformed to 'U'. This matches what happens in
- * the index. We don't do this when we have an
- * extractor, extractors already use the padding
- * byte trick.
+ * Open the main file with a projection of the
+ * indexed columns.
*/
- len = strlen(entry->main->value_format) + 3;
- WT_ERR(__wt_calloc(session, len, 1, &newformat));
- snprintf(newformat, len, "%s0x",
- entry->main->value_format);
- __wt_free(session, entry->main->value_format);
- entry->main->value_format = newformat;
- }
+ WT_RET(__curjoin_open_main(session, cjoin, entry));
- /*
- * When we are repacking index keys to remove the primary
- * key, we never want to transform trailing 'u'. Use no-op
- * padding to force this.
- */
- cindex = (WT_CURSOR_INDEX *)ref_cursor;
- len = strlen(cindex->iface.key_format) + 3;
- WT_ERR(__wt_calloc(session, len, 1, &entry->repack_format));
- snprintf(entry->repack_format, len, "%s0x",
- cindex->iface.key_format);
+ /*
+ * When we are repacking index keys to remove the
+ * primary key, we never want to transform trailing
+ * 'u'. Use no-op padding to force this.
+ */
+ cindex = (WT_CURSOR_INDEX *)ref_cursor;
+ len = strlen(cindex->iface.key_format) + 3;
+ WT_RET(__wt_calloc(session, len, 1,
+ &entry->repack_format));
+ snprintf(entry->repack_format, len, "%s0x",
+ cindex->iface.key_format);
+ }
}
-
-err: __wt_free(session, main_uri);
- return (ret);
-}
-
-/*
- * __curjoin_insert_endpoint --
- * Insert a new entry into the endpoint array for the join entry.
- */
-static int
-__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
- u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp)
-{
- WT_CURSOR_JOIN_ENDPOINT *newend;
-
- WT_RET(__wt_realloc_def(session, &entry->ends_allocated,
- entry->ends_next + 1, &entry->ends));
- newend = &entry->ends[pos];
- memmove(newend + 1, newend,
- (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
- memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
- entry->ends_next++;
- *newendp = newend;
-
return (0);
}
diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c
index fcb66d3e8b3..f0fa0d8aec2 100644
--- a/src/cursor/cur_json.c
+++ b/src/cursor/cur_json.c
@@ -48,6 +48,10 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
case 't': \
WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \
break; \
+ case 'u': \
+ WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \
+ pv.type = 'K'; \
+ break; \
/* User format strings have already been validated. */ \
WT_ILLEGAL_VALUE(session); \
} \
@@ -62,7 +66,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name)
{
WT_PACK_VALUE *pv;
- const char *p, *end;
+ const u_char *p, *end;
size_t s, n;
pv = (WT_PACK_VALUE *)voidpv;
@@ -82,7 +86,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
case 'S':
/* Account for '"' quote in front and back. */
s += 2;
- p = (const char *)pv->u.s;
+ p = (const u_char *)pv->u.s;
if (bufsz > 0) {
*buf++ = '"';
bufsz--;
@@ -118,7 +122,7 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
case 'U':
case 'u':
s += 2;
- p = (const char *)pv->u.item.data;
+ p = (const u_char *)pv->u.item.data;
end = p + pv->u.item.size;
if (bufsz > 0) {
*buf++ = '"';
@@ -310,14 +314,14 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
* Can be called with null buf for sizing.
*/
size_t
-__wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode)
+__wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
{
- char abbrev;
+ u_char abbrev;
if (!force_unicode) {
- if (isprint(ch) && ch != '\\' && ch != '"') {
+ if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
if (bufsz >= 1)
- *buf = (u_char)ch;
+ *buf = ch;
return (1);
} else {
abbrev = '\0';
@@ -342,7 +346,7 @@ __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode)
if (abbrev != '\0') {
if (bufsz >= 2) {
*buf++ = '\\';
- *buf = (u_char)abbrev;
+ *buf = abbrev;
}
return (2);
}
@@ -386,7 +390,7 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat,
}
for (nkeys = 0; *keyformat; keyformat++)
- if (!isdigit(*keyformat))
+ if (!__wt_isdigit((u_char)*keyformat))
nkeys++;
p = beginkey;
@@ -409,12 +413,13 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat,
#define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \
size_t _kwlen = strlen(keyword); \
- if (strncmp(in, keyword, _kwlen) == 0 && !isalnum(in[_kwlen])) { \
+ if (strncmp(in, keyword, _kwlen) == 0 && \
+ !__wt_isalnum((u_char)in[_kwlen])) { \
in += _kwlen; \
result = matchval; \
} else { \
const char *_bad = in; \
- while (isalnum(*in)) \
+ while (__wt_isalnum((u_char)*in)) \
in++; \
__wt_errx(session, "unknown keyword \"%.*s\" in JSON", \
(int)(in - _bad), _bad); \
@@ -456,7 +461,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
result = -1;
session = (WT_SESSION_IMPL *)wt_session;
- while (isspace(*src))
+ while (__wt_isspace((u_char)*src))
src++;
*tokstart = src;
@@ -493,7 +498,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
"invalid Unicode within JSON string");
return (-1);
}
- src += 5;
+ src += 4;
}
backslash = false;
}
@@ -516,13 +521,12 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
isfloat = false;
if (*src == '-')
src++;
- while ((ch = *src) != '\0' && isdigit(ch))
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
src++;
if (*src == '.') {
isfloat = true;
src++;
- while ((ch = *src) != '\0' &&
- isdigit(ch))
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
src++;
}
if (*src == 'e' || *src == 'E') {
@@ -530,8 +534,7 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
src++;
if (*src == '+' || *src == '-')
src++;
- while ((ch = *src) != '\0' &&
- isdigit(ch))
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
src++;
}
result = isfloat ? 'f' : 'i';
@@ -556,10 +559,10 @@ __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
default:
/* An illegal token, move past it anyway */
bad = src;
- isalph = isalnum(*src);
+ isalph = __wt_isalnum((u_char)*src);
src++;
if (isalph)
- while (*src != '\0' && isalnum(*src))
+ while (*src != '\0' && __wt_isalnum((u_char)*src))
src++;
__wt_errx(session, "unknown token \"%.*s\" in JSON",
(int)(src - bad), bad);
@@ -840,20 +843,17 @@ __wt_json_strlen(const char *src, size_t srclen)
if (__wt_hex2byte((const u_char *)src, &lo))
return (-1);
src += 2;
- /* RFC 3629 */
- if (hi >= 0x8) {
- /* 3 bytes total */
- dstlen += 2;
- }
- else if (hi != 0 || lo >= 0x80) {
- /* 2 bytes total */
- dstlen++;
- }
- /* else 1 byte total */
+ if (hi != 0)
+ /*
+ * For our dump representation,
+ * every Unicode character on input
+ * represents a single byte.
+ */
+ return (-1);
}
- }
+ } else
+ src++;
dstlen++;
- src++;
}
if (src != srcend)
return (-1); /* invalid input, e.g. final char is '\\' */
@@ -867,55 +867,58 @@ __wt_json_strlen(const char *src, size_t srclen)
* the result if zero padded.
*/
int
-__wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen)
+__wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen,
+ const char *src, size_t srclen)
{
- char *dst;
+ WT_SESSION_IMPL *session;
+ char ch, *dst;
const char *dstend, *srcend;
u_char hi, lo;
+ session = (WT_SESSION_IMPL *)wt_session;
+
dst = *pdst;
dstend = dst + dstlen;
srcend = src + srclen;
while (src < srcend && dst < dstend) {
/* JSON can include any UTF-8 expressed in 4 hex chars. */
- if (*src == '\\') {
- if (*++src == 'u') {
- if (__wt_hex2byte((const u_char *)++src, &hi))
+ if ((ch = *src++) == '\\')
+ switch (ch = *src++) {
+ case 'u':
+ if (__wt_hex2byte((const u_char *)src, &hi))
return (EINVAL);
src += 2;
if (__wt_hex2byte((const u_char *)src, &lo))
return (EINVAL);
src += 2;
- /* RFC 3629 */
- if (hi >= 0x8) {
- /* 3 bytes total */
- /* byte 0: 1110HHHH */
- /* byte 1: 10HHHHLL */
- /* byte 2: 10LLLLLL */
- *dst++ = (char)(0xe0 |
- ((hi >> 4) & 0x0f));
- *dst++ = (char)(0x80 |
- ((hi << 2) & 0x3c) |
- ((lo >> 6) & 0x03));
- *dst++ = (char)(0x80 | (lo & 0x3f));
- } else if (hi != 0 || lo >= 0x80) {
- /* 2 bytes total */
- /* byte 0: 110HHHLL */
- /* byte 1: 10LLLLLL */
- *dst++ = (char)(0xc0 |
- (hi << 2) |
- ((lo >> 6) & 0x03));
- *dst++ = (char)(0x80 | (lo & 0x3f));
- } else
- /* else 1 byte total */
- /* byte 0: 0LLLLLLL */
- *dst++ = (char)lo;
+ if (hi != 0) {
+ __wt_errx(NULL, "Unicode \"%6.6s\""
+ " byte out of range in JSON",
+ src - 6);
+ return (EINVAL);
+ }
+ *dst++ = (char)lo;
+ break;
+ case 'f':
+ *dst++ = '\f';
+ break;
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case 'r':
+ *dst++ = '\r';
+ break;
+ case 't':
+ *dst++ = '\t';
+ break;
+ case '"':
+ case '\\':
+ *dst++ = ch;
+ break;
+ WT_ILLEGAL_VALUE(session);
}
- else
- *dst++ = *src;
- } else
- *dst++ = *src;
- src++;
+ else
+ *dst++ = ch;
}
if (src != srcend)
return (ENOMEM);
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 7839971f975..8bb8931f36f 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -435,7 +435,7 @@ __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap)
} else if (WT_STREQ(fmt, "S"))
*va_arg(ap, const char **) = cursor->value.data;
else if (WT_STREQ(fmt, "t") ||
- (isdigit(fmt[0]) && WT_STREQ(fmt + 1, "t")))
+ (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t")))
*va_arg(ap, uint8_t *) = *(uint8_t *)cursor->value.data;
else
ret = __wt_struct_unpackv(session,
@@ -496,7 +496,7 @@ __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap)
sz = strlen(str) + 1;
buf->data = str;
} else if (WT_STREQ(fmt, "t") ||
- (isdigit(fmt[0]) && WT_STREQ(fmt + 1, "t"))) {
+ (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t"))) {
sz = 1;
WT_ERR(__wt_buf_initsize(session, buf, sz));
*(uint8_t *)buf->mem = (uint8_t)va_arg(ap, int);
@@ -571,7 +571,6 @@ __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
WT_SESSION_IMPL *session;
int cmp;
- session = (WT_SESSION_IMPL *)cursor->session;
CURSOR_API_CALL(cursor, session, equals, NULL);
WT_ERR(cursor->compare(cursor, other, &cmp));
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 9eb88ec6fcd..6d50523043a 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -972,7 +972,8 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
if (0) {
err: if (*cursorp != NULL) {
- WT_TRET(__wt_cursor_close(*cursorp));
+ if (*cursorp != cursor)
+ WT_TRET(__wt_cursor_close(*cursorp));
*cursorp = NULL;
}
WT_TRET(__curtable_close(cursor));
diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile
index 4c1682de6eb..69e9716b425 100644
--- a/src/docs/Doxyfile
+++ b/src/docs/Doxyfile
@@ -1570,6 +1570,8 @@ PREDEFINED = DOXYGEN \
__wt_event_handler:=WT_EVENT_HANDLER \
__wt_extension_api:=WT_EXTENSION_API \
__wt_extractor:=WT_EXTRACTOR \
+ __wt_file_handle:=WT_FILE_HANDLE \
+ __wt_file_system:=WT_FILE_SYSTEM \
__wt_item:=WT_ITEM \
__wt_lsn:=WT_LSN \
__wt_session:=WT_SESSION \
diff --git a/src/docs/backup.dox b/src/docs/backup.dox
index 7742e698ac8..45edc85d6a5 100644
--- a/src/docs/backup.dox
+++ b/src/docs/backup.dox
@@ -42,6 +42,12 @@ Copying the database files for a backup does not require any special
alignment or block size (specifically, Linux or Windows filesystems that
do not support read/write isolation can be safely read for backups).
+The database file may grow in size during the copy, and the file copy
+should not consider that an error. Blocks appended to the file after the
+copy starts can be safely ignored, that is, it is correct for the copy
+to determine an initial size of the file and then copy that many bytes,
+ignoring any bytes appended after the backup cursor was opened.
+
The cursor must not be closed until all of the files have been copied,
however, there is no requirement the files be copied in any order or in
any relationship to the WT_CURSOR::next calls, only that all files have
@@ -98,29 +104,35 @@ and removing log files from the original database home:
1. Perform a full backup of the database (as described above).
-2. Perform a full database checkpoint.
-
-3. Open a cursor on the \c "backup:" data source, with the
- \c "target=(\"log:\\")" target specified, which begins the
- process of an incremental backup.
+2. Open a cursor on the \c "backup:" data source, configured with the
+ \c "target=(\"log:\\")" target specified, which begins the process
+ of an incremental backup.
-4. Copy each log file returned by the WT_CURSOR::next method to the backup
+3. Copy each log file returned by the WT_CURSOR::next method to the backup
directory. It is not an error to copy a log file which has been copied
before, but care should be taken to ensure each log file is completely copied
- as the most recent log file may change in size while being copied.
+ as the most recent log file may grow in size while being copied.
-5. If all log files have been successfully copied, archive the log
+4. If all log files have been successfully copied, archive the log
files by calling the WT_SESSION::truncate method with the URI
- <code>log:</code> and specifying the backup cursor as the
- start cursor to that method.
+ <code>log:</code> and specifying the backup cursor as the start
+ cursor to that method. (Note there is no requirement backups be
+ coordinated with database checkpoints, however, an incremental backup
+ will repeatedly copy the same files, and will not make additional log
+ files available for archival, unless there was a checkpoint after the
+ previous incremental backup.)
+
+5. Close the backup cursor.
+
+Steps 2-5 can be repeated any number of times before step 1 is repeated.
+Full and incremental backups may be repeated as long as the backup
+database directory has not been opened and recovery run. Once recovery
+has run in a backup directory, you can no longer back up to that
+database directory.
-6. Close the backup cursor.
+An example of opening the backup data source for an incremental backup:
-Steps 2-6 can be repeated any number of times before step 1 is
-repeated. These steps can be repeated as long as the backup database
-directory has not been opened, recovery run and become live. Once
-the database becomes live, you must repeat all steps 1-6 to another,
-different backup database directory.
+@snippet ex_all.c incremental backup
@section backup_o_direct Backup and O_DIRECT
diff --git a/src/docs/cursor-join.dox b/src/docs/cursor-join.dox
index 51da6b174bf..5ea064a250b 100644
--- a/src/docs/cursor-join.dox
+++ b/src/docs/cursor-join.dox
@@ -14,6 +14,31 @@ Here is an example using join cursors:
Joins support various comparison operators: \c "eq", \c "gt", \c "ge", \c "lt", \c "le". Ranges with lower and upper bounds can also be specified, by joining two cursors on the same index, for example, one with \c "compare=ge" and another \c "compare=lt". In addition to joining indices, the main table can be joined so that a range of primary keys can be specified.
+By default, a join cursor returns a conjunction, that is, all keys that
+satisfy all the joined comparisons. By specifying a configuration with \c
+"operation=or", a join cursor will return a disjunction, or all keys that
+satisfy at least one of the joined comparisons. More complex joins can be
+composed by specifying another join cursor as the reference cursor in a join
+call.
+
+Here is an example using these concepts to show a conjunction of a disjunction:
+
+@snippet ex_schema.c Complex join cursors
+
All the joins should be done on the join cursor before WT_CURSOR::next is called. Calling WT_CURSOR::next on a join cursor for the first time populates any bloom filters and performs other initialization. The join cursor's key is the primary key (the key for the main table), and its value is the entire set of values of the main table. A join cursor can be created with a projection by appending \c "(col1,col2,...)" to the URI if a different set of values is needed.
+Keys returned from the join cursor are ordered according to the
+first reference cursor joined. For example, if an index cursor was joined
+first, that index determines the order of results. If the join cursor
+uses disjunctions, then the ordering of all joins determines the order.
+The first join in a conjunctive join, or all joins in a disjunctive join,
+are distinctive in that they are iterated internally as the cursor join
+returns values in order. Any bloom filters specified on the
+joins that are used for iteration are not useful, and are silently ignored.
+
+When disjunctions are used where the sets of keys overlap on these 'iteration
+joins', a join cursor will return duplicates. A join cursor never returns
+duplicates unless \c "operation=or" is used in a join configuration, or unless
+the first joined cursor is itself a join cursor that would return duplicates.
+
*/
diff --git a/src/docs/custom-file-systems.dox b/src/docs/custom-file-systems.dox
new file mode 100644
index 00000000000..d496002b0fb
--- /dev/null
+++ b/src/docs/custom-file-systems.dox
@@ -0,0 +1,47 @@
+/*! @page custom_file_systems Custom File Systems
+
+Applications can provide a custom file system implementation that will be
+used by WiredTiger to interact with the I/O subsystem using the
+WT_FILE_SYSTEM and WT_FILE_HANDLE interfaces.
+
+It is not necessary for all file system providers to implement all methods
+in the WT_FILE_SYSTEM and WT_FILE_HANDLE structures, and documentation for
+those structures indicates which methods are optional. Methods which are not
+provided should be set to NULL.
+
+Generally, function pointers should not be changed once a handle is
+created. The exceptions to this are the WT_FILE_HANDLE::fallocate and
+WT_FILE_HANDLE::fallocate_nolock methods, because a file system
+implementation may not know what support the system provides until file
+allocation is attempted. See the WiredTiger POSIX file system
+implementation for an example of how the fallocate method might be
+changed after initialization.
+
+WT_FILE_SYSTEM and WT_FILE_HANDLE methods are expected to return POSIX
+1003.1 or ANSI C standard error codes on failure. Custom file systems
+on Windows systems can use the WT_EXTENSION_API::map_windows_error
+method to translate Windows system errors into POSIX system errors for
+return to WiredTiger.
+
+WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail but not fatally
+(for example, a WT_FILE_HANDLE::truncate method call which fails because
+the file is currently mapped into memory), should return EBUSY.
+
+WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail fatally, but not
+in all cases (for example, a WT_FILE_HANDLE::fadvise method call which
+only supports ::WT_FILE_HANDLE_WILLNEED), should return ENOTSUP.
+
+Additionally, custom file system functions may return ::WT_PANIC to
+shut down the system.
+
+Unless explicitly stated otherwise, WiredTiger may invoke methods on the
+WT_FILE_SYSTEM and WT_FILE_HANDLE interfaces from multiple threads
+concurrently. It is the responsibility of the implementation to protect
+any shared data.
+
+See @ex_ref{ex_file_system.c} for an example implementation of a custom
+file system; the WiredTiger code for a POSIX standard file system is in
+the public domain and may also be useful as a starting point for a custom
+file system implementation.
+
+*/
diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox
index d91a126ee21..62be498fc15 100644
--- a/src/docs/error-handling.dox
+++ b/src/docs/error-handling.dox
@@ -52,6 +52,9 @@ This error indicates an underlying problem that requires the application exit an
@par <code>WT_RUN_RECOVERY</code>
This error is generated when wiredtiger_open is configured to return an error if recovery is required to use the database.
+@par <code>WT_CACHE_FULL</code>
+This error is only generated when wiredtiger_open is configured to run in-memory, and an insert or update operation requires more than the configured cache size to complete. The operation may be retried; if a transaction is in progress, it should be rolled back and the operation retried in a new transaction.
+
@if IGNORE_BUILT_BY_API_ERR_END
@endif
diff --git a/src/docs/examples.dox b/src/docs/examples.dox
index 3ed7357b52c..c5a106a00c9 100644
--- a/src/docs/examples.dox
+++ b/src/docs/examples.dox
@@ -9,9 +9,6 @@ Show how to configure and use asynchronous operations.
A more complex schema based on a call center example, showing how to map
some SQL constructs onto the WiredTiger API.
-@example ex_config.c
-Shows how to configure some properties of the database and tables.
-
@example ex_cursor.c
Shows some common cursor types and operations.
@@ -55,4 +52,7 @@ Shows how to access the database log files.
@example ex_thread.c
Shows how to access a database with multiple threads.
+@example ex_file_system.c
+Shows how to extend WiredTiger with a custom file-system implementation.
+
*/
diff --git a/src/docs/in-memory.dox b/src/docs/in-memory.dox
new file mode 100644
index 00000000000..df221dc34d6
--- /dev/null
+++ b/src/docs/in-memory.dox
@@ -0,0 +1,12 @@
+/*! @m_page{{c,java},in_memory,In-memory databases}
+
+The ::wiredtiger_open \c in_memory configuration changes WiredTiger to
+run in cache without writing to a backing disk. Data is limited to the
+configured cache size.
+
+If \c in_memory is configured, WT_CURSOR::insert and WT_CURSOR::update
+methods may return an additional error, ::WT_CACHE_FULL, indicating the
+insert or update operation requires more than the configured cache size
+to complete. If a transaction is in progress, it should be rolled back.
+
+ */
diff --git a/src/docs/programming.dox b/src/docs/programming.dox
index f717f4ed1fe..81e612e8ee8 100644
--- a/src/docs/programming.dox
+++ b/src/docs/programming.dox
@@ -41,6 +41,7 @@ each of which is ordered by one or more columns.
- @subpage compact
- @subpage checkpoint
- @subpage durability
+- @subpage in_memory
- @subpage cursor_join
- @subpage cursor_log
- @ref transaction_named_snapshots
@@ -55,6 +56,7 @@ each of which is ordered by one or more columns.
- @subpage custom_collators
- @subpage custom_extractors
- @subpage custom_data_sources
+- @subpage custom_file_systems
- @subpage helium
@m_endif
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index efc306568cd..96fe04d7426 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -25,6 +25,7 @@ EBUSY
ECMA
EINVAL
ENCRYPTOR
+ENOTSUP
EmpId
Encryptors
Facebook
@@ -80,6 +81,7 @@ Seward's
SiH
TXT
URIs
+WILLNEED
WiredTiger
WiredTiger's
WiredTigerCheckpoint
@@ -178,6 +180,8 @@ desc
destructor
destructors
dev
+disjunction
+disjunctions
distclean
dl
dll
@@ -208,6 +212,7 @@ errno
exe
fadvise
failchk
+fallocate
fd's
fdatasync
fieldname
@@ -331,6 +336,7 @@ nocase
nocasecoll
nodup
noflush
+nolock
nolocking
nommap
nop
diff --git a/src/docs/tune-cache.dox b/src/docs/tune-cache.dox
index c9603085905..505da436277 100644
--- a/src/docs/tune-cache.dox
+++ b/src/docs/tune-cache.dox
@@ -11,9 +11,9 @@ The cache size for the database is normally configured by setting the
function. The cache size can be adjusted after the open call with
WT_CONNECTION::reconfigure.
-An example of setting a cache size to 500MB:
+An example of setting a cache size to 5GB:
-@snippet ex_config.c configure cache size
+@snippet ex_all.c Open a connection
The effectiveness of the chosen cache size can be measured by reviewing
the page eviction statistics for the database.
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index 6d8dcab8f65..e06272d117c 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -232,6 +232,8 @@ operation,two for every second operation, three for every third
operation etc.
@par sess_config (string, default=)
session configuration string
+@par session_count_idle (unsigned int, default=0)
+number of idle sessions to create. Default 0.
@par table_config (string, default=key_format=S,value_format=S,type=lsm,exclusive=true,allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,split_pct=100)
table configuration string
@par table_count (unsigned int, default=1)
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index 4b9e2442f32..b0cd50cc655 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -87,7 +87,10 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
__wt_page_can_evict(session, ref, NULL));
__wt_ref_out(session, ref);
break;
- WT_ILLEGAL_VALUE_ERR(session);
+ case WT_SYNC_CHECKPOINT:
+ case WT_SYNC_WRITE_LEAVES:
+ WT_ERR(__wt_illegal_value(session, NULL));
+ break;
}
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 360a3f69cd2..8ea487bbf83 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -9,16 +9,15 @@
#include "wt_internal.h"
static int __evict_clear_all_walks(WT_SESSION_IMPL *);
-static int __evict_clear_walks(WT_SESSION_IMPL *);
+static int __evict_helper(WT_SESSION_IMPL *);
static int WT_CDECL __evict_lru_cmp(const void *, const void *);
static int __evict_lru_pages(WT_SESSION_IMPL *, bool);
static int __evict_lru_walk(WT_SESSION_IMPL *);
static int __evict_page(WT_SESSION_IMPL *, bool);
static int __evict_pass(WT_SESSION_IMPL *);
-static int __evict_walk(WT_SESSION_IMPL *);
-static int __evict_walk_file(WT_SESSION_IMPL *, u_int, u_int *);
-static WT_THREAD_RET __evict_worker(void *);
-static int __evict_server_work(WT_SESSION_IMPL *);
+static int __evict_server(WT_SESSION_IMPL *, bool *);
+static int __evict_walk(WT_SESSION_IMPL *, uint32_t);
+static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *);
/*
* __evict_read_gen --
@@ -32,6 +31,11 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
uint64_t read_gen;
btree = entry->btree;
+
+ /* Never prioritize empty slots. */
+ if (entry->ref == NULL)
+ return (UINT64_MAX);
+
page = entry->ref->page;
/* Any page set to the oldest generation should be discarded. */
@@ -66,15 +70,14 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
* Qsort function: sort the eviction array.
*/
static int WT_CDECL
-__evict_lru_cmp(const void *a_arg, const void *b_arg)
+__evict_lru_cmp(const void *a, const void *b)
{
- const WT_EVICT_ENTRY *a = a_arg, *b = b_arg;
- uint64_t a_score, b_score;
+ uint64_t a_lru, b_lru;
- a_score = (a->ref == NULL ? UINT64_MAX : a->score);
- b_score = (b->ref == NULL ? UINT64_MAX : b->score);
+ a_lru = __evict_read_gen(a);
+ b_lru = __evict_read_gen(b);
- return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
+ return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1);
}
/*
@@ -104,7 +107,8 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
- uint32_t i, elem;
+ uint32_t i, elem, q;
+ bool found;
WT_ASSERT(session,
__wt_ref_is_root(ref) || ref->state == WT_REF_LOCKED);
@@ -114,18 +118,25 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref)
return;
cache = S2C(session)->cache;
- __wt_spin_lock(session, &cache->evict_lock);
-
- elem = cache->evict_max;
- for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++)
- if (evict->ref == ref) {
- __evict_list_clear(session, evict);
- break;
- }
-
- WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU));
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+
+ found = false;
+ for (q = 0; q < WT_EVICT_QUEUE_MAX && !found; q++) {
+ __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
+ elem = cache->evict_queues[q].evict_max;
+ for (i = 0, evict = cache->evict_queues[q].evict_queue;
+ i < elem; i++, evict++)
+ if (evict->ref == ref) {
+ found = true;
+ __evict_list_clear(session, evict);
+ break;
+ }
+ __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
+ }
+ WT_ASSERT(session,
+ !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU));
- __wt_spin_unlock(session, &cache->evict_lock);
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
}
/*
@@ -141,6 +152,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
+#ifdef HAVE_VERBOSE
if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER)) {
uint64_t bytes_inuse, bytes_max;
@@ -154,104 +166,159 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session)
bytes_inuse <= bytes_max ? "<=" : ">",
bytes_max / WT_MEGABYTE));
}
+#endif
return (__wt_cond_auto_signal(session, cache->evict_cond));
}
/*
- * __evict_server --
- * Thread to evict pages from the cache.
+ * __evict_thread_run --
+ * General wrapper for any eviction thread.
*/
static WT_THREAD_RET
-__evict_server(void *arg)
+__evict_thread_run(void *arg)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *session;
-#ifdef HAVE_DIAGNOSTIC
- struct timespec now, stuck_ts;
-#endif
- uint64_t pages_evicted = 0;
- u_int spins;
+ bool did_work;
session = arg;
conn = S2C(session);
cache = conn->cache;
+#ifdef HAVE_DIAGNOSTIC
+ if (session == conn->evict_session)
+ WT_ERR(__wt_epoch(
+ session, &cache->stuck_ts)); /* -Wuninitialized */
+#endif
while (F_ISSET(conn, WT_CONN_EVICTION_RUN)) {
- /* Evict pages from the cache as needed. */
- WT_ERR(__evict_pass(session));
-
- if (!F_ISSET(conn, WT_CONN_EVICTION_RUN))
- break;
-
- /*
- * Clear the walks so we don't pin pages while asleep,
- * otherwise we can block applications evicting large pages.
- */
- if (!F_ISSET(cache, WT_CACHE_STUCK)) {
- for (spins = 0; (ret = __wt_spin_trylock(
- session, &conn->dhandle_lock)) == EBUSY &&
- !F_ISSET(cache, WT_CACHE_CLEAR_WALKS);
- spins++) {
- if (spins < WT_THOUSAND)
- __wt_yield();
- else
- __wt_sleep(0, WT_THOUSAND);
- }
+ if (conn->evict_tid_set &&
+ __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
/*
- * If we gave up acquiring the lock, that indicates a
- * session is waiting for us to clear walks. Do that
- * as part of a normal pass (without the handle list
- * lock) to avoid deadlock.
+ * Cannot use WT_WITH_PASS_LOCK because this is a try
+ * lock. Fix when that is supported. We set the flag
+ * on both sessions because we may call clear_walk when
+ * we are walking with the walk session, locked.
*/
- if (ret == EBUSY)
- continue;
- WT_ERR(ret);
- ret = __evict_clear_all_walks(session);
- __wt_spin_unlock(session, &conn->dhandle_lock);
+ F_SET(session, WT_SESSION_LOCKED_PASS);
+ F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ ret = __evict_server(session, &did_work);
+ F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ F_CLR(session, WT_SESSION_LOCKED_PASS);
+ __wt_spin_unlock(session, &cache->evict_pass_lock);
WT_ERR(ret);
+ WT_ERR(__wt_verbose(
+ session, WT_VERB_EVICTSERVER, "sleeping"));
+ /* Don't rely on signals: check periodically. */
+ WT_ERR(__wt_cond_auto_wait(
+ session, cache->evict_cond, did_work));
+ WT_ERR(__wt_verbose(
+ session, WT_VERB_EVICTSERVER, "waking"));
+ } else
+ WT_ERR(__evict_helper(session));
+ }
- /* Next time we wake up, reverse the sweep direction. */
- cache->flags ^= WT_CACHE_WALK_REVERSE;
- pages_evicted = 0;
- } else if (pages_evicted != cache->pages_evict) {
- pages_evicted = cache->pages_evict;
+ if (session == conn->evict_session) {
+ /*
+ * The eviction server is shutting down: in case any trees are
+ * still open, clear all walks now so that they can be closed.
+ */
+ WT_WITH_PASS_LOCK(session, ret,
+ ret = __evict_clear_all_walks(session));
+ WT_ERR(ret);
+ }
+ WT_ERR(__wt_verbose(
+ session, WT_VERB_EVICTSERVER, "cache eviction thread exiting"));
+
+ /*
+ * The only two cases when eviction workers are expected to stop are
+ * when recovery is finished or when the connection is closing. Check
+ * otherwise fewer eviction worker threads may be running than
+ * expected.
+ */
+ WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING));
+ if (0) {
+err: WT_PANIC_MSG(session, ret, "cache eviction thread error");
+ }
+ return (WT_THREAD_RET_VALUE);
+}
+
+/*
+ * __evict_server --
+ * Thread to evict pages from the cache.
+ */
+static int
+__evict_server(WT_SESSION_IMPL *session, bool *did_work)
+{
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
#ifdef HAVE_DIAGNOSTIC
- WT_ERR(__wt_epoch(session, &stuck_ts));
- } else {
- /* After being stuck for 5 minutes, give up. */
- WT_ERR(__wt_epoch(session, &now));
- if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) {
- __wt_err(session, ETIMEDOUT,
- "Cache stuck for too long, giving up");
- (void)__wt_cache_dump(session, NULL);
- WT_ERR(ETIMEDOUT);
- }
+ struct timespec now;
#endif
- }
+ uint64_t orig_pages_evicted;
+ u_int spins;
- WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"));
- /* Don't rely on signals: check periodically. */
- WT_ERR(__wt_cond_auto_wait(
- session, cache->evict_cond, pages_evicted != 0));
- WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking"));
- }
+ conn = S2C(session);
+ cache = conn->cache;
+ WT_ASSERT(session, did_work != NULL);
+ *did_work = false;
+ orig_pages_evicted = cache->pages_evicted;
+
+ /* Evict pages from the cache as needed. */
+ WT_RET(__evict_pass(session));
+
+ if (!F_ISSET(conn, WT_CONN_EVICTION_RUN))
+ return (0);
/*
- * The eviction server is shutting down: in case any trees are still
- * open, clear all walks now so that they can be closed.
+ * Clear the walks so we don't pin pages while asleep,
+ * otherwise we can block applications evicting large pages.
*/
- WT_ERR(__evict_clear_all_walks(session));
-
- WT_ERR(__wt_verbose(
- session, WT_VERB_EVICTSERVER, "cache eviction server exiting"));
+ if (!F_ISSET(cache, WT_CACHE_STUCK)) {
+ for (spins = 0; (ret = __wt_spin_trylock(
+ session, &conn->dhandle_lock)) == EBUSY &&
+ cache->pass_intr == 0; spins++) {
+ if (spins < WT_THOUSAND)
+ __wt_yield();
+ else
+ __wt_sleep(0, WT_THOUSAND);
+ }
+ /*
+ * If we gave up acquiring the lock, that indicates a
+ * session is waiting for us to clear walks. Do that
+ * as part of a normal pass (without the handle list
+ * lock) to avoid deadlock.
+ */
+ if (ret == EBUSY)
+ return (0);
+ WT_RET(ret);
+ ret = __evict_clear_all_walks(session);
+ __wt_spin_unlock(session, &conn->dhandle_lock);
+ WT_RET(ret);
- if (0) {
-err: WT_PANIC_MSG(session, ret, "cache eviction server error");
+ /* Next time we wake up, reverse the sweep direction. */
+ cache->flags ^= WT_CACHE_WALK_REVERSE;
+ cache->pages_evicted = 0;
+ } else if (cache->pages_evicted != cache->pages_evict) {
+ cache->pages_evicted = cache->pages_evict;
+#ifdef HAVE_DIAGNOSTIC
+ WT_RET(__wt_epoch(session, &cache->stuck_ts));
+ } else {
+ /* After being stuck for 5 minutes, give up. */
+ WT_RET(__wt_epoch(session, &now));
+ if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) {
+ __wt_err(session, ETIMEDOUT,
+ "Cache stuck for too long, giving up");
+ (void)__wt_cache_dump(session, NULL);
+ WT_RET(ETIMEDOUT);
+ }
+#endif
}
- return (WT_THREAD_RET_VALUE);
+ *did_work = cache->pages_evicted != orig_pages_evicted;
+ return (0);
}
/*
@@ -270,6 +337,7 @@ __evict_workers_resize(WT_SESSION_IMPL *session)
uint32_t i, session_flags;
conn = S2C(session);
+ workers = NULL; /* -Wconditional-uninitialized */
if (conn->evict_workers_alloc < conn->evict_workers_max) {
alloc = conn->evict_workers_alloc * sizeof(*workers);
@@ -301,7 +369,8 @@ __evict_workers_resize(WT_SESSION_IMPL *session)
++conn->evict_workers;
F_SET(&workers[i], WT_EVICT_WORKER_RUN);
WT_ERR(__wt_thread_create(workers[i].session,
- &workers[i].tid, __evict_worker, &workers[i]));
+ &workers[i].tid, __evict_thread_run,
+ workers[i].session));
}
}
@@ -355,7 +424,7 @@ __wt_evict_create(WT_SESSION_IMPL *session)
* the worker's sessions are created.
*/
WT_RET(__wt_thread_create(
- session, &conn->evict_tid, __evict_server, session));
+ session, &conn->evict_tid, __evict_thread_run, session));
conn->evict_tid_set = true;
return (0);
@@ -425,39 +494,22 @@ __wt_evict_destroy(WT_SESSION_IMPL *session)
}
/*
- * __evict_worker --
+ * __evict_helper --
* Thread to help evict pages from the cache.
*/
-static WT_THREAD_RET
-__evict_worker(void *arg)
+static int
+__evict_helper(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_EVICT_WORKER *worker;
- WT_SESSION_IMPL *session;
- worker = arg;
- session = worker->session;
- conn = S2C(session);
- cache = conn->cache;
-
- while (F_ISSET(conn, WT_CONN_EVICTION_RUN) &&
- F_ISSET(worker, WT_EVICT_WORKER_RUN)) {
- /* Don't spin in a busy loop if there is no work to do */
- if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND)
- WT_ERR(__wt_cond_wait(
- session, cache->evict_waiter_cond, 10000));
- else
- WT_ERR(ret);
- }
- WT_ERR(__wt_verbose(
- session, WT_VERB_EVICTSERVER, "cache eviction worker exiting"));
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "cache eviction worker error");
- }
- return (WT_THREAD_RET_VALUE);
+ cache = S2C(session)->cache;
+ if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND)
+ WT_RET(__wt_cond_wait(
+ session, cache->evict_waiter_cond, 10000));
+ else
+ WT_RET(ret);
+ return (0);
}
/*
@@ -565,12 +617,8 @@ __evict_pass(WT_SESSION_IMPL *session)
* If there is a request to clear eviction walks, do that now,
* before checking if the cache is full.
*/
- if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS)) {
- F_CLR(cache, WT_CACHE_CLEAR_WALKS);
- WT_RET(__evict_clear_walks(session));
- WT_RET(__wt_cond_signal(
- session, cache->evict_waiter_cond));
- }
+ if (cache->pass_intr != 0)
+ break;
/*
* Increment the shared read generation. Do this occasionally
@@ -617,7 +665,7 @@ __evict_pass(WT_SESSION_IMPL *session)
worker = &conn->evict_workctx[conn->evict_workers++];
F_SET(worker, WT_EVICT_WORKER_RUN);
WT_RET(__wt_thread_create(session,
- &worker->tid, __evict_worker, worker));
+ &worker->tid, __evict_thread_run, worker->session));
}
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
@@ -626,7 +674,7 @@ __evict_pass(WT_SESSION_IMPL *session)
conn->cache_size, cache->bytes_inmem, cache->bytes_dirty));
WT_RET(__evict_lru_walk(session));
- WT_RET(__evict_server_work(session));
+ WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true));
/*
* If we're making progress, keep going; if we're not making
@@ -634,6 +682,8 @@ __evict_pass(WT_SESSION_IMPL *session)
* sleep, it's not something we can fix.
*/
if (pages_evicted == cache->pages_evict) {
+ WT_STAT_FAST_CONN_INCR(session,
+ cache_eviction_server_slept);
/*
* Back off if we aren't making progress: walks hold
* the handle list lock, which blocks other operations
@@ -674,11 +724,13 @@ __evict_clear_walk(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
+ WT_DECL_RET;
WT_REF *ref;
btree = S2BT(session);
cache = S2C(session)->cache;
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS));
if (session->dhandle == cache->evict_file_next)
cache->evict_file_next = NULL;
@@ -690,30 +742,9 @@ __evict_clear_walk(WT_SESSION_IMPL *session)
* assert we never try to evict the current eviction walk point).
*/
btree->evict_ref = NULL;
- return (__wt_page_release(session, ref, WT_READ_NO_EVICT));
-}
-
-/*
- * __evict_clear_walks --
- * Clear the eviction walk points for any file a session is waiting on.
- */
-static int
-__evict_clear_walks(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *s;
- u_int i, session_cnt;
-
- conn = S2C(session);
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
- if (!s->active || !F_ISSET(s, WT_SESSION_CLEAR_EVICT_WALK))
- continue;
- WT_WITH_DHANDLE(
- session, s->dhandle, WT_TRET(__evict_clear_walk(session)));
- }
+ WT_WITH_DHANDLE(cache->walk_session, session->dhandle,
+ (ret = __wt_page_release(cache->walk_session,
+ ref, WT_READ_NO_EVICT)));
return (ret);
}
@@ -738,39 +769,6 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session)
}
/*
- * __evict_request_clear_walk --
- * Request that the eviction server clear the tree's current eviction
- * point.
- */
-static int
-__evict_request_clear_walk(WT_SESSION_IMPL *session)
-{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_DECL_RET;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- F_SET(session, WT_SESSION_CLEAR_EVICT_WALK);
-
- while (ret == 0 && (btree->evict_ref != NULL ||
- cache->evict_file_next == session->dhandle)) {
- F_SET(cache, WT_CACHE_CLEAR_WALKS);
- ret = __wt_cond_wait(
- session, cache->evict_waiter_cond, 100000);
- }
-
- F_CLR(session, WT_SESSION_CLEAR_EVICT_WALK);
-
- /* An error is unexpected - flag the failure. */
- if (ret != 0)
- __wt_err(session, ret, "Failed to clear eviction walk point");
-
- return (ret);
-}
-
-/*
* __wt_evict_file_exclusive_on --
* Get exclusive eviction access to a file and discard any of the file's
* blocks queued for eviction.
@@ -782,7 +780,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_DECL_RET;
WT_EVICT_ENTRY *evict;
- u_int i, elem;
+ u_int i, elem, q;
btree = S2BT(session);
cache = S2C(session)->cache;
@@ -807,21 +805,32 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
* this point.
*/
F_SET(btree, WT_BTREE_NO_EVICTION);
+ (void)__wt_atomic_add32(&cache->pass_intr, 1);
WT_FULL_BARRIER();
/* Clear any existing LRU eviction walk for the file. */
- WT_ERR(__evict_request_clear_walk(session));
+ WT_WITH_PASS_LOCK(session, ret,
+ ret = __evict_clear_walk(session));
+ (void)__wt_atomic_sub32(&cache->pass_intr, 1);
+ WT_ERR(ret);
/*
* The eviction candidate list might reference pages from the file,
* clear it. Hold the evict lock to remove queued pages from a file.
*/
- __wt_spin_lock(session, &cache->evict_lock);
- elem = cache->evict_max;
- for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++)
- if (evict->btree == btree)
- __evict_list_clear(session, evict);
- __wt_spin_unlock(session, &cache->evict_lock);
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+
+ for (q = 0; q < WT_EVICT_QUEUE_MAX; q++) {
+ __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
+ elem = cache->evict_queues[q].evict_max;
+ for (i = 0, evict = cache->evict_queues[q].evict_queue;
+ i < elem; i++, evict++)
+ if (evict->btree == btree)
+ __evict_list_clear(session, evict);
+ __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
+ }
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
/*
* We have disabled further eviction: wait for concurrent LRU eviction
@@ -870,6 +879,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
__wt_spin_unlock(session, &cache->evict_walk_lock);
}
+#define APP_EVICT_THRESHOLD 3 /* Threshold to help evict */
/*
* __evict_lru_pages --
* Get pages from the LRU queue to evict.
@@ -877,7 +887,27 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
static int
__evict_lru_pages(WT_SESSION_IMPL *session, bool is_server)
{
+ WT_CACHE *cache;
WT_DECL_RET;
+ uint64_t app_evict_percent, total_evict;
+
+ /*
+ * The server will not help evict if the workers are coping with
+ * eviction workload, that is, if fewer than the threshold of the
+ * pages are evicted by application threads.
+ */
+ if (is_server && S2C(session)->evict_workers > 1) {
+ cache = S2C(session)->cache;
+ total_evict = cache->app_evicts +
+ cache->server_evicts + cache->worker_evicts;
+ app_evict_percent = (100 * cache->app_evicts) /
+ (total_evict + 1);
+ if (app_evict_percent < APP_EVICT_THRESHOLD) {
+ WT_STAT_FAST_CONN_INCR(session,
+ cache_eviction_server_not_evicting);
+ return (0);
+ }
+ }
/*
* Reconcile and discard some pages: EBUSY is returned if a page fails
@@ -897,23 +927,26 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- uint64_t read_gen_oldest;
- uint32_t candidates, entries;
+ WT_EVICT_QUEUE *evict_queue;
+ uint64_t cutoff, read_gen_oldest;
+ uint32_t candidates, entries, queue_index;
cache = S2C(session)->cache;
+ queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX;
+ evict_queue = &cache->evict_queues[queue_index];
/* Get some more pages to consider for eviction. */
- if ((ret = __evict_walk(session)) != 0)
+ if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0)
return (ret == EBUSY ? 0 : ret);
/* Sort the list into LRU order and restart. */
- __wt_spin_lock(session, &cache->evict_lock);
+ __wt_spin_lock(session, &evict_queue->evict_lock);
- entries = cache->evict_entries;
- qsort(cache->evict_queue,
+ entries = evict_queue->evict_entries;
+ qsort(evict_queue->evict_queue,
entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
- while (entries > 0 && cache->evict_queue[entries - 1].ref == NULL)
+ while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL)
--entries;
/*
@@ -922,9 +955,10 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* candidates so we never end up with more candidates than entries.
*/
while (entries > WT_EVICT_WALK_BASE)
- __evict_list_clear(session, &cache->evict_queue[--entries]);
+ __evict_list_clear(session,
+ &evict_queue->evict_queue[--entries]);
- cache->evict_entries = entries;
+ evict_queue->evict_entries = entries;
if (entries == 0) {
/*
@@ -932,9 +966,12 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Make sure application threads don't read past the end of the
* candidate list, or they may race with the next walk.
*/
- cache->evict_candidates = 0;
+ evict_queue->evict_candidates = 0;
+ __wt_spin_unlock(session, &evict_queue->evict_lock);
+ __wt_spin_lock(session, &cache->evict_queue_lock);
cache->evict_current = NULL;
- __wt_spin_unlock(session, &cache->evict_lock);
+ cache->evict_current_queue = NULL;
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
return (0);
}
@@ -945,7 +982,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*/
- cache->evict_candidates = entries;
+ evict_queue->evict_candidates = entries;
} else {
/*
* Find the oldest read generation we have in the queue, used
@@ -955,7 +992,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
read_gen_oldest = WT_READGEN_OLDEST;
for (candidates = 0; candidates < entries; ++candidates) {
read_gen_oldest =
- cache->evict_queue[candidates].score;
+ __evict_read_gen(
+ &evict_queue->evict_queue[candidates]);
if (read_gen_oldest != WT_READGEN_OLDEST)
break;
}
@@ -964,68 +1002,58 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*
- * We normally never take more than 50% of the entries but if
- * 50% of the entries were at the oldest read generation, take
- * all of them.
+ * We normally never take more than 50% of the entries; if 50%
+ * of the entries were at the oldest read generation, take them.
*/
if (read_gen_oldest == WT_READGEN_OLDEST)
- cache->evict_candidates = entries;
+ evict_queue->evict_candidates = entries;
else if (candidates >= entries / 2)
- cache->evict_candidates = candidates;
+ evict_queue->evict_candidates = candidates;
else {
+ /* Save the calculated oldest generation. */
+ cache->read_gen_oldest = read_gen_oldest;
+
+ /* Find the bottom 25% of read generations. */
+ cutoff =
+ (3 * read_gen_oldest + __evict_read_gen(
+ &evict_queue->evict_queue[entries - 1])) / 4;
+
/*
- * Take all of the urgent pages plus a third of
- * ordinary candidates (which could be expressed as
- * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
- * steady state, we want to get as many candidates as
- * the eviction walk adds to the queue.
- *
- * That said, if there is only one entry, which is
- * normal when populating an empty file, don't exclude
- * it.
+ * Don't take less than 10% or more than 50% of entries,
+ * regardless. That said, if there is only one entry,
+ * which is normal when populating an empty file, don't
+ * exclude it.
*/
- cache->evict_candidates =
- 1 + candidates + ((entries - candidates) - 1) / 3;
- cache->read_gen_oldest = read_gen_oldest;
+ for (candidates = 1 + entries / 10;
+ candidates < entries / 2;
+ candidates++)
+ if (__evict_read_gen(
+ &evict_queue->evict_queue[candidates]) >
+ cutoff)
+ break;
+ evict_queue->evict_candidates = candidates;
}
}
- cache->evict_current = cache->evict_queue;
- __wt_spin_unlock(session, &cache->evict_lock);
-
+ __wt_spin_unlock(session, &evict_queue->evict_lock);
/*
- * The eviction server thread doesn't do any actual eviction if there
- * are multiple eviction workers running.
+ * Now we can set the next queue.
*/
- WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond));
-
- return (0);
-}
-
-/*
- * __evict_server_work --
- * Evict pages from the cache based on their read generation.
- */
-static int
-__evict_server_work(WT_SESSION_IMPL *session)
-{
- WT_CACHE *cache;
-
- cache = S2C(session)->cache;
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+ if (cache->evict_current == NULL)
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty);
+ else
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty);
- if (S2C(session)->evict_workers > 1) {
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_server_not_evicting);
+ cache->evict_current = evict_queue->evict_queue;
+ cache->evict_current_queue = evict_queue;
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
- /*
- * If there are candidates queued, give other threads a chance
- * to access them before gathering more.
- */
- if (cache->evict_candidates > 10 &&
- cache->evict_current != NULL)
- __wt_yield();
- } else
- WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true));
+ /*
+ * Signal any application or helper threads that may be waiting
+ * to help with eviction.
+ */
+ WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond));
return (0);
}
@@ -1035,14 +1063,16 @@ __evict_server_work(WT_SESSION_IMPL *session)
* Fill in the array by walking the next set of pages.
*/
static int
-__evict_walk(WT_SESSION_IMPL *session)
+__evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index)
{
WT_BTREE *btree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, prev_slot, retries, slot, start_slot, spins;
+ WT_EVICT_QUEUE *evict_queue;
+ u_int max_entries, prev_slot, retries;
+ u_int slot, start_slot, spins;
bool dhandle_locked, incr;
conn = S2C(session);
@@ -1052,24 +1082,20 @@ __evict_walk(WT_SESSION_IMPL *session)
dhandle_locked = incr = false;
retries = 0;
- if (cache->evict_current == NULL)
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty);
- else
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty);
-
/*
* Set the starting slot in the queue and the maximum pages added
* per walk.
*/
- start_slot = slot = cache->evict_entries;
- max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
+ evict_queue = &cache->evict_queues[queue_index];
+ start_slot = slot = evict_queue->evict_entries;
+ max_entries = slot + WT_EVICT_WALK_INCR;
retry: while (slot < max_entries && ret == 0) {
/*
* If another thread is waiting on the eviction server to clear
* the walk point in a tree, give up.
*/
- if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS))
+ if (cache->pass_intr != 0)
break;
/*
@@ -1079,7 +1105,7 @@ retry: while (slot < max_entries && ret == 0) {
if (!dhandle_locked) {
for (spins = 0; (ret = __wt_spin_trylock(
session, &conn->dhandle_lock)) == EBUSY &&
- !F_ISSET(cache, WT_CACHE_CLEAR_WALKS);
+ cache->pass_intr == 0;
spins++) {
if (spins < WT_THOUSAND)
__wt_yield();
@@ -1145,6 +1171,7 @@ retry: while (slot < max_entries && ret == 0) {
* useful in the past.
*/
if (btree->evict_walk_period != 0 &&
+ evict_queue->evict_entries >= WT_EVICT_WALK_INCR &&
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
@@ -1171,7 +1198,7 @@ retry: while (slot < max_entries && ret == 0) {
cache->evict_file_next = dhandle;
WT_WITH_DHANDLE(session, dhandle,
ret = __evict_walk_file(
- session, max_entries, &slot));
+ session, queue_index, &slot));
WT_ASSERT(session, session->split_gen == 0);
}
__wt_spin_unlock(session, &cache->evict_walk_lock);
@@ -1204,17 +1231,17 @@ retry: while (slot < max_entries && ret == 0) {
* Try two passes through all the files, give up when we have some
* candidates and we aren't finding more.
*/
- if (!F_ISSET(cache, WT_CACHE_CLEAR_WALKS) && ret == 0 &&
+ if (cache->pass_intr == 0 && ret == 0 &&
slot < max_entries && (retries < 2 ||
(retries < 10 &&
!FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- (slot == cache->evict_entries || slot > start_slot)))) {
+ (slot == evict_queue->evict_entries || slot > start_slot)))) {
start_slot = slot;
++retries;
goto retry;
}
- cache->evict_entries = slot;
+ evict_queue->evict_entries = slot;
return (ret);
}
@@ -1223,24 +1250,20 @@ retry: while (slot < max_entries && ret == 0) {
* Initialize a WT_EVICT_ENTRY structure with a given page.
*/
static void
-__evict_init_candidate(
- WT_SESSION_IMPL *session, WT_EVICT_ENTRY *evict, WT_REF *ref)
+__evict_init_candidate(WT_SESSION_IMPL *session,
+ WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
{
- WT_CACHE *cache;
u_int slot;
- cache = S2C(session)->cache;
-
/* Keep track of the maximum slot we are using. */
- slot = (u_int)(evict - cache->evict_queue);
- if (slot >= cache->evict_max)
- cache->evict_max = slot + 1;
+ slot = (u_int)(evict - evict_queue->evict_queue);
+ if (slot >= evict_queue->evict_max)
+ evict_queue->evict_max = slot + 1;
if (evict->ref != NULL)
__evict_list_clear(session, evict);
- evict->btree = S2BT(session);
evict->ref = ref;
- evict->score = __evict_read_gen(evict);
+ evict->btree = S2BT(session);
/* Mark the page on the list; set last to flush the other updates. */
F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU);
@@ -1251,65 +1274,34 @@ __evict_init_candidate(
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session, u_int max_entries, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
+ WT_EVICT_QUEUE *evict_queue;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
- uint64_t btree_inuse, bytes_per_slot, cache_inuse;
uint64_t pages_seen, refs_walked;
- uint32_t remaining_slots, target_pages, total_slots, walk_flags;
+ uint32_t walk_flags;
int internal_pages, restarts;
bool enough, modified;
conn = S2C(session);
btree = S2BT(session);
cache = conn->cache;
+ evict_queue = &cache->evict_queues[queue_index];
internal_pages = restarts = 0;
enough = false;
- /*
- * Figure out how many slots to fill from this tree.
- * Note that some care is taken in the calculation to avoid overflow.
- */
- start = cache->evict_queue + *slotp;
- btree_inuse = __wt_btree_bytes_inuse(session);
- cache_inuse = __wt_cache_bytes_inuse(cache);
- remaining_slots = max_entries - *slotp;
- total_slots = max_entries - cache->evict_entries;
- target_pages = (uint32_t)(btree_inuse /
- (cache_inuse / total_slots));
-
- /*
- * The target number of pages for this tree is proportional to the
- * space it is taking up in cache. Round to the nearest number of
- * slots so we assign all of the slots to a tree filling 99+% of the
- * cache (and only have to walk it once).
- */
- bytes_per_slot = cache_inuse / total_slots;
- target_pages = (uint32_t)(
- (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
- if (target_pages == 0) {
- /*
- * Randomly walk trees with a tiny fraction of the cache in
- * case there are so many trees that none of them use enough of
- * the cache to be allocated slots.
- */
- if (__wt_random(&session->rnd) / (double)UINT32_MAX >
- btree_inuse / (double)cache_inuse)
- return (0);
- target_pages = 10;
- }
-
+ start = evict_queue->evict_queue + *slotp;
+ end = start + WT_EVICT_WALK_PER_FILE;
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- target_pages > remaining_slots)
- target_pages = remaining_slots;
- end = start + target_pages;
+ end > evict_queue->evict_queue + cache->evict_slots)
+ end = evict_queue->evict_queue + cache->evict_slots;
walk_flags =
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
@@ -1430,7 +1422,7 @@ fast: /* If the page can't be evicted, give up. */
}
WT_ASSERT(session, evict->ref == NULL);
- __evict_init_candidate(session, evict, ref);
+ __evict_init_candidate(session, evict_queue, evict, ref);
++evict;
if (WT_PAGE_IS_INTERNAL(page))
@@ -1472,6 +1464,43 @@ fast: /* If the page can't be evicted, give up. */
}
/*
+ * __evict_check_entry_size --
+ * Check if the size of an entry is too large for this thread to evict.
+ * We use this so that the server thread doesn't get stalled evicting
+ * a very large page.
+ */
+static bool
+__evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry)
+{
+ WT_CACHE *cache;
+ WT_PAGE *page;
+ WT_REF *ref;
+ uint64_t max;
+
+ cache = S2C(session)->cache;
+
+ if (cache->pages_evict == 0)
+ return (true);
+
+ max = (cache->bytes_evict / cache->pages_evict) * 4;
+ if ((ref = entry->ref) != NULL) {
+ if ((page = ref->page) == NULL)
+ return (true);
+ /*
+ * If this page is more than four times the average evicted page
+ * size then return false. Return true in all other cases.
+ * XXX Should we care here if the page is dirty? Probably...
+ */
+ if (page->memory_footprint > max) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_toobig);
+ return (false);
+ }
+ }
+ return (true);
+}
+
+/*
* __evict_get_ref --
* Get a page for eviction.
*/
@@ -1481,6 +1510,7 @@ __evict_get_ref(
{
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
+ WT_EVICT_QUEUE *evict_queue;
uint32_t candidates;
cache = S2C(session)->cache;
@@ -1488,39 +1518,63 @@ __evict_get_ref(
*refp = NULL;
/*
- * Avoid the LRU lock if no pages are available. If there are pages
- * available, spin until we get the lock. If this function returns
- * without getting a page to evict, application threads assume there
- * are no more pages available and will attempt to wake the eviction
- * server.
+ * Avoid the LRU lock if no pages are available.
*/
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref);
+ if (cache->evict_current == NULL) {
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty);
+ return (WT_NOTFOUND);
+ }
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+ /*
+ * Verify there are still pages available.
+ */
+ if (cache->evict_current == NULL) {
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2);
+ return (WT_NOTFOUND);
+ }
+ /*
+ * We got the queue lock, which should be fast, and now we want to
+ * get the lock on the individual queue. We know that the shared
+ * queue fields cannot change now.
+ */
+ evict_queue = cache->evict_current_queue;
for (;;) {
- if (cache->evict_current == NULL)
- return (WT_NOTFOUND);
- if (__wt_spin_trylock(session, &cache->evict_lock) == 0)
+ if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0)
break;
- if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
return (WT_NOTFOUND);
+ }
__wt_yield();
}
-
/*
* Only evict half of the pages before looking for more. The remainder
* are left to eviction workers (if configured), or application threads
* if necessary.
*/
- candidates = cache->evict_candidates;
+ candidates = evict_queue->evict_candidates;
if (is_server && candidates > 1)
candidates /= 2;
/* Get the next page queued for eviction. */
- for (evict = cache->evict_current;
- evict >= cache->evict_queue &&
- evict < cache->evict_queue + candidates;
- ++evict) {
- if (evict->ref == NULL)
- continue;
+ while ((evict = cache->evict_current) != NULL &&
+ evict < evict_queue->evict_queue + candidates &&
+ evict->ref != NULL) {
WT_ASSERT(session, evict->btree != NULL);
+ /*
+ * If the server is helping out and encounters an entry that
+ * is too large, it stops helping. Evicting a very large
+ * page in the server thread could stall eviction from finding
+ * new work.
+ */
+ if (is_server && S2C(session)->evict_workers > 1 &&
+ !__evict_check_entry_size(session, evict))
+ break;
+
+ /* Move to the next item. */
+ ++cache->evict_current;
/*
* Lock the page while holding the eviction mutex to prevent
@@ -1551,12 +1605,10 @@ __evict_get_ref(
}
/* Clear the current pointer if there are no more candidates. */
- if (evict == NULL || evict + 1 >=
- cache->evict_queue + cache->evict_candidates)
+ if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates)
cache->evict_current = NULL;
- else
- cache->evict_current = evict + 1;
- __wt_spin_unlock(session, &cache->evict_lock);
+ __wt_spin_unlock(session, &evict_queue->evict_lock);
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
return ((*refp == NULL) ? WT_NOTFOUND : 0);
}
@@ -1569,27 +1621,34 @@ static int
__evict_page(WT_SESSION_IMPL *session, bool is_server)
{
WT_BTREE *btree;
+ WT_CACHE *cache;
WT_DECL_RET;
WT_REF *ref;
WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+ cache = S2C(session)->cache;
/*
* An internal session flags either the server itself or an eviction
* worker thread.
*/
- if (is_server)
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_server_evicting);
- else if (F_ISSET(session, WT_SESSION_INTERNAL))
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_worker_evicting);
- else {
+ if (F_ISSET(session, WT_SESSION_INTERNAL)) {
+ if (is_server) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_evicting);
+ cache->server_evicts++;
+ } else {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_worker_evicting);
+ cache->worker_evicts++;
+ }
+ } else {
if (__wt_page_is_modified(ref->page))
WT_STAT_FAST_CONN_INCR(
session, cache_eviction_app_dirty);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_app);
+ cache->app_evicts++;
}
/*
@@ -1685,7 +1744,6 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
/* Evict a page. */
switch (ret = __evict_page(session, false)) {
case 0:
- cache->app_evicts++;
if (txn_busy)
return (0);
/* FALLTHROUGH */
@@ -1738,9 +1796,9 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session)
int
__wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
{
+ FILE *fp;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle, *saved_dhandle;
- WT_FH *fh;
WT_PAGE *page;
WT_REF *next_walk;
uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages;
@@ -1752,13 +1810,12 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
total_bytes = 0;
if (ofile == NULL)
- fh = WT_STDERR(session);
- else
- WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_STREAM_WRITE, &fh));
+ fp = stderr;
+ else if ((fp = fopen(ofile, "w")) == NULL)
+ return (EIO);
/* Note: odd string concatenation avoids spelling errors. */
- (void)__wt_fprintf(session, fh, "==========\n" "cache dump\n");
+ (void)fprintf(fp, "==========\n" "cache dump\n");
saved_dhandle = session->dhandle;
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
@@ -1797,24 +1854,22 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
session->dhandle = NULL;
if (dhandle->checkpoint == NULL)
- (void)__wt_fprintf(session, fh,
- "%s(<live>): \n", dhandle->name);
+ (void)fprintf(fp, "%s(<live>): \n", dhandle->name);
else
- (void)__wt_fprintf(session, fh,
- "%s(checkpoint=%s): \n",
+ (void)fprintf(fp, "%s(checkpoint=%s): \n",
dhandle->name, dhandle->checkpoint);
if (intl_pages != 0)
- (void)__wt_fprintf(session, fh,
+ (void)fprintf(fp,
"\t" "internal pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
intl_pages, max_intl_bytes, intl_bytes >> 20);
if (leaf_pages != 0)
- (void)__wt_fprintf(session, fh,
+ (void)fprintf(fp,
"\t" "leaf pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
leaf_pages, max_leaf_bytes, leaf_bytes >> 20);
if (dirty_pages != 0)
- (void)__wt_fprintf(session, fh,
+ (void)fprintf(fp,
"\t" "dirty pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
dirty_pages, max_dirty_bytes, dirty_bytes >> 20);
@@ -1830,13 +1885,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
if (conn->cache->overhead_pct != 0)
total_bytes +=
(total_bytes * (uint64_t)conn->cache->overhead_pct) / 100;
- (void)__wt_fprintf(session, fh,
+ (void)fprintf(fp,
"cache dump: total found = %" PRIu64
"MB vs tracked inuse %" PRIu64 "MB\n",
total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20);
- (void)__wt_fprintf(session, fh, "==========\n");
- if (ofile != NULL)
- WT_RET(__wt_close(session, &fh));
+ (void)fprintf(fp, "==========\n");
+ if (ofile != NULL && fclose(fp) != 0)
+ return (EIO);
return (0);
}
#endif
diff --git a/src/include/api.h b/src/include/api.h
index c6a5af40698..50b2eab83b8 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -118,7 +118,7 @@
#define JOINABLE_CURSOR_CALL_CHECK(cur) \
if (F_ISSET(cur, WT_CURSTD_JOINED)) \
- WT_ERR(__wt_curindex_joined(cur))
+ WT_ERR(__wt_curjoin_joined(cur))
#define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \
CURSOR_API_CALL(cur, s, n, bt); \
diff --git a/src/include/bitstring.i b/src/include/bitstring.i
index 0d30e55d1ef..08746beb9b9 100644
--- a/src/include/bitstring.i
+++ b/src/include/bitstring.i
@@ -261,10 +261,10 @@ __bit_getv(uint8_t *bitf, uint64_t entry, uint8_t width)
* Return a record number's bit-field value.
*/
static inline uint8_t
-__bit_getv_recno(WT_PAGE *page, uint64_t recno, uint8_t width)
+__bit_getv_recno(WT_REF *ref, uint64_t recno, uint8_t width)
{
return (__bit_getv(
- page->pg_fix_bitf, recno - page->pg_fix_recno, width));
+ ref->page->pg_fix_bitf, recno - ref->ref_recno, width));
}
/*
@@ -305,13 +305,3 @@ __bit_setv(uint8_t *bitf, uint64_t entry, uint8_t width, uint8_t value)
__BIT_SET(1, 0x01);
}
}
-
-/*
- * __bit_setv_recno --
- * Set a record number's bit-field value.
- */
-static inline void
-__bit_setv_recno(WT_PAGE *page, uint64_t recno, uint8_t width, uint8_t value)
-{
- __bit_setv(page->pg_fix_bitf, recno - page->pg_fix_recno, width, value);
-}
diff --git a/src/include/block.h b/src/include/block.h
index e964fb4e8c2..a8080c1651c 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -174,6 +174,7 @@ struct __wt_bm {
int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
+ int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
int (*read)
(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t);
@@ -196,9 +197,9 @@ struct __wt_bm {
WT_BLOCK *block; /* Underlying file */
- void *map; /* Mapped region */
- size_t maplen;
- void *mappingcookie;
+ void *map; /* Mapped region */
+ size_t maplen;
+ void *mapped_cookie;
/*
* There's only a single block manager handle that can be written, all
@@ -224,8 +225,6 @@ struct __wt_block {
wt_off_t size; /* File size */
wt_off_t extend_size; /* File extended size */
wt_off_t extend_len; /* File extend chunk size */
- bool nowait_sync_available; /* File can flush asynchronously */
- bool preload_available; /* File pages can be preloaded */
/* Configuration information, set when the file is opened. */
uint32_t allocfirst; /* Allocation is first-fit */
@@ -262,6 +261,7 @@ struct __wt_block {
/* Verification support */
bool verify; /* If performing verification */
+ bool verify_layout; /* Print out file layout information */
bool verify_strict; /* Fail hard on any error */
wt_off_t verify_size; /* Checkpoint's file size */
WT_EXTLIST verify_alloc; /* Verification allocation list */
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 7cdf2bef43a..9700b6f4761 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -251,6 +251,7 @@ struct __wt_page_modify {
*/
union {
WT_ADDR replace; /* Single, written replacement block */
+#undef mod_replace
#define mod_replace u1.replace
struct { /* Multiple replacement blocks */
@@ -295,7 +296,9 @@ struct __wt_page_modify {
} *multi;
uint32_t multi_entries; /* Multiple blocks element count */
} m;
+#undef mod_multi
#define mod_multi u1.m.multi
+#undef mod_multi_entries
#define mod_multi_entries u1.m.multi_entries
} u1;
@@ -318,6 +321,7 @@ struct __wt_page_modify {
*/
WT_PAGE *root_split; /* Linked list of root split pages */
} intl;
+#undef mod_root_split
#define mod_root_split u2.intl.root_split
struct {
/*
@@ -344,10 +348,24 @@ struct __wt_page_modify {
* write any implicitly created deleted records for the page.
*/
uint64_t split_recno;
- } leaf;
-#define mod_append u2.leaf.append
-#define mod_update u2.leaf.update
-#define mod_split_recno u2.leaf.split_recno
+ } column_leaf;
+#undef mod_col_append
+#define mod_col_append u2.column_leaf.append
+#undef mod_col_update
+#define mod_col_update u2.column_leaf.update
+#undef mod_col_split_recno
+#define mod_col_split_recno u2.column_leaf.split_recno
+ struct {
+ /* Inserted items for row-store. */
+ WT_INSERT_HEAD **insert;
+
+ /* Updated items for row-stores. */
+ WT_UPDATE **update;
+ } row_leaf;
+#undef mod_row_insert
+#define mod_row_insert u2.row_leaf.insert
+#undef mod_row_update
+#define mod_row_update u2.row_leaf.update
} u2;
/*
@@ -433,7 +451,6 @@ struct __wt_page {
* doesn't read it multiple times).
*/
struct {
- uint64_t recno; /* Starting recno */
WT_REF *parent_ref; /* Parent reference */
struct __wt_page_index {
@@ -442,8 +459,7 @@ struct __wt_page {
WT_REF **index;
} * volatile __index; /* Collated children */
} intl;
-#undef pg_intl_recno
-#define pg_intl_recno u.intl.recno
+#undef pg_intl_parent_ref
#define pg_intl_parent_ref u.intl.parent_ref
/*
@@ -482,40 +498,19 @@ struct __wt_page {
/* Row-store leaf page. */
struct {
- /*
- * The column-store leaf page modification structures
- * live in the WT_PAGE_MODIFY structure to keep the
- * WT_PAGE structure as small as possible for read-only
- * pages. For consistency, we could move the row-store
- * modification structures into WT_PAGE_MODIFY too, but
- * that doesn't shrink WT_PAGE any further and it would
- * require really ugly naming inside of WT_PAGE_MODIFY
- * to avoid growing that structure.
- */
- WT_INSERT_HEAD **ins; /* Inserts */
- WT_UPDATE **upd; /* Updates */
-
WT_ROW *d; /* Key/value pairs */
uint32_t entries; /* Entries */
} row;
#undef pg_row_d
#define pg_row_d u.row.d
-#undef pg_row_ins
-#define pg_row_ins u.row.ins
-#undef pg_row_upd
-#define pg_row_upd u.row.upd
#undef pg_row_entries
#define pg_row_entries u.row.entries
/* Fixed-length column-store leaf page. */
struct {
- uint64_t recno; /* Starting recno */
-
uint8_t *bitf; /* Values */
uint32_t entries; /* Entries */
} col_fix;
-#undef pg_fix_recno
-#define pg_fix_recno u.col_fix.recno
#undef pg_fix_bitf
#define pg_fix_bitf u.col_fix.bitf
#undef pg_fix_entries
@@ -523,8 +518,6 @@ struct __wt_page {
/* Variable-length column-store leaf page. */
struct {
- uint64_t recno; /* Starting recno */
-
WT_COL *d; /* Values */
/*
@@ -537,8 +530,6 @@ struct __wt_page {
uint32_t entries; /* Entries */
} col_var;
-#undef pg_var_recno
-#define pg_var_recno u.col_var.recno
#undef pg_var_d
#define pg_var_d u.col_var.d
#undef pg_var_repeats
@@ -732,6 +723,10 @@ struct __wt_ref {
uint64_t recno; /* Column-store: starting recno */
void *ikey; /* Row-store: key */
} key;
+#undef ref_recno
+#define ref_recno key.recno
+#undef ref_ikey
+#define ref_ikey key.ikey
WT_PAGE_DELETED *page_del; /* Deleted on-disk page information */
};
@@ -1007,12 +1002,15 @@ struct __wt_insert_head {
* of pointers and the specific structure exist, else NULL.
*/
#define WT_ROW_INSERT_SLOT(page, slot) \
- ((page)->pg_row_ins == NULL ? NULL : (page)->pg_row_ins[slot])
+ ((page)->modify == NULL || \
+ (page)->modify->mod_row_insert == NULL ? \
+ NULL : (page)->modify->mod_row_insert[slot])
#define WT_ROW_INSERT(page, ip) \
WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip))
#define WT_ROW_UPDATE(page, ip) \
- ((page)->pg_row_upd == NULL ? \
- NULL : (page)->pg_row_upd[WT_ROW_SLOT(page, ip)])
+ ((page)->modify == NULL || \
+ (page)->modify->mod_row_update == NULL ? \
+ NULL : (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)])
/*
* WT_ROW_INSERT_SMALLEST references an additional slot past the end of the
* "one per WT_ROW slot" insert array. That's because the insert array
@@ -1020,8 +1018,9 @@ struct __wt_insert_head {
* original page.
*/
#define WT_ROW_INSERT_SMALLEST(page) \
- ((page)->pg_row_ins == NULL ? \
- NULL : (page)->pg_row_ins[(page)->pg_row_entries])
+ ((page)->modify == NULL || \
+ (page)->modify->mod_row_insert == NULL ? \
+ NULL : (page)->modify->mod_row_insert[(page)->pg_row_entries])
/*
* The column-store leaf page update lists are arrays of pointers to structures,
@@ -1029,8 +1028,9 @@ struct __wt_insert_head {
* of pointers and the specific structure exist, else NULL.
*/
#define WT_COL_UPDATE_SLOT(page, slot) \
- ((page)->modify == NULL || (page)->modify->mod_update == NULL ? \
- NULL : (page)->modify->mod_update[slot])
+ ((page)->modify == NULL || \
+ (page)->modify->mod_col_update == NULL ? \
+ NULL : (page)->modify->mod_col_update[slot])
#define WT_COL_UPDATE(page, ip) \
WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip))
@@ -1046,8 +1046,9 @@ struct __wt_insert_head {
* appends.
*/
#define WT_COL_APPEND(page) \
- ((page)->modify != NULL && (page)->modify->mod_append != NULL ? \
- (page)->modify->mod_append[0] : NULL)
+ ((page)->modify == NULL || \
+ (page)->modify->mod_col_append == NULL ? \
+ NULL : (page)->modify->mod_col_append[0])
/* WT_FIX_FOREACH walks fixed-length bit-fields on a disk page. */
#define WT_FIX_FOREACH(btree, dsk, v, i) \
diff --git a/src/include/btree.h b/src/include/btree.h
index 96097115afd..fd921677751 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -129,8 +129,6 @@ struct __wt_btree {
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
- uint64_t bytes_inmem; /* Cache bytes in memory. */
-
WT_REF *evict_ref; /* Eviction thread's location */
uint64_t evict_priority; /* Relative priority of cached pages */
u_int evict_walk_period; /* Skip this many LRU walks */
diff --git a/src/include/btree.i b/src/include/btree.i
index 03f27861e75..e0102a11511 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -55,27 +55,6 @@ __wt_btree_block_free(
}
/*
- * __wt_btree_bytes_inuse --
- * Return the number of bytes in use.
- */
-static inline uint64_t
-__wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
-{
- WT_CACHE *cache;
- uint64_t bytes_inuse;
-
- cache = S2C(session)->cache;
-
- /* Adjust the cache size to take allocation overhead into account. */
- bytes_inuse = S2BT(session)->bytes_inmem;
- if (cache->overhead_pct != 0)
- bytes_inuse +=
- (bytes_inuse * (uint64_t)cache->overhead_pct) / 100;
-
- return (bytes_inuse);
-}
-
-/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -87,7 +66,6 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
cache = S2C(session)->cache;
- (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size);
(void)__wt_atomic_add64(&cache->bytes_inmem, size);
(void)__wt_atomic_addsize(&page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
@@ -218,8 +196,6 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
__wt_cache_decr_check_uint64(
- session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
- __wt_cache_decr_check_uint64(
session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
__wt_cache_decr_check_size(
session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
@@ -298,9 +274,8 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
modify = page->modify;
/* Update the bytes in-memory to reflect the eviction. */
- __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem,
- page->memory_footprint, "WT_BTREE.bytes_inmem");
- __wt_cache_decr_check_uint64(session, &cache->bytes_inmem,
+ __wt_cache_decr_check_uint64(session,
+ &cache->bytes_inmem,
page->memory_footprint, "WT_CACHE.bytes_inmem");
/* Update the bytes_internal value to reflect the eviction */
@@ -536,8 +511,8 @@ __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep)
/*
* An internal page key is in one of two places: if we instantiated the
- * key (for example, when reading the page), WT_REF.key.ikey references
- * a WT_IKEY structure, otherwise WT_REF.key.ikey references an on-page
+ * key (for example, when reading the page), WT_REF.ref_ikey references
+ * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page
* key offset/length pair.
*
* Now the magic: allocated memory must be aligned to store any standard
@@ -561,14 +536,14 @@ __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep)
#define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32)
#define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1)
#define WT_IK_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 1)
- v = (uintptr_t)ref->key.ikey;
+ v = (uintptr_t)ref->ref_ikey;
if (v & WT_IK_FLAG) {
*(void **)keyp =
WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v));
*sizep = WT_IK_DECODE_KEY_LEN(v);
} else {
- *(void **)keyp = WT_IKEY_DATA(ref->key.ikey);
- *sizep = ((WT_IKEY *)ref->key.ikey)->size;
+ *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey);
+ *sizep = ((WT_IKEY *)ref->ref_ikey)->size;
}
}
@@ -587,7 +562,7 @@ __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack)
v = WT_IK_ENCODE_KEY_LEN(unpack->size) |
WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) |
WT_IK_FLAG;
- ref->key.ikey = (void *)v;
+ ref->ref_ikey = (void *)v;
}
/*
@@ -602,8 +577,8 @@ __wt_ref_key_instantiated(WT_REF *ref)
/*
* See the comment in __wt_ref_key for an explanation of the magic.
*/
- v = (uintptr_t)ref->key.ikey;
- return (v & WT_IK_FLAG ? NULL : ref->key.ikey);
+ v = (uintptr_t)ref->ref_ikey;
+ return (v & WT_IK_FLAG ? NULL : ref->ref_ikey);
}
/*
@@ -616,10 +591,10 @@ __wt_ref_key_clear(WT_REF *ref)
/*
* The key union has 2 8B fields; this is equivalent to:
*
- * ref->key.recno = WT_RECNO_OOB;
- * ref->key.ikey = NULL;
+ * ref->ref_recno = WT_RECNO_OOB;
+ * ref->ref_ikey = NULL;
*/
- ref->key.recno = 0;
+ ref->ref_recno = 0;
}
/*
@@ -1385,7 +1360,7 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_CONNECTION_IMPL *conn;
WT_HAZARD *hp;
WT_SESSION_IMPL *s;
- uint32_t i, hazard_size, session_cnt;
+ uint32_t i, j, hazard_size, max, session_cnt;
conn = S2C(session);
@@ -1397,15 +1372,28 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page)
* come or go, we'll check the slots for all of the sessions that could
* have been active when we started our check.
*/
+ WT_STAT_FAST_CONN_INCR(session, cache_hazard_checks);
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
+ for (s = conn->sessions, i = 0, j = 0, max = 0;
+ i < session_cnt; ++s, ++i) {
if (!s->active)
continue;
WT_ORDERED_READ(hazard_size, s->hazard_size);
- for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp)
- if (hp->page == page)
+ if (s->hazard_size > max) {
+ max = s->hazard_size;
+ WT_STAT_FAST_CONN_SET(session,
+ cache_hazard_max, max);
+ }
+ for (hp = s->hazard; hp < s->hazard + hazard_size; ++hp) {
+ ++j;
+ if (hp->page == page) {
+ WT_STAT_FAST_CONN_INCRV(session,
+ cache_hazard_walks, j);
return (hp);
+ }
+ }
}
+ WT_STAT_FAST_CONN_INCRV(session, cache_hazard_walks, j);
return (NULL);
}
diff --git a/src/include/btree_cmp.i b/src/include/btree_cmp.i
index 1993c1be293..23a462e4e50 100644
--- a/src/include/btree_cmp.i
+++ b/src/include/btree_cmp.i
@@ -52,8 +52,8 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
for (; len > 0;
len -= WT_VECTOR_SIZE,
userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
- u = _mm_load_si128((__m128i *)userp);
- t = _mm_load_si128((__m128i *)treep);
+ u = _mm_load_si128((const __m128i *)userp);
+ t = _mm_load_si128((const __m128i *)treep);
res_eq = _mm_cmpeq_epi8(u, t);
if (_mm_movemask_epi8(res_eq) != 65535)
break;
@@ -62,8 +62,8 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
for (; len > 0;
len -= WT_VECTOR_SIZE,
userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
- u = _mm_loadu_si128((__m128i *)userp);
- t = _mm_loadu_si128((__m128i *)treep);
+ u = _mm_loadu_si128((const __m128i *)userp);
+ t = _mm_loadu_si128((const __m128i *)treep);
res_eq = _mm_cmpeq_epi8(u, t);
if (_mm_movemask_epi8(res_eq) != 65535)
break;
@@ -123,8 +123,8 @@ __wt_lex_compare_skip(
tsz = tree_item->size;
len = WT_MIN(usz, tsz) - *matchp;
- userp = (uint8_t *)user_item->data + *matchp;
- treep = (uint8_t *)tree_item->data + *matchp;
+ userp = (const uint8_t *)user_item->data + *matchp;
+ treep = (const uint8_t *)tree_item->data + *matchp;
#ifdef HAVE_X86INTRIN_H
/* Use vector instructions if we'll execute at least 2 of them. */
@@ -139,8 +139,8 @@ __wt_lex_compare_skip(
len -= WT_VECTOR_SIZE,
userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
*matchp += WT_VECTOR_SIZE) {
- u = _mm_load_si128((__m128i *)userp);
- t = _mm_load_si128((__m128i *)treep);
+ u = _mm_load_si128((const __m128i *)userp);
+ t = _mm_load_si128((const __m128i *)treep);
res_eq = _mm_cmpeq_epi8(u, t);
if (_mm_movemask_epi8(res_eq) != 65535)
break;
@@ -150,8 +150,8 @@ __wt_lex_compare_skip(
len -= WT_VECTOR_SIZE,
userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
*matchp += WT_VECTOR_SIZE) {
- u = _mm_loadu_si128((__m128i *)userp);
- t = _mm_loadu_si128((__m128i *)treep);
+ u = _mm_loadu_si128((const __m128i *)userp);
+ t = _mm_loadu_si128((const __m128i *)treep);
res_eq = _mm_cmpeq_epi8(u, t);
if (_mm_movemask_epi8(res_eq) != 65535)
break;
diff --git a/src/include/cache.h b/src/include/cache.h
index f683ed6b0f8..f4a35de7201 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -13,6 +13,7 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
+#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
@@ -23,7 +24,19 @@
struct __wt_evict_entry {
WT_BTREE *btree; /* Enclosing btree object */
WT_REF *ref; /* Page to flush/evict */
- uint64_t score; /* Relative eviction priority */
+};
+
+#define WT_EVICT_QUEUE_MAX 2
+/*
+ * WT_EVICT_QUEUE --
+ * Encapsulation of an eviction candidate queue.
+ */
+struct __wt_evict_queue {
+ WT_SPINLOCK evict_lock; /* Eviction LRU queue */
+ WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
+ uint32_t evict_candidates; /* LRU list pages to evict */
+ uint32_t evict_entries; /* LRU entries in the queue */
+ volatile uint32_t evict_max; /* LRU maximum eviction slot used */
};
/*
@@ -63,14 +76,20 @@ struct __wt_cache {
uint64_t bytes_overflow; /* Bytes of overflow pages */
uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
uint64_t pages_evict;
+ uint64_t pages_evicted; /* Pages evicted during a pass */
uint64_t bytes_dirty; /* Bytes/pages currently dirty */
uint64_t pages_dirty;
uint64_t bytes_read; /* Bytes read into memory */
- uint64_t app_evicts; /* Pages evicted by user threads */
uint64_t app_waits; /* User threads waited for cache */
+ uint64_t app_evicts; /* Pages evicted by user threads */
+ uint64_t server_evicts; /* Pages evicted by server thread */
+ uint64_t worker_evicts; /* Pages evicted by worker threads */
uint64_t evict_max_page_size; /* Largest page seen at eviction */
+#ifdef HAVE_DIAGNOSTIC
+ struct timespec stuck_ts; /* Stuck timestamp */
+#endif
/*
* Read information.
@@ -83,7 +102,6 @@ struct __wt_cache {
* Eviction thread information.
*/
WT_CONDVAR *evict_cond; /* Eviction server condition */
- WT_SPINLOCK evict_lock; /* Eviction LRU queue */
WT_SPINLOCK evict_walk_lock; /* Eviction walk location */
/* Condition signalled when the eviction server populates the queue */
WT_CONDVAR *evict_waiter_cond;
@@ -98,11 +116,13 @@ struct __wt_cache {
/*
* LRU eviction list information.
*/
- WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
+ WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */
+ WT_SESSION_IMPL *walk_session; /* Eviction pass session */
+ WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */
+ WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX];
+ WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */
WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
- uint32_t evict_candidates; /* LRU list pages to evict */
- uint32_t evict_entries; /* LRU entries in the queue */
- volatile uint32_t evict_max; /* LRU maximum eviction slot used */
+ uint32_t evict_queue_fill; /* LRU eviction queue index to fill */
uint32_t evict_slots; /* LRU list eviction slots */
WT_DATA_HANDLE
*evict_file_next; /* LRU next file to search */
@@ -130,19 +150,28 @@ struct __wt_cache {
#define WT_EVICT_PASS_DIRTY 0x04
#define WT_EVICT_PASS_WOULD_BLOCK 0x08
uint32_t state;
+ /*
+ * Pass interrupt counter.
+ */
+ uint32_t pass_intr; /* Interrupt eviction pass. */
/*
* Flags.
*/
#define WT_CACHE_POOL_MANAGER 0x01 /* The active cache pool manager */
#define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */
-#define WT_CACHE_CLEAR_WALKS 0x04 /* Clear eviction walks */
-#define WT_CACHE_STUCK 0x08 /* Eviction server is stuck */
-#define WT_CACHE_WALK_REVERSE 0x10 /* Scan backwards for candidates */
-#define WT_CACHE_WOULD_BLOCK 0x20 /* Pages that would block apps */
+#define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */
+#define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */
+#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */
uint32_t flags;
};
+#define WT_WITH_PASS_LOCK(session, ret, op) do { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \
+ WT_WITH_LOCK(session, ret, \
+ &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \
+} while (0)
+
/*
* WT_CACHE_POOL --
* A structure that represents a shared cache.
diff --git a/src/include/cache.i b/src/include/cache.i
index 8cf7555e716..72c8307756d 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -166,6 +166,13 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
cache = conn->cache;
/*
+ * If the connection is closing we do not need eviction from an
+ * application thread. The eviction subsystem is already closed.
+ */
+ if (F_ISSET(conn, WT_CONN_CLOSING))
+ return (false);
+
+ /*
* Avoid division by zero if the cache size has not yet been set in a
* shared cache.
*/
@@ -179,6 +186,15 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
pct_full = (u_int)((100 * bytes_inuse) / bytes_max);
if (pct_fullp != NULL)
*pct_fullp = pct_full;
+ /*
+ * If the connection is closing we do not need eviction from an
+ * application thread. The eviction subsystem is already closed.
+ * We return here because some callers depend on the percent full
+ * having been filled in.
+ */
+ if (F_ISSET(conn, WT_CONN_CLOSING))
+ return (false);
+
if (pct_full > cache->eviction_trigger)
return (true);
diff --git a/src/include/cell.i b/src/include/cell.i
index 481d2a29764..c130768e595 100644
--- a/src/include/cell.i
+++ b/src/include/cell.i
@@ -183,9 +183,9 @@ __wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno, size_t size)
p = cell->__chunk + 1;
if (recno == WT_RECNO_OOB)
- cell->__chunk[0] = cell_type; /* Type */
+ cell->__chunk[0] = (uint8_t)cell_type; /* Type */
else {
- cell->__chunk[0] = cell_type | WT_CELL_64V;
+ cell->__chunk[0] = (uint8_t)(cell_type | WT_CELL_64V);
(void)__wt_vpack_uint(&p, 0, recno); /* Record number */
}
(void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
@@ -207,8 +207,8 @@ __wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size)
*/
if (rle < 2 && size <= WT_CELL_SHORT_MAX) {
byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] =
- (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT;
+ cell->__chunk[0] = (uint8_t)
+ ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT);
return (1);
}
@@ -331,8 +331,8 @@ __wt_cell_pack_int_key(WT_CELL *cell, size_t size)
/* Short keys have 6 bits of data length in the descriptor byte. */
if (size <= WT_CELL_SHORT_MAX) {
byte = (uint8_t)size;
- cell->__chunk[0] =
- (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT;
+ cell->__chunk[0] = (uint8_t)
+ ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
return (1);
}
@@ -358,14 +358,14 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
if (size <= WT_CELL_SHORT_MAX) {
if (prefix == 0) {
byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] =
- (byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT;
+ cell->__chunk[0] = (uint8_t)
+ ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
return (1);
} else {
byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] =
- (byte << WT_CELL_SHORT_SHIFT) |
- WT_CELL_KEY_SHORT_PFX;
+ cell->__chunk[0] = (uint8_t)
+ ((byte << WT_CELL_SHORT_SHIFT) |
+ WT_CELL_KEY_SHORT_PFX);
cell->__chunk[1] = prefix; /* Prefix */
return (2);
}
@@ -585,8 +585,8 @@ restart:
WT_CELL_LEN_CHK(cell, 0);
unpack->cell = cell;
unpack->v = 0;
- unpack->raw = __wt_cell_type_raw(cell);
- unpack->type = __wt_cell_type(cell);
+ unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
+ unpack->type = (uint8_t)__wt_cell_type(cell);
unpack->ovfl = 0;
/*
diff --git a/src/include/column.i b/src/include/column.i
index d64e68420a5..d15f874b281 100644
--- a/src/include/column.i
+++ b/src/include/column.i
@@ -209,9 +209,12 @@ __col_insert_search(WT_INSERT_HEAD *ins_head,
* Return the last record number for a variable-length column-store page.
*/
static inline uint64_t
-__col_var_last_recno(WT_PAGE *page)
+__col_var_last_recno(WT_REF *ref)
{
WT_COL_RLE *repeat;
+ WT_PAGE *page;
+
+ page = ref->page;
/*
* If there's an append list, there may be more records on the page.
@@ -220,7 +223,7 @@ __col_var_last_recno(WT_PAGE *page)
*/
if (page->pg_var_nrepeats == 0)
return (page->pg_var_entries == 0 ? 0 :
- page->pg_var_recno + (page->pg_var_entries - 1));
+ ref->ref_recno + (page->pg_var_entries - 1));
repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1];
return ((repeat->recno + repeat->rle) - 1 +
@@ -232,15 +235,19 @@ __col_var_last_recno(WT_PAGE *page)
* Return the last record number for a fixed-length column-store page.
*/
static inline uint64_t
-__col_fix_last_recno(WT_PAGE *page)
+__col_fix_last_recno(WT_REF *ref)
{
+ WT_PAGE *page;
+
+ page = ref->page;
+
/*
* If there's an append list, there may be more records on the page.
* This function ignores those records, our callers must handle that
* explicitly, if they care.
*/
- return (page->pg_fix_entries == 0 ? 0 :
- page->pg_fix_recno + (page->pg_fix_entries - 1));
+ return (page->pg_fix_entries == 0 ?
+ 0 : ref->ref_recno + (page->pg_fix_entries - 1));
}
/*
@@ -248,12 +255,15 @@ __col_fix_last_recno(WT_PAGE *page)
* Search a variable-length column-store page for a record.
*/
static inline WT_COL *
-__col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop)
+__col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop)
{
WT_COL_RLE *repeat;
+ WT_PAGE *page;
uint64_t start_recno;
uint32_t base, indx, limit, start_indx;
+ page = ref->page;
+
/*
* Find the matching slot.
*
@@ -285,7 +295,7 @@ __col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop)
*/
if (base == 0) {
start_indx = 0;
- start_recno = page->pg_var_recno;
+ start_recno = ref->ref_recno;
} else {
repeat = page->pg_var_repeats + (base - 1);
start_indx = repeat->indx + 1;
diff --git a/src/include/config.h b/src/include/config.h
index 48a255134af..486aa50e86c 100644
--- a/src/include/config.h
+++ b/src/include/config.h
@@ -59,41 +59,42 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 7
#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 8
#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 9
-#define WT_CONFIG_ENTRY_WT_CURSOR_close 10
-#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 11
-#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 12
-#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 13
-#define WT_CONFIG_ENTRY_WT_SESSION_close 14
-#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 15
-#define WT_CONFIG_ENTRY_WT_SESSION_compact 16
-#define WT_CONFIG_ENTRY_WT_SESSION_create 17
-#define WT_CONFIG_ENTRY_WT_SESSION_drop 18
-#define WT_CONFIG_ENTRY_WT_SESSION_join 19
-#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 20
-#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 21
-#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 22
-#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 23
-#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 24
-#define WT_CONFIG_ENTRY_WT_SESSION_rename 25
-#define WT_CONFIG_ENTRY_WT_SESSION_reset 26
-#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 27
-#define WT_CONFIG_ENTRY_WT_SESSION_salvage 28
-#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 29
-#define WT_CONFIG_ENTRY_WT_SESSION_strerror 30
-#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 31
-#define WT_CONFIG_ENTRY_WT_SESSION_truncate 32
-#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 33
-#define WT_CONFIG_ENTRY_WT_SESSION_verify 34
-#define WT_CONFIG_ENTRY_colgroup_meta 35
-#define WT_CONFIG_ENTRY_file_config 36
-#define WT_CONFIG_ENTRY_file_meta 37
-#define WT_CONFIG_ENTRY_index_meta 38
-#define WT_CONFIG_ENTRY_lsm_meta 39
-#define WT_CONFIG_ENTRY_table_meta 40
-#define WT_CONFIG_ENTRY_wiredtiger_open 41
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 42
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 43
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 44
+#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 10
+#define WT_CONFIG_ENTRY_WT_CURSOR_close 11
+#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 12
+#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 13
+#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 14
+#define WT_CONFIG_ENTRY_WT_SESSION_close 15
+#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 16
+#define WT_CONFIG_ENTRY_WT_SESSION_compact 17
+#define WT_CONFIG_ENTRY_WT_SESSION_create 18
+#define WT_CONFIG_ENTRY_WT_SESSION_drop 19
+#define WT_CONFIG_ENTRY_WT_SESSION_join 20
+#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 21
+#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 22
+#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 23
+#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 24
+#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 25
+#define WT_CONFIG_ENTRY_WT_SESSION_rename 26
+#define WT_CONFIG_ENTRY_WT_SESSION_reset 27
+#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 28
+#define WT_CONFIG_ENTRY_WT_SESSION_salvage 29
+#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 30
+#define WT_CONFIG_ENTRY_WT_SESSION_strerror 31
+#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 32
+#define WT_CONFIG_ENTRY_WT_SESSION_truncate 33
+#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 34
+#define WT_CONFIG_ENTRY_WT_SESSION_verify 35
+#define WT_CONFIG_ENTRY_colgroup_meta 36
+#define WT_CONFIG_ENTRY_file_config 37
+#define WT_CONFIG_ENTRY_file_meta 38
+#define WT_CONFIG_ENTRY_index_meta 39
+#define WT_CONFIG_ENTRY_lsm_meta 40
+#define WT_CONFIG_ENTRY_table_meta 41
+#define WT_CONFIG_ENTRY_wiredtiger_open 42
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 43
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 44
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 45
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/connection.h b/src/include/connection.h
index c2b1dd68c18..0e0c357279a 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -145,20 +145,6 @@ struct __wt_named_extractor {
} while (0)
/*
- * Macros to ensure the file handle is inserted or removed from both the
- * main queue and the hashed queue.
- */
-#define WT_CONN_FILE_INSERT(conn, fh, bucket) do { \
- TAILQ_INSERT_HEAD(&(conn)->fhqh, fh, q); \
- TAILQ_INSERT_HEAD(&(conn)->fhhash[bucket], fh, hashq); \
-} while (0)
-
-#define WT_CONN_FILE_REMOVE(conn, fh, bucket) do { \
- TAILQ_REMOVE(&(conn)->fhqh, fh, q); \
- TAILQ_REMOVE(&(conn)->fhhash[bucket], fh, hashq); \
-} while (0)
-
-/*
* WT_CONNECTION_IMPL --
* Implementation of WT_CONNECTION
*/
@@ -333,7 +319,7 @@ struct __wt_connection_impl {
bool stat_tid_set; /* Statistics log thread set */
WT_CONDVAR *stat_cond; /* Statistics log wait mutex */
const char *stat_format; /* Statistics log timestamp format */
- WT_FH *stat_fh; /* Statistics log file handle */
+ WT_FSTREAM *stat_fs; /* Statistics log stream */
char *stat_path; /* Statistics log path format */
char **stat_sources; /* Statistics log list of objects */
const char *stat_stamp; /* Statistics log entry timestamp */
@@ -366,7 +352,6 @@ struct __wt_connection_impl {
uint32_t txn_logsync; /* Log sync configuration */
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
- uint64_t meta_uri_hash; /* Metadata file name hash */
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
@@ -414,32 +399,26 @@ struct __wt_connection_impl {
wt_off_t data_extend_len; /* file_extend data length */
wt_off_t log_extend_len; /* file_extend log length */
- /* O_DIRECT/FILE_FLAG_NO_BUFFERING file type flags */
- uint32_t direct_io;
- uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH type flags */
+#define WT_DIRECT_IO_CHECKPOINT 0x01 /* Checkpoints */
+#define WT_DIRECT_IO_DATA 0x02 /* Data files */
+#define WT_DIRECT_IO_LOG 0x04 /* Log files */
+ uint32_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
+
+ uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH */
+
bool mmap; /* mmap configuration */
int page_size; /* OS page size for mmap alignment */
uint32_t verbose;
- void *inmemory; /* In-memory configuration cookie */
-
#define WT_STDERR(s) (&S2C(s)->wt_stderr)
#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
- WT_FH wt_stderr, wt_stdout;
+ WT_FSTREAM wt_stderr, wt_stdout;
/*
- * OS library/system call jump table, to support in-memory and readonly
- * configurations as well as special devices with other non-POSIX APIs.
+ * File system interface abstracted to support alternative file system
+ * implementations.
*/
- int (*file_directory_list)(WT_SESSION_IMPL *,
- const char *, const char *, uint32_t, char ***, u_int *);
- int (*file_directory_sync)(WT_SESSION_IMPL *, const char *);
- int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *);
- int (*file_remove)(WT_SESSION_IMPL *, const char *);
- int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *);
- int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *);
- int (*handle_open)(WT_SESSION_IMPL *,
- WT_FH *, const char *, uint32_t, uint32_t);
+ WT_FILE_SYSTEM *file_system;
uint32_t flags;
};
diff --git a/src/include/ctype.i b/src/include/ctype.i
new file mode 100644
index 00000000000..b4a1ad9f318
--- /dev/null
+++ b/src/include/ctype.i
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include <ctype.h>
+
+/*
+ * __wt_isalnum --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isalnum(u_char c)
+{
+ return (isalnum(c) != 0);
+}
+
+/*
+ * __wt_isalpha --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isalpha(u_char c)
+{
+ return (isalpha(c) != 0);
+}
+
+/*
+ * __wt_isdigit --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isdigit(u_char c)
+{
+ return (isdigit(c) != 0);
+}
+
+/*
+ * __wt_isprint --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isprint(u_char c)
+{
+ return (isprint(c) != 0);
+}
+
+/*
+ * __wt_isspace --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isspace(u_char c)
+{
+ return (isspace(c) != 0);
+}
+
+/*
+ * __wt_tolower --
+ * Wrap the ctype function without sign extension.
+ */
+static inline u_char
+__wt_tolower(u_char c)
+{
+ return ((u_char)tolower(c));
+}
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 1d2ce1bfd82..6357523a03f 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -67,7 +67,7 @@ struct __wt_cursor_backup {
WT_CURSOR iface;
size_t next; /* Cursor position */
- WT_FH *bfh; /* Backup file */
+ WT_FSTREAM *bfs; /* Backup file stream */
uint32_t maxid; /* Maximum log file ID seen */
WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */
@@ -284,18 +284,50 @@ struct __wt_cursor_index {
uint8_t *cg_needvalue;
};
+/*
+ * A join iterator structure is used to generate candidate primary keys. It
+ * is the responsibility of the caller of the iterator to filter these
+ * primary keys against the other conditions of the join before returning
+ * them to the caller of WT_CURSOR::next.
+ *
+ * For a conjunction join (the default), entry_count will be 1, meaning that
+ * the iterator only consumes the first entry (WT_CURSOR_JOIN_ENTRY). That
+ * is, it successively returns primary keys from a cursor for the first
+ * index that was joined. When the values returned by that cursor are
+ * exhausted, the iterator has completed. For a disjunction join,
+ * exhausting a cursor just means that the iterator advances to the next
+ * entry. If the next entry represents an index, a new cursor is opened and
+ * primary keys from that index are then successively returned.
+ *
+ * When positioned on an entry that represents a nested join, a new child
+ * iterator is created that will be bound to the nested WT_CURSOR_JOIN.
+ * That iterator is then used to generate candidate primary keys. When its
+ * iteration is completed, that iterator is destroyed and the parent
+ * iterator advances to the next entry. Thus, depending on how deeply joins
+ * are nested, a similarly deep stack of iterators is created.
+ */
struct __wt_cursor_join_iter {
WT_SESSION_IMPL *session;
WT_CURSOR_JOIN *cjoin;
WT_CURSOR_JOIN_ENTRY *entry;
+ WT_CURSOR_JOIN_ITER *child;
WT_CURSOR *cursor; /* has null projection */
- WT_CURSOR *main; /* main table with projection */
WT_ITEM *curkey; /* primary key */
WT_ITEM idxkey;
+ u_int entry_pos; /* the current entry */
+ u_int entry_count; /* entries to walk */
+ u_int end_pos; /* the current endpoint */
+ u_int end_count; /* endpoints to walk */
+ u_int end_skip; /* when testing for inclusion */
+ /* can we skip current end? */
bool positioned;
- bool isequal; /* advancing means we're done */
+ bool is_equal;
};
+/*
+ * A join endpoint represents a positioned cursor that is 'captured' by a
+ * WT_SESSION::join call.
+ */
struct __wt_cursor_join_endpoint {
WT_ITEM key;
uint8_t recno_buf[10]; /* holds packed recno */
@@ -313,9 +345,17 @@ struct __wt_cursor_join_endpoint {
((endp)->flags & \
(WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT))
+/*
+ * Each join entry typically represents an index's participation in a join.
+ * For example, if 'k' is an index, then "t.k > 10 && t.k < 20" would be
+ * represented by a single entry, with two endpoints. When the index and
+ * subjoin fields are NULL, the join is on the main table. When subjoin is
+ * non-NULL, there is a nested join clause.
+ */
struct __wt_cursor_join_entry {
WT_INDEX *index;
WT_CURSOR *main; /* raw main table cursor */
+ WT_CURSOR_JOIN *subjoin; /* a nested join clause */
WT_BLOOM *bloom; /* Bloom filter handle */
char *repack_format; /* target format for repack */
uint32_t bloom_bit_count; /* bits per item in bloom */
@@ -339,15 +379,17 @@ struct __wt_cursor_join {
WT_TABLE *table;
const char *projection;
- WT_CURSOR_JOIN_ITER *iter;
+ WT_CURSOR *main; /* main table with projection */
+ WT_CURSOR_JOIN *parent; /* parent of nested group */
+ WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */
WT_CURSOR_JOIN_ENTRY *entries;
size_t entries_allocated;
u_int entries_next;
uint8_t recno_buf[10]; /* holds packed recno */
-#define WT_CURJOIN_ERROR 0x01 /* Error in initialization */
-#define WT_CURJOIN_INITIALIZED 0x02 /* Successful initialization */
-#define WT_CURJOIN_SKIP_FIRST_LEFT 0x04 /* First check not needed */
+#define WT_CURJOIN_DISJUNCTION 0x01 /* Entries are or-ed */
+#define WT_CURJOIN_ERROR 0x02 /* Error in initialization */
+#define WT_CURJOIN_INITIALIZED 0x04 /* Successful initialization */
uint8_t flags;
};
diff --git a/src/include/dhandle.h b/src/include/dhandle.h
index 8b313428d06..9a11594c893 100644
--- a/src/include/dhandle.h
+++ b/src/include/dhandle.h
@@ -82,7 +82,8 @@ struct __wt_data_handle {
#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */
#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */
#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */
-#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */
-#define WT_DHANDLE_OPEN 0x20 /* Handle is open */
+#define WT_DHANDLE_IS_METADATA 0x10 /* Metadata handle */
+#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */
+#define WT_DHANDLE_OPEN 0x40 /* Handle is open */
uint32_t flags;
};
diff --git a/src/include/extern.h b/src/include/extern.h
index f2b13023386..b0c0f6eccad 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -41,8 +41,8 @@ extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, W
extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el);
extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size);
extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
-extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie);
-extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, void **mappingcookie);
+extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep);
+extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie);
extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp);
extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename);
extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize);
@@ -118,9 +118,9 @@ extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, c
extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t cksum, const char *ofile);
extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile);
extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile);
-extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile);
-extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile);
-extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile);
+extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile);
+extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile);
+extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile);
extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp);
extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref);
extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all);
@@ -134,7 +134,7 @@ extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]);
extern int __wt_btree_close(WT_SESSION_IMPL *session);
extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno);
extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size);
-extern int __wt_btree_new_leaf_page( WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep);
+extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep);
extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on);
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session);
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
@@ -144,11 +144,10 @@ extern const char *__wt_page_type_string(u_int type);
extern const char *__wt_cell_type_string(uint8_t type);
extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf);
extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf);
-extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf);
extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store);
extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack);
extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell);
-extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint64_t recno, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep);
+extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep);
extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep);
extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size);
extern int
@@ -203,6 +202,8 @@ extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
extern int __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags);
extern int __wt_las_sweep(WT_SESSION_IMPL *session);
+extern uint32_t __wt_cksum(const void *chunk, size_t len);
+extern void __wt_cksum_init(void);
extern int __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len);
extern int __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str);
extern int __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item);
@@ -283,19 +284,19 @@ extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **c
extern int __wt_curfile_update_check(WT_CURSOR *cursor);
extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp);
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
-extern int __wt_curindex_joined(WT_CURSOR *cursor);
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
+extern int __wt_curjoin_joined(WT_CURSOR *cursor);
extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count);
extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap);
extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
-extern size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode);
+extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode);
extern int __wt_json_column_init(WT_CURSOR *cursor, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf);
extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen);
extern const char *__wt_json_tokname(int toktype);
extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item);
extern ssize_t __wt_json_strlen(const char *src, size_t srclen);
-extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen);
+extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen);
extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret);
extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
@@ -356,7 +357,6 @@ extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn);
extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only);
-extern void __wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count);
extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id);
extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot);
extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest);
@@ -460,7 +460,6 @@ extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_ses
extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value);
extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase);
-extern void __wt_metadata_init(WT_SESSION_IMPL *session);
extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
@@ -484,6 +483,31 @@ extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value);
+extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
+extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
+extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
+extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
+extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to);
+extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
+extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp);
+extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg);
+extern int __wt_errno(void);
+extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
+extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error);
+extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name);
+extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp);
+extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
+extern int __wt_close_connection_close(WT_SESSION_IMPL *session);
+extern int __wt_os_inmemory(WT_SESSION_IMPL *session);
+extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp);
+extern int __wt_os_stdio(WT_SESSION_IMPL *session);
+extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr);
+extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base);
extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...);
extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...);
extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...);
@@ -569,6 +593,7 @@ extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR
extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config);
extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]);
extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop);
+extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error);
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp);
extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp);
extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config);
@@ -580,8 +605,6 @@ extern void __wt_session_close_cache(WT_SESSION_IMPL *session);
extern int __wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags);
extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint);
extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]);
-extern uint32_t __wt_cksum(const void *chunk, size_t len);
-extern void __wt_cksum_init(void);
extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp);
extern int __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
extern int __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled);
@@ -599,7 +622,14 @@ extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_
extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error);
extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v);
-extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6)));
+extern void
+__wt_assert(WT_SESSION_IMPL *session,
+ int error, const char *file_name, int line_number, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6)))
+#ifdef HAVE_DIAGNOSTIC
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn))
+#endif
+;
extern int __wt_panic(WT_SESSION_IMPL *session);
extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name);
extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri);
@@ -648,6 +678,8 @@ extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size);
extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf);
+extern const char *__wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf);
extern int
__wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
@@ -712,70 +744,3 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM
extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops);
extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_recover(WT_SESSION_IMPL *session);
-extern bool __wt_absolute_path(const char *path);
-extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp);
-extern bool __wt_has_priv(void);
-extern const char *__wt_path_separator(void);
-extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
-extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
-extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
-extern int __wt_close_connection_close(WT_SESSION_IMPL *session);
-extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp);
-extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
-extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled);
-extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to);
-extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
-extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
-extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
-extern int __wt_errno(void);
-extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
-extern int __wt_get_vm_pagesize(void);
-extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
-extern int __wt_getlasterror(void);
-extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh);
-extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr);
-extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp);
-extern int __wt_map_error_rdonly(int error);
-extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
-extern int __wt_once(void (*init_routine)(void));
-extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp);
-extern int __wt_os_cleanup(WT_SESSION_IMPL *session);
-extern int __wt_os_init(WT_SESSION_IMPL *session);
-extern int __wt_os_inmemory(WT_SESSION_IMPL *session);
-extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session);
-extern int __wt_os_posix(WT_SESSION_IMPL *session);
-extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session);
-extern int __wt_os_stdio(WT_SESSION_IMPL *session);
-extern int __wt_os_win(WT_SESSION_IMPL *session);
-extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session);
-extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp);
-extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
-extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie);
-extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size);
-extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size);
-extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie);
-extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
-extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp);
-extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to);
-extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
-extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
-extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp);
-extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie);
-extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size);
-extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size);
-extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie);
-extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base);
-extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg);
-extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh);
-extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
-extern void __wt_stream_set_line_buffer(FILE *fp);
-extern void __wt_stream_set_no_buffer(FILE *fp);
-extern void __wt_thread_id(char *buf, size_t buflen);
-extern void __wt_yield(void);
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
new file mode 100644
index 00000000000..6fde537f36b
--- /dev/null
+++ b/src/include/extern_posix.h
@@ -0,0 +1,31 @@
+/* DO NOT EDIT: automatically built by dist/s_prototypes. */
+
+extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp);
+extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count);
+extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
+extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
+extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
+extern int __wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, wt_off_t len);
+extern int __wt_os_posix(WT_SESSION_IMPL *session);
+extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
+extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep);
+extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie);
+extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie);
+extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie);
+extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp);
+extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled);
+extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
+extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
+extern int __wt_once(void (*init_routine)(void));
+extern int __wt_get_vm_pagesize(void);
+extern bool __wt_absolute_path(const char *path);
+extern const char *__wt_path_separator(void);
+extern bool __wt_has_priv(void);
+extern void __wt_stream_set_line_buffer(FILE *fp);
+extern void __wt_stream_set_no_buffer(FILE *fp);
+extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
+extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
+extern void __wt_thread_id(char *buf, size_t buflen);
+extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
+extern void __wt_yield(void);
diff --git a/src/include/extern_win.h b/src/include/extern_win.h
new file mode 100644
index 00000000000..c5c2624db2c
--- /dev/null
+++ b/src/include/extern_win.h
@@ -0,0 +1,32 @@
+/* DO NOT EDIT: automatically built by dist/s_prototypes. */
+
+extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp);
+extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count);
+extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
+extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
+extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
+extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep);
+extern int __wt_os_win(WT_SESSION_IMPL *session);
+extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
+extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep);
+extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie);
+extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp);
+extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled);
+extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
+extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
+extern int __wt_once(void (*init_routine)(void));
+extern int __wt_get_vm_pagesize(void);
+extern bool __wt_absolute_path(const char *path);
+extern const char *__wt_path_separator(void);
+extern bool __wt_has_priv(void);
+extern void __wt_stream_set_line_buffer(FILE *fp);
+extern void __wt_stream_set_no_buffer(FILE *fp);
+extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
+extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
+extern void __wt_thread_id(char *buf, size_t buflen);
+extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
+extern DWORD __wt_getlasterror(void);
+extern int __wt_map_windows_error(DWORD windows_error);
+extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error);
+extern void __wt_yield(void);
diff --git a/src/include/flags.h b/src/include/flags.h
index 7682af5a4b8..f134af69d29 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -13,22 +13,18 @@
#define WT_CONN_LSM_MERGE 0x00000100
#define WT_CONN_PANIC 0x00000200
#define WT_CONN_READONLY 0x00000400
-#define WT_CONN_SERVER_ASYNC 0x00000800
-#define WT_CONN_SERVER_CHECKPOINT 0x00001000
-#define WT_CONN_SERVER_LSM 0x00002000
-#define WT_CONN_SERVER_RUN 0x00004000
-#define WT_CONN_SERVER_STATISTICS 0x00008000
-#define WT_CONN_SERVER_SWEEP 0x00010000
-#define WT_CONN_WAS_BACKUP 0x00020000
+#define WT_CONN_RECOVERING 0x00000800
+#define WT_CONN_SERVER_ASYNC 0x00001000
+#define WT_CONN_SERVER_CHECKPOINT 0x00002000
+#define WT_CONN_SERVER_LSM 0x00004000
+#define WT_CONN_SERVER_RUN 0x00008000
+#define WT_CONN_SERVER_STATISTICS 0x00010000
+#define WT_CONN_SERVER_SWEEP 0x00020000
+#define WT_CONN_WAS_BACKUP 0x00040000
#define WT_EVICTING 0x00000001
#define WT_EVICT_IN_MEMORY 0x00000002
#define WT_EVICT_LOOKASIDE 0x00000004
#define WT_EVICT_UPDATE_RESTORE 0x00000008
-#define WT_FILE_TYPE_CHECKPOINT 0x00000001
-#define WT_FILE_TYPE_DATA 0x00000002
-#define WT_FILE_TYPE_DIRECTORY 0x00000004
-#define WT_FILE_TYPE_LOG 0x00000008
-#define WT_FILE_TYPE_REGULAR 0x00000010
#define WT_LOGSCAN_FIRST 0x00000001
#define WT_LOGSCAN_FROM_CKP 0x00000002
#define WT_LOGSCAN_ONE 0x00000004
@@ -52,11 +48,11 @@
#define WT_READ_TRUNCATE 0x00000800
#define WT_READ_WONT_NEED 0x00001000
#define WT_SESSION_CAN_WAIT 0x00000001
-#define WT_SESSION_CLEAR_EVICT_WALK 0x00000002
-#define WT_SESSION_INTERNAL 0x00000004
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008
-#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010
-#define WT_SESSION_LOCKED_METADATA 0x00000020
+#define WT_SESSION_INTERNAL 0x00000002
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004
+#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000008
+#define WT_SESSION_LOCKED_METADATA 0x00000010
+#define WT_SESSION_LOCKED_PASS 0x00000020
#define WT_SESSION_LOCKED_SCHEMA 0x00000040
#define WT_SESSION_LOCKED_SLOT 0x00000080
#define WT_SESSION_LOCKED_TABLE 0x00000100
diff --git a/src/include/log.h b/src/include/log.h
index f84b147cb70..870c046252c 100644
--- a/src/include/log.h
+++ b/src/include/log.h
@@ -46,10 +46,12 @@ union __wt_lsn {
*/
#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32))
/*
- * XXX Original tested INT32_MAX.
+ * Original tested INT32_MAX. But if we read one from an older
+ * release we may see UINT32_MAX.
*/
#define WT_IS_MAX_LSN(lsn) \
- ((lsn)->l.file == UINT32_MAX && (lsn)->l.offset == INT32_MAX)
+ ((lsn)->l.file == UINT32_MAX && \
+ ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX))
/*
* Both of the macros below need to change if the content of __wt_lsn
@@ -254,7 +256,6 @@ struct __wt_log {
#ifdef HAVE_DIAGNOSTIC
uint64_t write_calls; /* Calls to log_write */
#endif
-
#define WT_LOG_OPENED 0x01 /* Log subsystem successfully open */
uint32_t flags;
};
diff --git a/src/include/meta.h b/src/include/meta.h
index ac0f5fedac4..63c79dbc72e 100644
--- a/src/include/meta.h
+++ b/src/include/meta.h
@@ -14,8 +14,10 @@
#define WT_USERCONFIG "WiredTiger.config" /* User configuration */
+#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */
#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */
#define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */
+#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */
#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */
#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
@@ -32,8 +34,7 @@
* when diagnostic is enabled.
*/
#define WT_IS_METADATA(session, dh) \
- ((dh)->name_hash == S2C(session)->meta_uri_hash && \
- strcmp((dh)->name, WT_METAFILE_URI) == 0)
+ F_ISSET((dh), WT_DHANDLE_IS_METADATA)
#define WT_METAFILE_ID 0 /* Metadata file ID */
#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */
diff --git a/src/include/misc.h b/src/include/misc.h
index 07d52c61eac..1121b7dfa75 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -12,6 +12,8 @@
*/
#define WT_UNUSED(var) (void)(var)
+#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
/* Basic constants. */
#define WT_THOUSAND (1000)
#define WT_MILLION (1000000)
@@ -31,12 +33,12 @@
*/
#define WT_STORE_SIZE(s) ((uint32_t)(s))
#define WT_PTRDIFF(end, begin) \
- ((size_t)((uint8_t *)(end) - (uint8_t *)(begin)))
+ ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin)))
#define WT_PTRDIFF32(end, begin) \
WT_STORE_SIZE(WT_PTRDIFF((end), (begin)))
#define WT_BLOCK_FITS(p, len, begin, maxlen) \
- ((uint8_t *)(p) >= (uint8_t *)(begin) && \
- ((uint8_t *)(p) + (len) <= (uint8_t *)(begin) + (maxlen)))
+ ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \
+ ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen)))
#define WT_PTR_IN_RANGE(p, begin, maxlen) \
WT_BLOCK_FITS((p), 1, (begin), (maxlen))
@@ -96,8 +98,9 @@
* the caller remember to put the & operator on the pointer.
*/
#define __wt_free(session, p) do { \
- if ((p) != NULL) \
- __wt_free_int(session, (void *)&(p)); \
+ void *__p = &(p); \
+ if (*(void **)__p != NULL) \
+ __wt_free_int(session, __p); \
} while (0)
#ifdef HAVE_DIAGNOSTIC
#define __wt_overwrite_and_free(session, p) do { \
diff --git a/src/include/misc.i b/src/include/misc.i
index 114b711ac88..eaa7a328ff1 100644
--- a/src/include/misc.i
+++ b/src/include/misc.i
@@ -70,248 +70,3 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...)
return (0);
#endif
}
-
-/*
- * __wt_dirlist --
- * Get a list of files from a directory.
- */
-static inline int
-__wt_dirlist(WT_SESSION_IMPL *session, const char *dir,
- const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: directory-list: %s prefix %s",
- dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
- prefix == NULL ? "all" : prefix));
-
- return (S2C(session)->file_directory_list(
- session, dir, prefix, flags, dirlist, countp));
-}
-
-/*
- * __wt_directory_sync --
- * Flush a directory to ensure file creation is durable.
- */
-static inline int
-__wt_directory_sync(WT_SESSION_IMPL *session, const char *name)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: directory-sync", name));
-
- return (S2C(session)->file_directory_sync(session, name));
-}
-
-/*
- * __wt_exist --
- * Return if the file exists.
- */
-static inline int
-__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
-{
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name));
-
- return (S2C(session)->file_exist(session, name, existp));
-}
-
-/*
- * __wt_remove --
- * POSIX remove.
- */
-static inline int
-__wt_remove(WT_SESSION_IMPL *session, const char *name)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name));
-
- return (S2C(session)->file_remove(session, name));
-}
-
-/*
- * __wt_rename --
- * POSIX rename.
- */
-static inline int
-__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to));
-
- return (S2C(session)->file_rename(session, from, to));
-}
-
-/*
- * __wt_filesize_name --
- * Get the size of a file in bytes, by file name.
- */
-static inline int
-__wt_filesize_name(
- WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
-{
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name));
-
- return (S2C(session)->file_size(session, name, silent, sizep));
-}
-
-/*
- * __wt_directory_sync_fh --
- * Flush a directory file handle to ensure file creation is durable.
- *
- * We don't use the normal sync path because many file systems don't require
- * this step and we don't want to penalize them.
- */
-static inline int
-__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- return (fh->fh_sync(session, fh, true));
-}
-
-/*
- * __wt_fallocate --
- * Extend a file.
- */
-static inline int
-__wt_fallocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
-
- WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX,
- fh->name, (uintmax_t)len, (uintmax_t)offset));
-
- return (fh->fh_allocate(session, fh, offset, len));
-}
-
-/*
- * __wt_file_lock --
- * Lock/unlock a file.
- */
-static inline int
-__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock)
-{
- WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-lock: %s", fh->name, lock ? "lock" : "unlock"));
-
- return (fh->fh_lock(session, fh, lock));
-}
-
-/*
- * __wt_vfprintf --
- * ANSI C vfprintf.
- */
-static inline int
-__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
-{
- WT_RET(__wt_verbose(
- session, WT_VERB_HANDLEOPS, "%s: handle-printf", fh->name));
-
- return (fh->fh_printf(session, fh, fmt, ap));
-}
-
-/*
- * __wt_fprintf --
- * ANSI C fprintf.
- */
-static inline int
-__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
-{
- WT_DECL_RET;
- va_list ap;
-
- va_start(ap, fmt);
- ret = __wt_vfprintf(session, fh, fmt, ap);
- va_end(ap);
-
- return (ret);
-}
-
-/*
- * __wt_read --
- * POSIX pread.
- */
-static inline int
-__wt_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
-{
- WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- WT_STAT_FAST_CONN_INCR(session, read_io);
-
- return (fh->fh_read(session, fh, offset, len, buf));
-}
-
-/*
- * __wt_filesize --
- * Get the size of a file in bytes, by file handle.
- */
-static inline int
-__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
-{
- WT_RET(__wt_verbose(
- session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->name));
-
- return (fh->fh_size(session, fh, sizep));
-}
-
-/*
- * __wt_fsync --
- * POSIX fflush/fsync.
- */
-static inline int
-__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
-{
- WT_RET(__wt_verbose(
- session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name));
-
- return (fh->fh_sync(session, fh, block));
-}
-
-/*
- * __wt_ftruncate --
- * POSIX ftruncate.
- */
-static inline int
-__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-truncate: %" PRIuMAX,
- fh->name, (uintmax_t)len));
-
- return (fh->fh_truncate(session, fh, len));
-}
-
-/*
- * __wt_write --
- * POSIX pwrite.
- */
-static inline int
-__wt_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name,
- WT_SINGLETHREAD, strlen(WT_SINGLETHREAD)));
-
- WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- WT_STAT_FAST_CONN_INCR(session, write_io);
-
- return (fh->fh_write(session, fh, offset, len, buf));
-}
diff --git a/src/include/os.h b/src/include/os.h
index 44cceee6c40..7a8e47ed81f 100644
--- a/src/include/os.h
+++ b/src/include/os.h
@@ -6,36 +6,32 @@
* See the file LICENSE for redistribution information.
*/
-/*
- * Number of directory entries can grow dynamically.
- */
-#define WT_DIR_ENTRY 32
-
-#define WT_DIRLIST_EXCLUDE 0x1 /* Exclude files matching prefix */
-#define WT_DIRLIST_INCLUDE 0x2 /* Include files matching prefix */
+#define WT_SYSCALL(call, ret) do { \
+ /* \
+ * A call returning 0 indicates success; any call where \
+ * 0 is not the only successful return must provide an \
+ * expression evaluating to 0 in all successful cases. \
+ */ \
+ if (((ret) = (call)) == 0) \
+ break; \
+ /* \
+ * The call's error was either returned by the call or \
+ * is in errno, and there are cases where it depends on \
+ * the software release as to which it is (for example, \
+ * posix_fadvise on FreeBSD and OS X). Failing calls \
+ * must either return a non-zero error value, or -1 if \
+ * the error value is in errno. (The WiredTiger errno \
+ * function returns WT_ERROR if errno is 0, which isn't \
+ * ideal but won't discard the failure.) \
+ */ \
+ if ((ret) == -1) \
+ (ret) = __wt_errno(); \
+} while (0)
#define WT_SYSCALL_RETRY(call, ret) do { \
int __retry; \
for (__retry = 0; __retry < 10; ++__retry) { \
- /* \
- * A call returning 0 indicates success; any call where \
- * 0 is not the only successful return must provide an \
- * expression evaluating to 0 in all successful cases. \
- */ \
- if (((ret) = (call)) == 0) \
- break; \
- /* \
- * The call's error was either returned by the call or \
- * is in errno, and there are cases where it depends on \
- * the software release as to which it is (for example, \
- * posix_fadvise on FreeBSD and OS X). Failing calls \
- * must either return a non-zero error value, or -1 if \
- * the error value is in errno. (The WiredTiger errno \
- * function returns WT_ERROR if errno is 0, which isn't \
- * ideal but won't discard the failure.) \
- */ \
- if ((ret) == -1) \
- (ret) = __wt_errno(); \
+ WT_SYSCALL(call, ret); \
switch (ret) { \
case EAGAIN: \
case EBUSY: \
@@ -70,81 +66,97 @@
(t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1)
/*
- * The underlying OS calls return ENOTSUP if posix_fadvise functionality isn't
- * available, but WiredTiger uses the POSIX flag names in the API. Use distinct
- * values so the underlying code can distinguish.
+ * Macros to ensure a file handle is inserted or removed from both the main and
+ * the hashed queue, used by connection-level and in-memory data structures.
*/
-#ifndef POSIX_FADV_DONTNEED
-#define POSIX_FADV_DONTNEED 0x01
-#endif
-#ifndef POSIX_FADV_WILLNEED
-#define POSIX_FADV_WILLNEED 0x02
-#endif
+#define WT_FILE_HANDLE_INSERT(h, fh, bucket) do { \
+ TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \
+ TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \
+} while (0)
-#define WT_OPEN_CREATE 0x001 /* Create is OK */
-#define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */
-#define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */
-#define WT_OPEN_READONLY 0x008 /* Readonly open */
-#define WT_STREAM_APPEND 0x010 /* Open a stream: append */
-#define WT_STREAM_LINE_BUFFER 0x020 /* Line buffer the stream */
-#define WT_STREAM_READ 0x040 /* Open a stream: read */
-#define WT_STREAM_WRITE 0x080 /* Open a stream: write */
+#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) do { \
+ TAILQ_REMOVE(&(h)->fhqh, fh, q); \
+ TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \
+} while (0)
struct __wt_fh {
+ /*
+ * There is a file name field in both the WT_FH and WT_FILE_HANDLE
+ * structures, which isn't ideal. There would be compromises to keeping
+ * a single copy: if it were only in WT_FH, file systems could not access
+ * the name field; if it were only in WT_FILE_HANDLE, internal
+ * WiredTiger code would need to maintain a string inside a structure
+ * that is owned by the user (since we care about the content of the
+ * file name). Keeping two copies seems most reasonable.
+ */
 const char *name; /* File name */
- uint64_t name_hash; /* Hash of name */
- TAILQ_ENTRY(__wt_fh) q; /* List of open handles */
- TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */
- u_int ref; /* Reference count */
+ uint64_t name_hash; /* hash of name */
+ TAILQ_ENTRY(__wt_fh) q; /* internal queue */
+ TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */
+ u_int ref; /* reference count */
+
+ WT_FILE_HANDLE *handle; /* handle whose fh_* methods do the I/O */
+};
+
+#ifdef _WIN32
+struct __wt_file_handle_win {
+ WT_FILE_HANDLE iface;
/*
- * Underlying file system handle support.
+ * Windows specific file handle fields
*/
-#ifdef _WIN32
HANDLE filehandle; /* Windows file handle */
HANDLE filehandle_secondary; /* Windows file handle
for file size changes */
+ bool direct_io; /* O_DIRECT configured */
+};
+
#else
+
+struct __wt_file_handle_posix {
+ WT_FILE_HANDLE iface;
+
+ /*
+ * POSIX specific file handle fields
+ */
int fd; /* POSIX file handle */
+
+ bool direct_io; /* O_DIRECT configured */
+};
#endif
- FILE *fp; /* ANSI C stdio handle */
+
+struct __wt_file_handle_inmem {
+ WT_FILE_HANDLE iface;
/*
- * Underlying in-memory handle support.
+ * In memory specific file handle fields
*/
- size_t off; /* Read/write offset */
+ uint64_t name_hash; /* hash of name */
+ TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */
+ TAILQ_ENTRY(__wt_file_handle_inmem) hashq;
+
WT_ITEM buf; /* Data */
+ u_int ref; /* Reference count */
+};
- bool direct_io; /* O_DIRECT configured */
+struct __wt_fstream {
+ const char *name; /* Stream name */
- enum { /* file extend configuration */
- WT_FALLOCATE_AVAILABLE,
- WT_FALLOCATE_NOT_AVAILABLE,
- WT_FALLOCATE_POSIX,
- WT_FALLOCATE_STD,
- WT_FALLOCATE_SYS } fallocate_available;
- bool fallocate_requires_locking;
+ FILE *fp; /* stdio FILE stream */
+ WT_FH *fh; /* WT file handle */
+ wt_off_t off; /* Read/write offset */
+ wt_off_t size; /* File size */
+ WT_ITEM buf; /* Data */
-#define WT_FH_FLUSH_ON_CLOSE 0x01 /* Flush when closing */
-#define WT_FH_IN_MEMORY 0x02 /* In-memory, don't remove */
+#define WT_STREAM_APPEND 0x01 /* Open a stream for append */
+#define WT_STREAM_READ 0x02 /* Open a stream for read */
+#define WT_STREAM_WRITE 0x04 /* Open a stream for write */
uint32_t flags;
- int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int);
- int (*fh_allocate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t);
- int (*fh_close)(WT_SESSION_IMPL *, WT_FH *);
- int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *);
- int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool);
- int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *, void **);
- int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t);
- int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t);
- int (*fh_map_unmap)(
- WT_SESSION_IMPL *, WT_FH *, void *, size_t, void **);
- int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list);
- int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *);
- int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *);
- int (*fh_sync)(WT_SESSION_IMPL *, WT_FH *, bool);
- int (*fh_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t);
- int (*fh_write)(
- WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *);
+ int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *);
+ int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *);
+ int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *);
+ int (*fstr_printf)(
+ WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list);
};
diff --git a/src/include/os_fhandle.i b/src/include/os_fhandle.i
new file mode 100644
index 00000000000..313bf8eca3f
--- /dev/null
+++ b/src/include/os_fhandle.i
@@ -0,0 +1,176 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * __wt_fsync --
+ * Sync a file handle via fh_sync (block) or fh_sync_nowait (!block).
+ */
+static inline int
+__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name));
+
+ handle = fh->handle;
+ /*
+ * There is no way to check when the non-blocking sync-file-range is
+ * complete, but we still count the call as active while it runs.
+ */
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active);
+ WT_STAT_FAST_CONN_INCR(session, fsync_io);
+ if (block) /* A handle without the method reports success. */
+ ret = (handle->fh_sync == NULL ? 0 :
+ handle->fh_sync(handle, (WT_SESSION *)session));
+ else
+ ret = (handle->fh_sync_nowait == NULL ? 0 :
+ handle->fh_sync_nowait(handle, (WT_SESSION *)session));
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active);
+ return (ret);
+}
+
+/*
+ * __wt_fallocate --
+ * Extend a file, trying the no-lock method first; ENOTSUP if none works.
+ */
+static inline int
+__wt_fallocate(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+{
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX,
+ fh->handle->name, (uintmax_t)len, (uintmax_t)offset));
+
+ /*
+ * Our caller is responsible for handling any locking issues, all we
+ * have to do is find a function to call.
+ *
+ * Be cautious, the underlying system might have configured the nolock
+ * flavor, that failed, and we have to fallback to the locking flavor.
+ */
+ handle = fh->handle;
+ if (handle->fh_allocate_nolock != NULL) {
+ if ((ret = handle->fh_allocate_nolock(
+ handle, (WT_SESSION *)session, offset, len)) == 0)
+ return (0);
+ WT_RET_ERROR_OK(ret, ENOTSUP); /* presumably only ENOTSUP falls through */
+ }
+ if (handle->fh_allocate != NULL)
+ return (handle->fh_allocate(
+ handle, (WT_SESSION *)session, offset, len));
+ return (ENOTSUP);
+}
+
+/*
+ * __wt_file_lock --
+ * Lock/unlock a file; a handle with no fh_lock method trivially succeeds.
+ */
+static inline int
+__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock)
+{
+ WT_FILE_HANDLE *handle;
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-lock: %s", fh->handle->name, lock ? "lock" : "unlock"));
+
+ handle = fh->handle;
+ return (handle->fh_lock == NULL ? 0 :
+ handle->fh_lock(handle, (WT_SESSION*)session, lock));
+}
+
+/*
+ * __wt_read --
+ * Read len bytes at offset into buf using the handle's fh_read method.
+ */
+static inline int
+__wt_read(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ WT_DECL_RET;
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->handle->name, len, (uintmax_t)offset));
+
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active); /* in-flight gauge */
+ WT_STAT_FAST_CONN_INCR(session, read_io);
+
+ ret = fh->handle->fh_read(
+ fh->handle, (WT_SESSION *)session, offset, len, buf);
+
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active);
+ return (ret);
+}
+
+/*
+ * __wt_filesize --
+ * Get the size of a file in bytes, by file handle (via fh_size).
+ */
+static inline int
+__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name));
+
+ return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep));
+}
+
+/*
+ * __wt_ftruncate --
+ * Truncate a file to len bytes via fh_truncate; asserts not read-only.
+ */
+static inline int
+__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-truncate: %" PRIuMAX,
+ fh->handle->name, (uintmax_t)len));
+
+ return (fh->handle->fh_truncate(
+ fh->handle, (WT_SESSION *)session, len));
+}
+
+/*
+ * __wt_write --
+ * Write len bytes from buf at offset using the handle's fh_write method.
+ */
+static inline int
+__wt_write(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+ WT_DECL_RET;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name,
+ WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); /* sole read-only exception */
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->handle->name, len, (uintmax_t)offset));
+
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active); /* in-flight gauge */
+ WT_STAT_FAST_CONN_INCR(session, write_io);
+
+ ret = fh->handle->fh_write(
+ fh->handle, (WT_SESSION *)session, offset, len, buf);
+
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active);
+ return (ret);
+}
diff --git a/src/include/os_fs.i b/src/include/os_fs.i
new file mode 100644
index 00000000000..88ee71d953a
--- /dev/null
+++ b/src/include/os_fs.i
@@ -0,0 +1,244 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * __wt_fs_directory_list --
+ * Get a list of files from a directory; a NULL prefix is logged as "all".
+ */
+static inline int
+__wt_fs_directory_list(WT_SESSION_IMPL *session,
+ const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
+
+ *dirlistp = NULL; /* Clear the results before anything can fail. */
+ *countp = 0;
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
+ "%s: directory-list: %s prefix %s",
+ dir, prefix == NULL ? "all" : prefix));
+
+ WT_RET(__wt_filename(session, dir, &path));
+
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_list(
+ file_system, wt_session, path, prefix, dirlistp, countp);
+
+ __wt_free(session, path);
+ return (ret);
+}
+
+/*
+ * __wt_fs_directory_list_free --
+ * Free memory allocated by __wt_fs_directory_list; resets *dirlistp.
+ */
+static inline int
+__wt_fs_directory_list_free(
+ WT_SESSION_IMPL *session, char ***dirlistp, u_int count)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+
+ if (*dirlistp != NULL) { /* A NULL list skips the file-system call. */
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_list_free(
+ file_system, wt_session, *dirlistp, count);
+ }
+
+ *dirlistp = NULL;
+ return (ret);
+}
+
+/*
+ * __wt_fs_directory_sync --
+ * Flush the directory containing name so file creation is durable.
+ */
+static inline int
+__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *copy, *dir;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_FILEOPS, "%s: directory-sync", name));
+
+ /*
+ * POSIX 1003.1 does not require that fsync of a file handle ensures the
+ * entry in the directory containing the file has also reached disk (and
+ * there are historic Linux filesystems requiring it). If the underlying
+ * filesystem method is set, do an explicit fsync on a file descriptor
+ * for the directory to be sure.
+ *
+ * directory-sync is not a required call, no method means the call isn't
+ * needed.
+ */
+ file_system = S2C(session)->file_system;
+ if (file_system->fs_directory_sync == NULL)
+ return (0);
+
+ copy = NULL;
+ if (name == NULL || strchr(name, '/') == NULL)
+ name = S2C(session)->home; /* No directory part: sync the home. */
+ else {
+ /*
+ * File name construction should not return a path without any
+ * slash separator, but caution isn't unreasonable.
+ */
+ WT_RET(__wt_filename(session, name, &copy));
+ if ((dir = strrchr(copy, '/')) == NULL)
+ name = S2C(session)->home;
+ else {
+ dir[1] = '\0'; /* Cut after the last slash. */
+ name = copy;
+ }
+ }
+
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_sync(file_system, wt_session, name);
+
+ __wt_free(session, copy);
+ return (ret);
+}
+
+/*
+ * __wt_fs_exist --
+ * Report through existp whether the named file exists (via fs_exist).
+ */
+static inline int
+__wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name));
+
+ WT_RET(__wt_filename(session, name, &path));
+
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_exist(file_system, wt_session, path, existp);
+
+ __wt_free(session, path);
+ return (ret);
+}
+
+/*
+ * __wt_fs_remove --
+ * Remove a file by name; asserts the connection is not read-only.
+ */
+static inline int
+__wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name));
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * It is a layering violation to retrieve a WT_FH here, but it is a
+ * useful diagnostic to ensure WiredTiger doesn't have the handle open.
+ */
+ if (__wt_handle_is_open(session, name))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-remove: file has open handles", name);
+#endif
+
+ WT_RET(__wt_filename(session, name, &path));
+
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_remove(file_system, wt_session, path);
+
+ __wt_free(session, path);
+ return (ret);
+}
+
+/*
+ * __wt_fs_rename --
+ * Rename a file; both names are expanded with __wt_filename first.
+ */
+static inline int
+__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *from_path, *to_path;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to));
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * It is a layering violation to retrieve a WT_FH here, but it is a
+ * useful diagnostic to ensure WiredTiger doesn't have the handle open.
+ */
+ if (__wt_handle_is_open(session, from))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-rename: file has open handles", from);
+ if (__wt_handle_is_open(session, to))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-rename: file has open handles", to);
+#endif
+
+ from_path = to_path = NULL; /* Both are freed at the err label. */
+ WT_ERR(__wt_filename(session, from, &from_path));
+ WT_ERR(__wt_filename(session, to, &to_path));
+
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_rename(
+ file_system, wt_session, from_path, to_path);
+
+err: __wt_free(session, from_path);
+ __wt_free(session, to_path);
+ return (ret);
+}
+
+/*
+ * __wt_fs_size --
+ * Get the size of a file in bytes, by file name (via fs_size).
+ */
+static inline int
+__wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
+{
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name));
+
+ WT_RET(__wt_filename(session, name, &path));
+
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_size(file_system, wt_session, path, sizep);
+
+ __wt_free(session, path);
+ return (ret);
+}
diff --git a/src/include/os_fstream.i b/src/include/os_fstream.i
new file mode 100644
index 00000000000..8c0fdadbdb0
--- /dev/null
+++ b/src/include/os_fstream.i
@@ -0,0 +1,97 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * __wt_getline --
+ * Get a line from a stream into buf via the stream's fstr_getline.
+ */
+static inline int
+__wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
+{
+ return (fstr->fstr_getline(session, fstr, buf));
+}
+
+/*
+ * __wt_fclose --
+ * Close a stream and clear the caller's pointer; NULL is a no-op.
+ */
+static inline int
+__wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp)
+{
+ WT_FSTREAM *fstr;
+
+ if ((fstr = *fstrp) == NULL)
+ return (0);
+ *fstrp = NULL; /* Cleared before close, whatever close returns. */
+ return (fstr->close(session, fstr));
+}
+
+/*
+ * __wt_fflush --
+ * Flush a stream via its fstr_flush method.
+ */
+static inline int
+__wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
+{
+ return (fstr->fstr_flush(session, fstr));
+}
+
+/*
+ * __wt_vfprintf --
+ * ANSI C vfprintf for a WT_FSTREAM, via its fstr_printf method.
+ */
+static inline int
+__wt_vfprintf(
+ WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+{
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-printf", fstr->name));
+
+ return (fstr->fstr_printf(session, fstr, fmt, ap));
+}
+
+/*
+ * __wt_fprintf --
+ * ANSI C fprintf for a WT_FSTREAM; a varargs wrapper of __wt_vfprintf.
+ */
+static inline int
+__wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+{
+ WT_DECL_RET;
+ va_list ap;
+
+ va_start(ap, fmt);
+ ret = __wt_vfprintf(session, fstr, fmt, ap);
+ va_end(ap);
+
+ return (ret);
+}
+
+/*
+ * __wt_sync_and_rename --
+ * Flush, sync and close a stream, then rename "from" to "to".
+ */
+static inline int
+__wt_sync_and_rename(WT_SESSION_IMPL *session,
+ WT_FSTREAM **fstrp, const char *from, const char *to)
+{
+ WT_DECL_RET;
+ WT_FSTREAM *fstr;
+
+ fstr = *fstrp; /* Dereferenced below: callers must not pass NULL. */
+ *fstrp = NULL;
+
+ /* Flush to disk and close the handle. */
+ WT_TRET(__wt_fflush(session, fstr));
+ WT_TRET(__wt_fsync(session, fstr->fh, true));
+ WT_TRET(__wt_fclose(session, &fstr));
+ WT_RET(ret); /* presumably returns here on any earlier failure */
+
+ return (__wt_rename_and_sync_directory(session, from, to));
+}
diff --git a/src/include/packing.i b/src/include/packing.i
index 35b2ddc43db..d662c60d221 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -138,7 +138,7 @@ __pack_next(WT_PACK *pack, WT_PACK_VALUE *pv)
next: if (pack->cur == pack->end)
return (WT_NOTFOUND);
- if (isdigit(*pack->cur)) {
+ if (__wt_isdigit((u_char)*pack->cur)) {
pv->havesize = 1;
pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10));
pack->cur = endsize;
@@ -260,6 +260,8 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
return (pv->size);
case 'j':
case 'J':
+ case 'K':
+ /* These formats are only used internally. */
if (pv->type == 'j' || pv->havesize)
s = pv->size;
else {
@@ -269,7 +271,7 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv)
len = __wt_json_strlen(pv->u.item.data,
pv->u.item.size);
WT_ASSERT(session, len >= 0);
- s = (size_t)len + 1;
+ s = (size_t)len + (pv->type == 'K' ? 0 : 1);
}
return (s);
case 's':
@@ -357,18 +359,22 @@ __pack_write(
break;
case 'j':
case 'J':
+ case 'K':
+ /* These formats are only used internally. */
s = pv->u.item.size;
if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
s = pv->size;
pad = 0;
} else if (pv->havesize)
pad = pv->size - s;
+ else if (pv->type == 'K')
+ pad = 0;
else
pad = 1;
if (s > 0) {
oldp = *pp;
- WT_RET(__wt_json_strncpy((char **)pp, maxlen,
- pv->u.item.data, s));
+ WT_RET(__wt_json_strncpy((WT_SESSION *)session,
+ (char **)pp, maxlen, pv->u.item.data, s));
maxlen -= (size_t)(*pp - oldp);
}
if (pad > 0) {
@@ -534,7 +540,7 @@ __unpack_read(WT_SESSION_IMPL *session,
break;
case 'R':
WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen);
- pv->u.u = *(uint64_t *)*pp;
+ pv->u.u = *(const uint64_t *)*pp;
*pp += sizeof(uint64_t);
break;
default:
diff --git a/src/include/session.h b/src/include/session.h
index 7fdb7fc2548..aa51dae58c4 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -198,7 +198,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
((s)->hazard == NULL)
/* The number of hazard pointers grows dynamically. */
-#define WT_HAZARD_INCR 10
+#define WT_HAZARD_INCR 1
uint32_t hazard_size; /* Allocated slots in hazard array. */
uint32_t nhazard; /* Count of active hazard pointers */
WT_HAZARD *hazard; /* Hazard pointer array */
diff --git a/src/include/stat.h b/src/include/stat.h
index e728b634c6e..57126af8aa4 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -144,10 +144,16 @@ __wt_stats_clear(void *stats_arg, int slot)
#define WT_STAT_DECRV(session, stats, fld, value) \
(stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value)
+#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \
+ __wt_atomic_subi64( /* decrement: atomically subtract value */ \
+ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_DECR(session, stats, fld) \
 WT_STAT_DECRV(session, stats, fld, 1)
#define WT_STAT_INCRV(session, stats, fld, value) \
 (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value)
+#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \
+ __wt_atomic_addi64( /* increment: atomically add value */ \
+ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_INCR(session, stats, fld) \
WT_STAT_INCRV(session, stats, fld, 1)
#define WT_STAT_SET(session, stats, fld, value) do { \
@@ -164,12 +170,20 @@ __wt_stats_clear(void *stats_arg, int slot)
} while (0)
#define WT_STAT_FAST_DECR(session, stats, fld) \
WT_STAT_FAST_DECRV(session, stats, fld, 1)
+#define WT_STAT_FAST_DECRV_ATOMIC(session, stats, fld, value) do { \
+ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \
+ WT_STAT_DECRV_ATOMIC(session, stats, fld, value); \
+} while (0)
#define WT_STAT_FAST_INCRV(session, stats, fld, value) do { \
if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \
WT_STAT_INCRV(session, stats, fld, value); \
} while (0)
#define WT_STAT_FAST_INCR(session, stats, fld) \
WT_STAT_FAST_INCRV(session, stats, fld, 1)
+#define WT_STAT_FAST_INCRV_ATOMIC(session, stats, fld, value) do { \
+ if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \
+ WT_STAT_INCRV_ATOMIC(session, stats, fld, value); \
+} while (0)
#define WT_STAT_FAST_SET(session, stats, fld, value) do { \
if (FLD_ISSET(S2C(session)->stat_flags, WT_CONN_STAT_FAST)) \
WT_STAT_SET(session, stats, fld, value); \
@@ -180,10 +194,14 @@ __wt_stats_clear(void *stats_arg, int slot)
*/
#define WT_STAT_FAST_CONN_DECR(session, fld) \
WT_STAT_FAST_DECR(session, S2C(session)->stats, fld)
+#define WT_STAT_FAST_CONN_DECR_ATOMIC(session, fld) \
+ WT_STAT_FAST_DECRV_ATOMIC(session, S2C(session)->stats, fld, 1)
#define WT_STAT_FAST_CONN_DECRV(session, fld, value) \
WT_STAT_FAST_DECRV(session, S2C(session)->stats, fld, value)
#define WT_STAT_FAST_CONN_INCR(session, fld) \
WT_STAT_FAST_INCR(session, S2C(session)->stats, fld)
+#define WT_STAT_FAST_CONN_INCR_ATOMIC(session, fld) \
+ WT_STAT_FAST_INCRV_ATOMIC(session, S2C(session)->stats, fld, 1)
#define WT_STAT_FAST_CONN_INCRV(session, fld, value) \
WT_STAT_FAST_INCRV(session, S2C(session)->stats, fld, value)
#define WT_STAT_FAST_CONN_SET(session, fld, value) \
@@ -261,17 +279,25 @@ struct __wt_connection_stats {
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_get_ref;
+ int64_t cache_eviction_get_ref_empty;
+ int64_t cache_eviction_get_ref_empty2;
int64_t cache_eviction_aggressive_set;
int64_t cache_eviction_queue_empty;
int64_t cache_eviction_queue_not_empty;
int64_t cache_eviction_server_evicting;
int64_t cache_eviction_server_not_evicting;
+ int64_t cache_eviction_server_toobig;
+ int64_t cache_eviction_server_slept;
int64_t cache_eviction_slow;
int64_t cache_eviction_worker_evicting;
int64_t cache_eviction_force_fail;
int64_t cache_eviction_walks_active;
int64_t cache_eviction_walks_started;
int64_t cache_eviction_hazard;
+ int64_t cache_hazard_checks;
+ int64_t cache_hazard_walks;
+ int64_t cache_hazard_max;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
int64_t cache_eviction_internal;
@@ -293,6 +319,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
int64_t cache_read_lookaside;
+ int64_t cache_pages_requested;
int64_t cache_eviction_pages_seen;
int64_t cache_eviction_fail;
int64_t cache_eviction_walk;
@@ -314,6 +341,7 @@ struct __wt_connection_stats {
int64_t cond_wait;
int64_t rwlock_read;
int64_t rwlock_write;
+ int64_t fsync_io;
int64_t read_io;
int64_t write_io;
int64_t cursor_create;
@@ -356,7 +384,9 @@ struct __wt_connection_stats {
int64_t log_write_lsn;
int64_t log_write_lsn_skip;
int64_t log_sync;
+ int64_t log_sync_duration;
int64_t log_sync_dir;
+ int64_t log_sync_dir_duration;
int64_t log_writes;
int64_t log_slot_consolidated;
int64_t log_max_filesize;
@@ -378,6 +408,9 @@ struct __wt_connection_stats {
int64_t rec_split_stashed_objects;
int64_t session_cursor_open;
int64_t session_open;
+ int64_t fsync_active;
+ int64_t read_active;
+ int64_t write_active;
int64_t page_busy_blocked;
int64_t page_forcible_evict_blocked;
int64_t page_locked_blocked;
@@ -394,6 +427,10 @@ struct __wt_connection_stats {
int64_t txn_checkpoint_time_total;
int64_t txn_checkpoint;
int64_t txn_fail_cache;
+ int64_t txn_checkpoint_fsync_post;
+ int64_t txn_checkpoint_fsync_pre;
+ int64_t txn_checkpoint_fsync_post_duration;
+ int64_t txn_checkpoint_fsync_pre_duration;
int64_t txn_pinned_range;
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
@@ -447,7 +484,6 @@ struct __wt_dsrc_stats {
int64_t btree_compact_rewrite;
int64_t btree_row_internal;
int64_t btree_row_leaf;
- int64_t cache_bytes_inuse;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
@@ -465,6 +501,7 @@ struct __wt_dsrc_stats {
int64_t cache_write_lookaside;
int64_t cache_read;
int64_t cache_read_lookaside;
+ int64_t cache_pages_requested;
int64_t cache_write;
int64_t cache_write_restore;
int64_t cache_eviction_clean;
@@ -514,9 +551,11 @@ struct __wt_dsrc_stats {
*/
#define WT_JOIN_STATS_BASE 3000
struct __wt_join_stats {
- int64_t accesses;
- int64_t actual_count;
+ int64_t main_access;
int64_t bloom_false_positive;
+ int64_t membership_check;
+ int64_t bloom_insert;
+ int64_t iterated;
};
/* Statistics section: END */
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 4f422af32d4..f578f4e6c08 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -71,6 +71,8 @@ struct __wt_encryptor; typedef struct __wt_encryptor WT_ENCRYPTOR;
struct __wt_event_handler; typedef struct __wt_event_handler WT_EVENT_HANDLER;
struct __wt_extension_api; typedef struct __wt_extension_api WT_EXTENSION_API;
struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR;
+struct __wt_file_handle; typedef struct __wt_file_handle WT_FILE_HANDLE;
+struct __wt_file_system; typedef struct __wt_file_system WT_FILE_SYSTEM;
struct __wt_item; typedef struct __wt_item WT_ITEM;
struct __wt_session; typedef struct __wt_session WT_SESSION;
@@ -421,6 +423,9 @@ struct __wt_cursor {
* @errors
* In particular, if \c overwrite is not configured and a record with
* the specified key already exists, ::WT_DUPLICATE_KEY is returned.
+ * Also, if \c in_memory is configured for the database and the insert
+ * requires more than the configured cache size to complete,
+ * ::WT_CACHE_FULL is returned.
*/
int __F(insert)(WT_CURSOR *cursor);
@@ -451,6 +456,9 @@ struct __wt_cursor {
* @errors
* In particular, if \c overwrite is not configured and no record with
* the specified key exists, ::WT_NOTFOUND is returned.
+ * Also, if \c in_memory is configured for the database and the update
+ * requires more than the configured cache size to complete,
+ * ::WT_CACHE_FULL is returned.
*/
int __F(update)(WT_CURSOR *cursor);
@@ -1238,18 +1246,21 @@ struct __wt_session {
* @param join_cursor a cursor that was opened using a
* \c "join:" URI. It may not have been used for any operations
* other than other join calls.
- * @param ref_cursor either an index cursor having the same base table
- * as the join_cursor, or a table cursor open on the same base table.
- * The ref_cursor must be positioned.
+ * @param ref_cursor an index cursor having the same base table
+ * as the join_cursor, or a table cursor open on the same base table,
+ * or another join cursor. Unless the ref_cursor is another join
+ * cursor, it must be positioned.
*
* The ref_cursor limits the results seen by iterating the
* join_cursor to table items referred to by the key in this
* index. The set of keys referred to is modified by the compare
* config option.
*
- * Multiple join calls builds up a set of ref_cursors, and the
- * results seen by iteration are the intersection of the cursor
- * ranges participating in the join.
+ * Multiple join calls build up a set of ref_cursors, and
+ * by default, the results seen by iteration are the intersection
+ * of the cursor ranges participating in the join. When configured
+ * with \c "operation=or", the results seen are the union of
+ * the participating cursor ranges.
*
* After the join call completes, the ref_cursor cursor may not be
* used for any purpose other than get_key and get_value. Any other
@@ -1272,6 +1283,13 @@ struct __wt_session {
* also influences evaluation order for cursors in the join. When the
* count is equal for multiple bloom filters in a composition of joins\,
* the bloom filter may be shared., an integer; default \c .}
+ * @config{operation, the operation applied between this and other
+ * joined cursors. When "operation=and" is specified\, all the
+ * conditions implied by joins must be satisfied for an entry to be
+ * returned by the join cursor; when "operation=or" is specified\, only
+ * one must be satisfied. All cursors joined to a join cursor must have
+ * matching operations., a string\, chosen from the following options:
+ * \c "and"\, \c "or"; default \c "and".}
* @config{strategy, when set to bloom\, a bloom filter is created and
* populated for this index. This has an up front cost but may reduce
* the number of accesses to the main table when iterating the joined
@@ -1448,15 +1466,16 @@ struct __wt_session {
* @config{dump_blocks, Display the contents of on-disk blocks as they
* are verified\, using the application's message handler\, intended for
* debugging., a boolean flag; default \c false.}
+ * @config{dump_layout, Display the layout of the files as they are
+ * verified\, using the application's message handler\, intended for
+ * debugging; requires optional support from the block manager., a
+ * boolean flag; default \c false.}
* @config{dump_offsets, Display the contents of specific on-disk
* blocks\, using the application's message handler\, intended for
* debugging., a list of strings; default empty.}
* @config{dump_pages, Display the contents of in-memory pages as they
* are verified\, using the application's message handler\, intended for
* debugging., a boolean flag; default \c false.}
- * @config{dump_shape, Display the shape of the tree after
- * verification\, using the application's message handler\, intended for
- * debugging., a boolean flag; default \c false.}
* @config{strict, Treat any verification problem as an error; by
* default\, verify will warn\, but not fail\, in the case of errors
* that won't affect future behavior (for example\, a leaked block)., a
@@ -1830,7 +1849,7 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into
* which the log files are written. If the value is not an absolute
* path name\, the files are created relative to the database home., a
- * string; default empty.}
+ * string; default \c ".".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc,
* pre-allocate log files., a boolean flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery or error if
@@ -2015,6 +2034,10 @@ struct __wt_connection {
* @configstart{WT_CONNECTION.load_extension, see dist/api_data.py}
* @config{config, configuration string passed to the entry point of the
* extension as its WT_CONFIG_ARG argument., a string; default empty.}
+ * @config{early_load, whether this extension should be loaded at the
+ * beginning of ::wiredtiger_open. Only applicable to extensions loaded
+ * via the wiredtiger_open configuration string., a boolean flag;
+ * default \c false.}
* @config{entry, the entry point of the extension\, called to
* initialize the extension when it is loaded. The signature of the
* function must match ::wiredtiger_extension_init., a string; default
@@ -2126,6 +2149,23 @@ struct __wt_connection {
WT_EXTRACTOR *extractor, const char *config);
/*!
+ * Configure a custom file system.
+ *
+ * This method can only be called from an early loaded extension
+ * module. The application must first implement the WT_FILE_SYSTEM
+ * interface and then register the implementation with WiredTiger:
+ *
+ * @snippet ex_file_system.c WT_FILE_SYSTEM register
+ *
+ * @param connection the connection handle
+ * @param fs the populated file system structure
+ * @configempty{WT_CONNECTION.set_file_system, see dist/api_data.py}
+ * @errors
+ */
+ int __F(set_file_system)(
+ WT_CONNECTION *connection, WT_FILE_SYSTEM *fs, const char *config);
+
+ /*!
* Return a reference to the WiredTiger extension functions.
*
* @snippet ex_data_source.c WT_EXTENSION_API declaration
@@ -2286,6 +2326,8 @@ struct __wt_connection {
* @config{ ),,}
* @config{hazard_max, maximum number of simultaneous hazard pointers per
* session handle., an integer greater than or equal to 15; default \c 1000.}
+ * @config{in_memory, keep data in-memory only. See @ref in_memory for more
+ * information., a boolean flag; default \c false.}
* @config{log = (, enable logging. Enabling logging uses three sessions from
* the configured session_max., a set of related configuration options defined
* below.}
@@ -2303,7 +2345,7 @@ struct __wt_connection {
* integer between 100KB and 2GB; default \c 100MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into which the
* log files are written. If the value is not an absolute path name\, the files
- * are created relative to the database home., a string; default empty.}
+ * are created relative to the database home., a string; default \c ".".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean
* flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery
@@ -3000,19 +3042,15 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
* if recovery is required to use the database.
*/
#define WT_RUN_RECOVERY -31806
-/*! @cond internal */
/*!
* Operation would overflow cache.
- * This error is generated when wiredtiger_open is configured to run in-memory,
- * and an insert or update operation requires more than the configured cache
- * size to complete.
+ * This error is only generated when wiredtiger_open is configured to run in-
+ * memory, and an insert or update operation requires more than the configured
+ * cache size to complete. The operation may be retried; if a transaction is in
+ * progress, it should be rolled back and the operation retried in a new
+ * transaction.
*/
#define WT_CACHE_FULL -31807
-/*! @endcond */
-/*! @cond internal */
-/*! Permission denied (internal). */
-#define WT_PERM_DENIED -31808
-/*! @endcond */
/*
* Error return section: END
* DO NOT EDIT: automatically built by dist/api_err.py.
@@ -3035,7 +3073,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
/*******************************************
* Forward structure declarations for the extension API
*******************************************/
-struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG;
+struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG;
/*!
* The interface implemented by applications to provide custom ordering of
@@ -3566,7 +3604,7 @@ struct __wt_encryptor {
* number of bytes needed.
*
* @param[out] expansion_constantp the additional number of bytes needed
- * when encrypting.
+ * when encrypting.
* @returns zero for success, non-zero to indicate an error.
*
* @snippet nop_encrypt.c WT_ENCRYPTOR sizing
@@ -3585,8 +3623,7 @@ struct __wt_encryptor {
* is used instead of this one for any callbacks.
*
* @param[in] encrypt_config the "encryption" portion of the
- * configuration from the wiredtiger_open or
- * WT_SESSION::create call
+ * configuration from the wiredtiger_open or WT_SESSION::create call
* @param[out] customp the new modified encryptor, or NULL.
* @returns zero for success, non-zero to indicate an error.
*/
@@ -3661,6 +3698,466 @@ struct __wt_extractor {
int (*terminate)(WT_EXTRACTOR *extractor, WT_SESSION *session);
};
+#if !defined(SWIG)
+/*! WT_FILE_SYSTEM::open_file file types */
+typedef enum {
+ WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */
+ WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */
+ WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */
+ WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */
+ WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */
+} WT_OPEN_FILE_TYPE;
+
+/*! WT_FILE_SYSTEM::open_file flags: create if does not exist */
+#define WT_OPEN_CREATE 0x001
+/*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */
+#define WT_OPEN_DIRECTIO 0x002
+/*! WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */
+#define WT_OPEN_EXCLUSIVE 0x004
+#ifndef DOXYGEN
+#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */
+#endif
+/*! WT_FILE_SYSTEM::open_file flags: open is read-only */
+#define WT_OPEN_READONLY 0x010
+
+/*!
+ * The interface implemented by applications to provide a custom file system
+ * implementation.
+ *
+ * <b>Thread safety:</b> WiredTiger may invoke methods on the WT_FILE_SYSTEM
+ * interface from multiple threads concurrently. It is the responsibility of
+ * the implementation to protect any shared data.
+ *
+ * Applications register implementations with WiredTiger by calling
+ * WT_CONNECTION::set_file_system. See @ref custom_file_systems for more
+ * information.
+ *
+ * @snippet ex_file_system.c WT_FILE_SYSTEM register
+ */
+struct __wt_file_system {
+ /*!
+ * Return a list of file names for the named directory.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param directory the name of the directory
+ * @param prefix if not NULL, only files with names matching the prefix
+ * are returned
+ * @param[out] dirlist the method returns an allocated array of
+ * individually allocated strings, one for each entry in the
+ * directory.
+ * @param[out] countp the number of entries returned
+ */
+ int (*fs_directory_list)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *directory, const char *prefix,
+ char ***dirlist, uint32_t *countp);
+
+ /*!
+ * Free memory allocated by WT_FILE_SYSTEM::directory_list.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param dirlist array returned by WT_FILE_SYSTEM::directory_list
+ * @param count count returned by WT_FILE_SYSTEM::directory_list
+ */
+ int (*fs_directory_list_free)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, char **dirlist, uint32_t count);
+
+ /*!
+ * Flush the named directory.
+ *
+ * This method is not required for readonly file systems or file systems
+ * where it is not necessary to flush a file's directory to ensure the
+ * durability of file system operations, and should be set to NULL when
+ * not required by the file system.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param directory the name of the directory
+ */
+ int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *directory);
+
+ /*!
+ * Return if the named file system object exists.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param name the name of the file
+ * @param[out] existp If the named file system object exists
+ */
+ int (*fs_exist)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, bool *existp);
+
+ /*!
+ * Open a handle for a named file system object
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param name the name of the file system object
+ * @param file_type the type of the file
+ * The file type is provided to allow optimization for different file
+ * access patterns.
+ * @param flags flags indicating how to open the file, one or more of
+ * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or
+ * ::WT_OPEN_READONLY.
+ * @param[out] file_handlep the handle to the newly opened file. File
+ * system implementations must allocate memory for the handle and
+ * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE
+ * fields. Applications wanting to associate private information
+ * with the WT_FILE_HANDLE structure should declare and allocate
+ * their own structure as a superset of a WT_FILE_HANDLE structure.
+ */
+ int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+ const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ WT_FILE_HANDLE **file_handlep);
+
+ /*!
+ * Remove a named file system object
+ *
+ * This method is not required for readonly file systems and should be
+ * set to NULL when not required by the file system.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param name the name of the file system object
+ */
+ int (*fs_remove)(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name);
+
+ /*!
+ * Rename a named file system object
+ *
+ * This method is not required for readonly file systems and should be
+ * set to NULL when not required by the file system.
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param from the original name of the object
+ * @param to the new name for the object
+ */
+ int (*fs_rename)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *from, const char *to);
+
+ /*!
+ * Return the size of a named file system object
+ *
+ * @errors
+ *
+ * @param file_system the WT_FILE_SYSTEM
+ * @param session the current WiredTiger session
+ * @param name the name of the file system object
+ * @param[out] sizep the size of the file system entry
+ */
+ int (*fs_size)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, wt_off_t *sizep);
+
+ /*!
+ * A callback performed when the file system is closed and will no
+ * longer be accessed by the WiredTiger database.
+ *
+ * This method is not required and should be set to NULL when not
+ * required by the file system.
+ *
+ * The WT_FILE_SYSTEM::terminate callback is intended to allow cleanup,
+ * the handle will not be subsequently accessed by WiredTiger.
+ */
+ int (*terminate)(WT_FILE_SYSTEM *file_system, WT_SESSION *session);
+};
+
+/*! WT_FILE_HANDLE::fadvise flags: no longer need */
+#define WT_FILE_HANDLE_DONTNEED 1
+/*! WT_FILE_HANDLE::fadvise flags: will need */
+#define WT_FILE_HANDLE_WILLNEED 2
+
+/*!
+ * A file handle implementation returned by WT_FILE_SYSTEM::open_file.
+ *
+ * <b>Thread safety:</b> Unless explicitly stated otherwise, WiredTiger may
+ * invoke methods on the WT_FILE_HANDLE interface from multiple threads
+ * concurrently. It is the responsibility of the implementation to protect
+ * any shared data.
+ *
+ * See @ref custom_file_systems for more information.
+ */
+struct __wt_file_handle {
+ /*!
+ * The enclosing file system, set by WT_FILE_SYSTEM::open_file.
+ */
+ WT_FILE_SYSTEM *file_system;
+
+ /*!
+ * The name of the file, set by WT_FILE_SYSTEM::open_file.
+ */
+ char *name;
+
+ /*!
+ * Close a file handle, the handle will not be further accessed by
+ * WiredTiger.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ */
+ int (*close)(WT_FILE_HANDLE *file_handle, WT_SESSION *session);
+
+ /*!
+ * Indicate expected future use of file ranges, based on the POSIX
+ * 1003.1 standard fadvise.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param offset the file offset
+ * @param len the size of the advisory
+ * @param advice one of ::WT_FILE_HANDLE_WILLNEED or
+ * ::WT_FILE_HANDLE_DONTNEED.
+ */
+ int (*fh_advise)(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *session, wt_off_t offset, wt_off_t len, int advice);
+
+ /*!
+ * Ensure disk space is allocated for the file, based on the POSIX
+ * 1003.1 standard fallocate.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * This method is not called by multiple threads concurrently (on the
+ * same file handle). If the file handle's fallocate method supports
+ * concurrent calls, set the WT_FILE_HANDLE::fh_allocate_nolock method
+ * instead.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param offset the file offset
+ * @param len the length of the range to allocate
+ */
+ int (*fh_allocate)(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *session, wt_off_t, wt_off_t);
+
+ /*!
+ * Ensure disk space is allocated for the file, based on the POSIX
+ * 1003.1 standard fallocate.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * This method may be called by multiple threads concurrently (on the
+ * same file handle). If the file handle's fallocate method does not
+ * support concurrent calls, set the WT_FILE_HANDLE::fh_allocate method
+ * instead.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param offset the file offset
+ * @param len the length of the range to allocate
+ */
+ int (*fh_allocate_nolock)(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *session, wt_off_t, wt_off_t);
+
+ /*!
+ * Lock/unlock a file from the perspective of other processes running
+ * in the system.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param lock whether to lock or unlock
+ */
+ int (*fh_lock)(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock);
+
+ /*!
+ * Map a file into memory, based on the POSIX 1003.1 standard mmap.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param[out] mapped_regionp a reference to a memory location into
+ * which should be stored a pointer to the start of the mapped region
+ * @param[out] lengthp a reference to a memory location into which
+ * should be stored the length of the region
+ * @param[out] mapped_cookiep a reference to a memory location into
+ * which can be optionally stored a pointer to an opaque cookie
+ * which is subsequently passed to WT_FILE_HANDLE::unmap.
+ */
+ int (*fh_map)(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+ void *mapped_regionp, size_t *lengthp, void *mapped_cookiep);
+
+ /*!
+ * Unmap part of a memory mapped file, based on the POSIX 1003.1
+ * standard madvise.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param map a location in the mapped region unlikely to be used in the
+ * near future
+ * @param length the length of the mapped region to discard
+ * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method
+ */
+ int (*fh_map_discard)(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *session, void *map, size_t length, void *mapped_cookie);
+
+ /*!
+ * Preload part of a memory mapped file, based on the POSIX 1003.1
+ * standard madvise.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param map a location in the mapped region likely to be used in the
+ * near future
+ * @param length the size of the mapped region to preload
+ * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method
+ */
+ int (*fh_map_preload)(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+ const void *map, size_t length, void *mapped_cookie);
+
+ /*!
+ * Unmap a memory mapped file, based on the POSIX 1003.1 standard
+ * munmap.
+ *
+ * This method is only required if a valid implementation of map is
+ * provided by the file, and should be set to NULL otherwise.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param mapped_region a pointer to the start of the mapped region
+ * @param length the length of the mapped region
+ * @param mapped_cookie any cookie set by the WT_FILE_HANDLE::map method
+ */
+ int (*fh_unmap)(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+ void *mapped_region, size_t length, void *mapped_cookie);
+
+ /*!
+ * Read from a file, based on the POSIX 1003.1 standard pread.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param offset the offset in the file to start reading from
+ * @param len the amount to read
+ * @param[out] buf buffer to hold the content read from file
+ */
+ int (*fh_read)(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *session, wt_off_t offset, size_t len, void *buf);
+
+ /*!
+ * Return the size of a file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param[out] sizep the size of the file
+ */
+ int (*fh_size)(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep);
+
+ /*!
+ * Make outstanding file writes durable and do not return until writes
+ * are complete.
+ *
+ * This method is not required for read-only files, and should be set
+ * to NULL when not supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ */
+ int (*fh_sync)(WT_FILE_HANDLE *file_handle, WT_SESSION *session);
+
+ /*!
+ * Schedule the outstanding file writes required for durability and
+ * return immediately.
+ *
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ */
+ int (*fh_sync_nowait)(WT_FILE_HANDLE *file_handle, WT_SESSION *session);
+
+ /*!
+ * Lengthen or shorten a file to the specified length, based on the
+ * POSIX 1003.1 standard ftruncate.
+ *
+ * This method is not required for read-only files, and should be set
+ * to NULL when not supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param length desired file size after truncate
+ */
+ int (*fh_truncate)(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t length);
+
+ /*!
+ * Write to a file, based on the POSIX 1003.1 standard pwrite.
+ *
+ * This method is not required for read-only files, and should be set
+ * to NULL when not supported by the file.
+ *
+ * @errors
+ *
+ * @param file_handle the WT_FILE_HANDLE
+ * @param session the current WiredTiger session
+ * @param offset offset at which to start writing
+ * @param length amount of data to write
+ * @param buf content to be written to the file
+ */
+ int (*fh_write)(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+ wt_off_t offset, size_t length, const void *buf);
+};
+#endif /* !defined(SWIG) */
+
/*!
* Entry point to an extension, called when the extension is loaded.
*
@@ -3771,285 +4268,328 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032
/*! cache: checkpoint blocked page eviction */
#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033
+/*! cache: eviction calls to get a page */
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034
+/*! cache: eviction calls to get a page found queue empty */
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035
+/*! cache: eviction calls to get a page found queue empty after locking */
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1034
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1035
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1036
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1037
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1038
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041
+/*! cache: eviction server skipped very large page */
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042
+/*! cache: eviction server slept, because we did not make progress with
+ * eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1039
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1040
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1041
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1042
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1043
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1044
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049
+/*! cache: hazard pointer check calls */
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050
+/*! cache: hazard pointer check entries walked */
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051
+/*! cache: hazard pointer maximum array length */
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1045
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1046
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1047
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1048
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1049
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1050
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1051
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1052
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1060
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1053
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1054
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1055
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1056
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1057
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1058
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1059
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1060
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1061
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1069
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1062
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1063
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1064
+#define WT_STAT_CONN_CACHE_READ 1072
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1065
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073
+/*! cache: pages requested from the cache */
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1066
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1067
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1068
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1069
+#define WT_STAT_CONN_CACHE_WRITE 1078
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1070
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1071
+#define WT_STAT_CONN_CACHE_OVERHEAD 1080
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1072
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1073
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082
/*! cache: tracked bytes belonging to overflow pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1074
+#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1075
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1076
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1077
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1078
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1079
+#define WT_STAT_CONN_COND_AUTO_WAIT 1088
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1080
+#define WT_STAT_CONN_FILE_OPEN 1089
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1081
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1090
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1082
+#define WT_STAT_CONN_MEMORY_FREE 1091
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1083
+#define WT_STAT_CONN_MEMORY_GROW 1092
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1084
+#define WT_STAT_CONN_COND_WAIT 1093
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1085
+#define WT_STAT_CONN_RWLOCK_READ 1094
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1086
+#define WT_STAT_CONN_RWLOCK_WRITE 1095
+/*! connection: total fsync I/Os */
+#define WT_STAT_CONN_FSYNC_IO 1096
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1087
+#define WT_STAT_CONN_READ_IO 1097
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1088
+#define WT_STAT_CONN_WRITE_IO 1098
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1089
+#define WT_STAT_CONN_CURSOR_CREATE 1099
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1090
+#define WT_STAT_CONN_CURSOR_INSERT 1100
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1091
+#define WT_STAT_CONN_CURSOR_NEXT 1101
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1092
+#define WT_STAT_CONN_CURSOR_PREV 1102
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1093
+#define WT_STAT_CONN_CURSOR_REMOVE 1103
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1094
+#define WT_STAT_CONN_CURSOR_RESET 1104
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1095
+#define WT_STAT_CONN_CURSOR_RESTART 1105
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1096
+#define WT_STAT_CONN_CURSOR_SEARCH 1106
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1097
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1098
+#define WT_STAT_CONN_CURSOR_UPDATE 1108
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1099
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1109
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1100
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1101
+#define WT_STAT_CONN_DH_SWEEP_REF 1111
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1102
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1103
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1104
+#define WT_STAT_CONN_DH_SWEEP_TOD 1114
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1105
+#define WT_STAT_CONN_DH_SWEEPS 1115
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1106
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1116
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1107
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1108
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1109
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1110
+#define WT_STAT_CONN_LOG_SLOT_RACES 1120
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1111
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1112
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1122
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1113
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1114
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1115
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1116
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1126
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1117
+#define WT_STAT_CONN_LOG_FLUSH 1127
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1118
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1128
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1119
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1120
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1121
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1122
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1123
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1124
+#define WT_STAT_CONN_LOG_SCANS 1134
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1125
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1135
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1126
+#define WT_STAT_CONN_LOG_WRITE_LSN 1136
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1127
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1128
+#define WT_STAT_CONN_LOG_SYNC 1138
+/*! log: log sync time duration (usecs) */
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1139
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1129
+#define WT_STAT_CONN_LOG_SYNC_DIR 1140
+/*! log: log sync_dir time duration (usecs) */
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1130
+#define WT_STAT_CONN_LOG_WRITES 1142
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1131
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1132
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1133
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1134
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1135
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1136
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1148
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1137
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1138
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1139
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1140
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1141
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1142
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1143
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1144
+#define WT_STAT_CONN_REC_PAGES 1156
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1145
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1157
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1146
+#define WT_STAT_CONN_REC_PAGE_DELETE 1158
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1147
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1148
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1149
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1150
+#define WT_STAT_CONN_SESSION_OPEN 1162
+/*! thread-state: active filesystem fsync calls */
+#define WT_STAT_CONN_FSYNC_ACTIVE 1163
+/*! thread-state: active filesystem read calls */
+#define WT_STAT_CONN_READ_ACTIVE 1164
+/*! thread-state: active filesystem write calls */
+#define WT_STAT_CONN_WRITE_ACTIVE 1165
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1151
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1152
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1153
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1154
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1155
+#define WT_STAT_CONN_PAGE_SLEEP 1170
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1156
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1157
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1158
+#define WT_STAT_CONN_TXN_BEGIN 1173
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1159
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1160
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1161
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1162
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1163
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1164
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1165
+#define WT_STAT_CONN_TXN_CHECKPOINT 1180
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1166
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1181
+/*! transaction: transaction fsync calls for checkpoint after allocating
+ * the transaction ID */
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182
+/*! transaction: transaction fsync calls for checkpoint before allocating
+ * the transaction ID */
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183
+/*! transaction: transaction fsync duration for checkpoint after
+ * allocating the transaction ID (usecs) */
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184
+/*! transaction: transaction fsync duration for checkpoint before
+ * allocating the transaction ID (usecs) */
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1167
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1186
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1168
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187
/*! transaction: transaction range of IDs currently pinned by named
* snapshots */
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1169
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1170
+#define WT_STAT_CONN_TXN_SYNC 1189
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1171
+#define WT_STAT_CONN_TXN_COMMIT 1190
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1172
+#define WT_STAT_CONN_TXN_ROLLBACK 1191
/*!
* @}
@@ -4138,42 +4678,42 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
/*! btree: row-store leaf pages */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
-/*! cache: bytes currently in the cache */
-#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2041
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2040
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053
/*! cache: page written requiring lookaside records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2056
+#define WT_STAT_DSRC_CACHE_READ 2055
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056
+/*! cache: pages requested from the cache */
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057
/*! cache: pages written from cache */
#define WT_STAT_DSRC_CACHE_WRITE 2058
/*! cache: pages written requiring in-memory restoration */
@@ -4266,12 +4806,16 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
* @anchor statistics_join
* @{
*/
-/*! : accesses */
-#define WT_STAT_JOIN_ACCESSES 3000
-/*! : actual count of items */
-#define WT_STAT_JOIN_ACTUAL_COUNT 3001
+/*! : accesses to the main table */
+#define WT_STAT_JOIN_MAIN_ACCESS 3000
/*! : bloom filter false positives */
-#define WT_STAT_JOIN_BLOOM_FALSE_POSITIVE 3002
+#define WT_STAT_JOIN_BLOOM_FALSE_POSITIVE 3001
+/*! : checks that conditions of membership are satisfied */
+#define WT_STAT_JOIN_MEMBERSHIP_CHECK 3002
+/*! : items inserted into a bloom filter */
+#define WT_STAT_JOIN_BLOOM_INSERT 3003
+/*! : items iterated */
+#define WT_STAT_JOIN_ITERATED 3004
/*! @} */
/*
* Statistics section: END
diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h
index 7d97d97dcf5..3d65cd1fc24 100644
--- a/src/include/wiredtiger_ext.h
+++ b/src/include/wiredtiger_ext.h
@@ -131,6 +131,19 @@ struct __wt_extension_api {
WT_EXTENSION_API *, WT_SESSION *session, int error);
/*!
+ * Map a Windows system error code to a POSIX 1003.1/ANSI C error.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param windows_error a Windows system error code
+ * @returns a matching POSIX 1003.1/ANSI C error code
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error
+ */
+ int (*map_windows_error)(WT_EXTENSION_API *wt_api,
+ WT_SESSION *session, uint32_t windows_error);
+
+ /*!
* Allocate short-term use scratch memory.
*
* @param wt_api the extension handle
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 9e5007b38ed..c5337967f22 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -28,7 +28,6 @@ extern "C" {
#include <sys/time.h>
#include <sys/uio.h>
#endif
-#include <ctype.h>
#ifndef _WIN32
#include <dlfcn.h>
#endif
@@ -169,6 +168,8 @@ struct __wt_dsrc_stats;
typedef struct __wt_dsrc_stats WT_DSRC_STATS;
struct __wt_evict_entry;
typedef struct __wt_evict_entry WT_EVICT_ENTRY;
+struct __wt_evict_queue;
+ typedef struct __wt_evict_queue WT_EVICT_QUEUE;
struct __wt_evict_worker;
typedef struct __wt_evict_worker WT_EVICT_WORKER;
struct __wt_ext;
@@ -179,6 +180,14 @@ struct __wt_fair_lock;
typedef struct __wt_fair_lock WT_FAIR_LOCK;
struct __wt_fh;
typedef struct __wt_fh WT_FH;
+struct __wt_file_handle_inmem;
+ typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM;
+struct __wt_file_handle_posix;
+ typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX;
+struct __wt_file_handle_win;
+ typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN;
+struct __wt_fstream;
+ typedef struct __wt_fstream WT_FSTREAM;
struct __wt_hazard;
typedef struct __wt_hazard WT_HAZARD;
struct __wt_ikey;
@@ -347,25 +356,33 @@ union __wt_rand_state;
#include "connection.h"
#include "extern.h"
+#ifdef _WIN32
+#include "extern_win.h"
+#else
+#include "extern_posix.h"
+#endif
#include "verify_build.h"
+#include "ctype.i" /* required by packing.i */
#include "intpack.i" /* required by cell.i, packing.i */
-#include "buf.i"
+#include "buf.i" /* required by cell.i */
#include "cache.i" /* required by txn.i */
#include "cell.i" /* required by btree.i */
-#include "log.i"
-#include "misc.i"
#include "mutex.i" /* required by btree.i */
-#include "packing.i"
#include "txn.i" /* required by btree.i */
+#include "bitstring.i"
#include "btree.i" /* required by cursor.i */
#include "btree_cmp.i"
-#include "cursor.i"
-
-#include "bitstring.i"
#include "column.i"
+#include "cursor.i"
+#include "log.i"
+#include "misc.i"
+#include "os_fhandle.i"
+#include "os_fs.i"
+#include "os_fstream.i"
+#include "packing.i"
#include "serial.i"
#if defined(__cplusplus)
diff --git a/src/log/log.c b/src/log/log.c
index 8591818b5a3..bf83c280d8d 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -95,9 +95,11 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn)
int
__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
{
+ struct timespec fsync_start, fsync_stop;
WT_DECL_RET;
WT_FH *log_fh;
WT_LOG *log;
+ uint64_t fsync_duration_usecs;
log = S2C(session)->log;
@@ -124,9 +126,14 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
"log_force_sync: sync directory %s to LSN %" PRIu32
"/%" PRIu32,
log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset));
- WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh));
+ WT_ERR(__wt_epoch(session, &fsync_start));
+ WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
+ WT_STAT_FAST_CONN_INCRV(session,
+ log_sync_dir_duration, fsync_duration_usecs);
}
/*
* Sync the log file if needed.
@@ -143,9 +150,14 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset));
+ WT_ERR(__wt_epoch(session, &fsync_start));
WT_ERR(__wt_fsync(session, log_fh, true));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync);
+ WT_STAT_FAST_CONN_INCRV(session,
+ log_sync_duration, fsync_duration_usecs);
WT_ERR(__wt_close(session, &log_fh));
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
}
@@ -258,8 +270,8 @@ __log_get_files(WT_SESSION_IMPL *session,
log_path = conn->log_path;
if (log_path == NULL)
log_path = "";
- return (__wt_dirlist(session, log_path, file_prefix,
- WT_DIRLIST_INCLUDE, filesp, countp));
+ return (__wt_fs_directory_list(
+ session, log_path, file_prefix, filesp, countp));
}
/*
@@ -277,6 +289,9 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
uint32_t id, max;
u_int count, i;
+ *filesp = NULL;
+ *countp = 0;
+
id = 0;
log = S2C(session)->log;
@@ -307,26 +322,12 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
*countp = count;
if (0) {
-err: __wt_log_files_free(session, files, count);
+err: WT_TRET(__wt_fs_directory_list_free(session, &files, count));
}
return (ret);
}
/*
- * __wt_log_files_free --
- * Free memory associated with a log file list.
- */
-void
-__wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count)
-{
- u_int i;
-
- for (i = 0; i < count; i++)
- __wt_free(session, files[i]);
- __wt_free(session, files);
-}
-
-/*
* __log_filename --
* Given a log number, return a WT_ITEM of a generated log file name
* of the given prefix type.
@@ -443,21 +444,27 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
conn = S2C(session);
log = conn->log;
- ret = 0;
+
/*
* If the user configured zero filling, pre-allocate the log file
* manually. Otherwise use either fallocate or ftruncate to create
* and zero the log file based on what is available.
*/
if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL))
- ret = __log_zero(session, fh,
- WT_LOG_FIRST_RECORD, conn->log_file_max);
- else if (fh->fallocate_available == WT_FALLOCATE_NOT_AVAILABLE ||
- (ret = __wt_fallocate(session, fh,
- WT_LOG_FIRST_RECORD, conn->log_file_max)) == ENOTSUP)
- ret = __wt_ftruncate(session, fh,
- WT_LOG_FIRST_RECORD + conn->log_file_max);
- return (ret);
+ return (__log_zero(session, fh,
+ WT_LOG_FIRST_RECORD, conn->log_file_max));
+
+ /*
+ * We have exclusive access to the log file and there are no other
+ * writes happening concurrently, so there are no locking issues.
+ */
+ if ((ret = __wt_fallocate(
+ session, fh, WT_LOG_FIRST_RECORD,
+ conn->log_file_max - WT_LOG_FIRST_RECORD)) == 0)
+ return (0);
+ WT_RET_ERROR_OK(ret, ENOTSUP);
+
+ return (__wt_ftruncate(session, fh, conn->log_file_max));
}
/*
@@ -669,14 +676,17 @@ static int
__log_openfile(WT_SESSION_IMPL *session,
bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_LOG *log;
WT_LOG_DESC *desc;
WT_LOG_RECORD *logrec;
uint32_t allocsize;
+ u_int flags;
- log = S2C(session)->log;
+ conn = S2C(session);
+ log = conn->log;
if (log == NULL)
allocsize = WT_LOG_ALIGN;
else
@@ -685,8 +695,14 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, id, file_prefix, buf));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"opening log %s", (const char *)buf->data));
- WT_ERR(__wt_open(session, buf->data,
- WT_FILE_TYPE_LOG, ok_create ? WT_OPEN_CREATE : 0, fhp));
+ flags = 0;
+ if (ok_create)
+ LF_SET(WT_OPEN_CREATE);
+ if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
+ LF_SET(WT_OPEN_DIRECTIO);
+ WT_ERR(__wt_open(
+ session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp));
+
/*
* If we are not creating the log file but opening it for reading,
* check that the magic number and versions are correct.
@@ -757,12 +773,11 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num)
* All file setup, writing the header and pre-allocation was done
* before. We only need to rename it.
*/
- WT_ERR(__wt_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
- if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
return (ret);
}
@@ -992,8 +1007,7 @@ __log_truncate(WT_SESSION_IMPL *session,
}
}
err: WT_TRET(__wt_close(session, &log_fh));
- if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
return (ret);
}
@@ -1035,7 +1049,6 @@ __wt_log_allocfile(
*/
WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id));
WT_ERR(__log_file_header(session, log_fh, NULL, true));
- WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD));
WT_ERR(__log_prealloc(session, log_fh));
WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
@@ -1045,7 +1058,7 @@ __wt_log_allocfile(
/*
* Rename it into place and make it available.
*/
- WT_ERR(__wt_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
@@ -1068,7 +1081,7 @@ __wt_log_remove(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, lognum, file_prefix, path));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_remove: remove log %s", (char *)path->data));
- WT_ERR(__wt_remove(session, path->data));
+ WT_ERR(__wt_fs_remove(session, path->data));
err: __wt_scr_free(session, &path);
return (ret);
}
@@ -1104,7 +1117,7 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_open: open fh to directory %s", conn->log_path));
WT_RET(__wt_open(session, conn->log_path,
- WT_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
+ WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
}
if (!F_ISSET(conn, WT_CONN_READONLY)) {
@@ -1121,9 +1134,8 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_ERR(__wt_log_remove(
session, WT_LOG_TMPNAME, lognum));
}
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
- logcount = 0;
+ WT_ERR(
+ __wt_fs_directory_list_free(session, &logfiles, logcount));
WT_ERR(__log_get_files(session,
WT_LOG_PREPNAME, &logfiles, &logcount));
for (i = 0; i < logcount; i++) {
@@ -1132,8 +1144,8 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_ERR(__wt_log_remove(
session, WT_LOG_PREPNAME, lognum));
}
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
+ WT_ERR(
+ __wt_fs_directory_list_free(session, &logfiles, logcount));
}
/*
@@ -1171,8 +1183,7 @@ __wt_log_open(WT_SESSION_IMPL *session)
FLD_SET(conn->log_flags, WT_CONN_LOG_EXISTED);
}
-err: if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
if (ret == 0)
F_SET(log, WT_LOG_OPENED);
return (ret);
@@ -1210,8 +1221,7 @@ __wt_log_close(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing log directory %s", log->log_dir_fh->name));
if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(
- __wt_directory_sync_fh(session, log->log_dir_fh));
+ WT_RET(__wt_fsync(session, log->log_dir_fh, true));
WT_RET(__wt_close(session, &log->log_dir_fh));
log->log_dir_fh = NULL;
}
@@ -1285,11 +1295,13 @@ err: __wt_free(session, buf);
int
__wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
{
+ struct timespec fsync_start, fsync_stop;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
WT_LSN sync_lsn;
int64_t release_buffered, release_bytes;
+ uint64_t fsync_duration_usecs;
int yield_count;
bool locked;
@@ -1419,10 +1431,15 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
"/%" PRIu32,
log->log_dir_fh->name,
sync_lsn.l.file, sync_lsn.l.offset));
- WT_ERR(__wt_directory_sync_fh(
- session, log->log_dir_fh));
+ WT_ERR(__wt_epoch(session, &fsync_start));
+ WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs =
+ WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = sync_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
+ WT_STAT_FAST_CONN_INCRV(session,
+ log_sync_dir_duration, fsync_duration_usecs);
}
/*
@@ -1436,7 +1453,13 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset));
WT_STAT_FAST_CONN_INCR(session, log_sync);
+ WT_ERR(__wt_epoch(session, &fsync_start));
WT_ERR(__wt_fsync(session, log->log_fh, true));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs =
+ WT_TIMEDIFF_US(fsync_stop, fsync_start);
+ WT_STAT_FAST_CONN_INCRV(session,
+ log_sync_duration, fsync_duration_usecs);
log->sync_lsn = sync_lsn;
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
}
@@ -1561,8 +1584,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
}
WT_SET_LSN(&start_lsn, firstlog, 0);
WT_SET_LSN(&end_lsn, lastlog, 0);
- __wt_log_files_free(session, logfiles, logcount);
- logfiles = NULL;
+ WT_ERR(
+ __wt_fs_directory_list_free(session, &logfiles, logcount));
}
WT_ERR(__log_openfile(
session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file));
@@ -1757,9 +1780,23 @@ advance:
&rd_lsn, WT_LOG_FILENAME, 0));
err: WT_STAT_FAST_CONN_INCR(session, log_scans);
+ /*
+ * If the first attempt to read a log record results in
+ * an error, recovery is likely going to fail. Try to provide
+ * a helpful failure message.
+ */
+ if (ret != 0 && firstrecord) {
+ __wt_errx(session,
+ "WiredTiger is unable to read the recovery log.");
+ __wt_errx(session, "This may be due to the log"
+ " files being encrypted, being from an older"
+ " version or due to corruption on disk");
+ __wt_errx(session, "You should confirm that you have"
+ " opened the database with the correct options including"
+ " all encryption and compression options");
+ }
- if (logfiles != NULL)
- __wt_log_files_free(session, logfiles, logcount);
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
__wt_scr_free(session, &buf);
__wt_scr_free(session, &decryptitem);
diff --git a/src/log/log_auto.c b/src/log/log_auto.c
index d4dab4e1a33..34bccd0ede4 100644
--- a/src/log/log_auto.c
+++ b/src/log/log_auto.c
@@ -44,7 +44,7 @@ __wt_logop_read(WT_SESSION_IMPL *session,
}
static size_t
-__logrec_json_unpack_str(char *dest, size_t destlen, const char *src,
+__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src,
size_t srclen)
{
size_t total;
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 9ca850da9f1..78235fb6a92 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -103,7 +103,6 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
bool hard_limit, have_primary, ovfl;
lsm_tree = clsm->lsm_tree;
- ovfl = false;
session = (WT_SESSION_IMPL *)clsm->iface.session;
if (clsm->nchunks == 0) {
@@ -1155,7 +1154,6 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
closest = NULL;
clsm = (WT_CURSOR_LSM *)cursor;
exact = 0;
- deleted = false;
CURSOR_API_CALL(cursor, session, search_near, NULL);
WT_CURSOR_NEEDKEY(cursor);
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 6d907284546..1ff0a216c02 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -152,16 +152,13 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
u_int end_chunk, i, merge_max, merge_min, nchunks, start_chunk;
u_int oldest_gen, youngest_gen;
- chunk_size = 0;
- nchunks = 0;
- record_count = 0;
- chunk = youngest = NULL;
-
/* Clear the return parameters */
- *start = 0;
- *end = 0;
+ *start = *end = 0;
*records = 0;
+ chunk_size = 0;
+ chunk = youngest = NULL;
+
aggressive = lsm_tree->merge_aggressiveness;
merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ?
100 : lsm_tree->merge_max;
@@ -218,8 +215,8 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
*/
retry_find:
oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation;
- for (start_chunk = end_chunk + 1, record_count = 0;
- start_chunk > 0; ) {
+ for (record_count = 0,
+ start_chunk = end_chunk + 1; start_chunk > 0;) {
chunk = lsm_tree->chunk[start_chunk - 1];
youngest = lsm_tree->chunk[end_chunk];
nchunks = (end_chunk + 1) - start_chunk;
@@ -306,14 +303,12 @@ retry_find:
}
#endif
- WT_ASSERT(session,
- nchunks == 0 || (chunk != NULL && youngest != NULL));
+ WT_ASSERT(session, nchunks == 0 || (chunk != NULL && youngest != NULL));
+
/*
- * Don't do merges that are too small or across too many
- * generations.
+ * Don't do merges that are too small or across too many generations.
*/
- if (nchunks < merge_min ||
- oldest_gen - youngest_gen > max_gap) {
+ if (nchunks < merge_min || oldest_gen - youngest_gen > max_gap) {
for (i = 0; i < nchunks; i++) {
chunk = lsm_tree->chunk[start_chunk + i];
WT_ASSERT(session,
@@ -365,7 +360,6 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
bloom = NULL;
chunk = NULL;
dest = src = NULL;
- start_id = 0;
created_chunk = create_bloom = locked = in_sync = false;
/* Fast path if it's obvious no merges could be done. */
@@ -485,7 +479,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL);
++lsm_tree->merge_progressing;
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted.",
+ "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted",
record_count, insert_count));
/*
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index e19e2cd0126..7e100cb855c 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -331,7 +331,7 @@ __lsm_meta_read_v1(
WT_ERR(__wt_scr_alloc(session, 0, &buf));
WT_ERR(__wt_buf_fmt(session, buf,
"key_format=u,value_format=u,memory_page_max=%" PRIu64,
- 2 * lsm_tree->chunk_max));
+ 2 * lsm_tree->chunk_size));
file_cfg[2] = buf->data;
WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf));
lsm_tree->file_config = fileconf;
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index cb1ddf22f84..da106ae2089 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -235,7 +235,7 @@ __wt_lsm_tree_set_chunk_size(
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_RET_MSG(session, EINVAL,
"Expected a 'file:' URI: %s", chunk->uri);
- WT_RET(__wt_filesize_name(session, filename, false, &size));
+ WT_RET(__wt_fs_size(session, filename, &size));
chunk->size = (uint64_t)size;
@@ -256,7 +256,7 @@ __lsm_tree_cleanup_old(WT_SESSION_IMPL *session, const char *uri)
{ WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL };
bool exists;
- WT_RET(__wt_exist(session, uri + strlen("file:"), &exists));
+ WT_RET(__wt_fs_exist(session, uri + strlen("file:"), &exists));
if (exists)
WT_WITH_SCHEMA_LOCK(session, ret,
ret = __wt_schema_drop(session, uri, cfg));
@@ -1344,8 +1344,14 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
locked = true;
for (i = 0; i < lsm_tree->nchunks; i++) {
chunk = lsm_tree->chunk[i];
- if (file_func == __wt_checkpoint &&
- F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
+ /*
+ * If the chunk is on disk, don't include underlying handles in
+ * the checkpoint. Checking the "get handles" function is all
+ * we need to do: no further checkpoint calls are made if the
+ * handle is not gathered.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
+ file_func == __wt_checkpoint_get_handles)
continue;
WT_ERR(__wt_schema_worker(session, chunk->uri,
file_func, name_func, cfg, open_flags));
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index f5bb4cfd337..c19f42327be 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
ret = __wt_schema_drop(session, uri, drop_cfg));
if (ret == 0)
- ret = __wt_remove(session, uri + strlen("file:"));
+ ret = __wt_fs_remove(session, uri + strlen("file:"));
WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri));
if (ret == EBUSY || ret == ENOENT)
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index dd65f1a7ef9..38a2edd7219 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -9,18 +9,6 @@
#include "wt_internal.h"
/*
- * __wt_metadata_init --
- * Metadata initialization.
- */
-void
-__wt_metadata_init(WT_SESSION_IMPL *session)
-{
- /* We cache the metadata file's URI hash for fast detection. */
- S2C(session)->meta_uri_hash =
- __wt_hash_city64(WT_METAFILE_URI, strlen(WT_METAFILE_URI));
-}
-
-/*
* __metadata_turtle --
* Return if a key's value should be taken from the turtle file.
*/
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index a73b7e09d37..eb06b2bed66 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -153,7 +153,6 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
case WT_ST_REMOVE:
case WT_ST_SET:
break;
- WT_ILLEGAL_VALUE(session);
}
__meta_track_clear(session, trk);
@@ -194,8 +193,8 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
__wt_err(session, ret,
"metadata unroll rename %s to %s", trk->b, trk->a);
- if (trk->a == NULL &&
- (ret = __wt_remove(session, trk->b + strlen("file:"))) != 0)
+ if (trk->a == NULL && (ret =
+ __wt_fs_remove(session, trk->b + strlen("file:"))) != 0)
__wt_err(session, ret,
"metadata unroll create %s", trk->b);
@@ -215,7 +214,6 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
__wt_err(session, ret,
"metadata unroll update %s to %s", trk->a, trk->b);
break;
- WT_ILLEGAL_VALUE(session);
}
__meta_track_clear(session, trk);
diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c
index 0b287c228e5..4d2b359bbed 100644
--- a/src/meta/meta_turtle.c
+++ b/src/meta/meta_turtle.c
@@ -18,12 +18,9 @@ __metadata_config(WT_SESSION_IMPL *session, char **metaconfp)
WT_DECL_ITEM(buf);
WT_DECL_RET;
const char *cfg[] = { WT_CONFIG_BASE(session, file_meta), NULL, NULL };
- char *metaconf;
*metaconfp = NULL;
- metaconf = NULL;
-
/* Create a turtle file with default values. */
WT_RET(__wt_scr_alloc(session, 0, &buf));
WT_ERR(__wt_buf_fmt(session, buf,
@@ -31,14 +28,9 @@ __metadata_config(WT_SESSION_IMPL *session, char **metaconfp)
WT_METAFILE_ID,
WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));
cfg[1] = buf->data;
- WT_ERR(__wt_config_collapse(session, cfg, &metaconf));
+ ret = __wt_config_collapse(session, cfg, metaconfp);
- *metaconfp = metaconf;
-
- if (0) {
-err: __wt_free(session, metaconf);
- }
- __wt_scr_free(session, &buf);
+err: __wt_scr_free(session, &buf);
return (ret);
}
@@ -71,24 +63,24 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session)
WT_DECL_ITEM(key);
WT_DECL_ITEM(value);
WT_DECL_RET;
- WT_FH *fh;
+ WT_FSTREAM *fs;
bool exist;
/* Look for a hot backup file: if we find it, load it. */
- WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist));
if (!exist)
return (0);
- WT_RET(__wt_open(session, WT_METADATA_BACKUP,
- WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh));
+ WT_RET(__wt_fopen(session,
+ WT_METADATA_BACKUP, 0, WT_STREAM_READ, &fs));
/* Read line pairs and load them into the metadata file. */
WT_ERR(__wt_scr_alloc(session, 512, &key));
WT_ERR(__wt_scr_alloc(session, 512, &value));
for (;;) {
- WT_ERR(__wt_getline(session, key, fh));
+ WT_ERR(__wt_getline(session, fs, key));
if (key->size == 0)
break;
- WT_ERR(__wt_getline(session, value, fh));
+ WT_ERR(__wt_getline(session, fs, value));
if (value->size == 0)
WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP));
WT_ERR(__wt_metadata_update(session, key->data, value->data));
@@ -96,7 +88,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session)
F_SET(S2C(session), WT_CONN_WAS_BACKUP);
-err: WT_TRET(__wt_close(session, &fh));
+err: WT_TRET(__wt_fclose(session, &fs));
__wt_scr_free(session, &key);
__wt_scr_free(session, &value);
return (ret);
@@ -128,7 +120,7 @@ __metadata_load_bulk(WT_SESSION_IMPL *session)
continue;
/* If the file exists, it's all good. */
- WT_ERR(__wt_exist(session, key, &exist));
+ WT_ERR(__wt_fs_exist(session, key, &exist));
if (exist)
continue;
@@ -156,7 +148,7 @@ int
__wt_turtle_init(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
- bool exist_backup, exist_incr, exist_turtle, load;
+ bool exist_backup, exist_incr, exist_isrc, exist_turtle, load;
char *metaconf;
metaconf = NULL;
@@ -182,21 +174,28 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* that is an error. Otherwise, if there's already a turtle file, we're
* done.
*/
- WT_RET(__wt_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
- WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist_backup));
- WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist_turtle));
+ WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
+ WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_SRC, &exist_isrc));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
if (exist_turtle) {
- if (exist_incr)
+ /*
+ * We need to detect the difference between a source database
+ * that may have crashed with an incremental backup file
+ * and a destination database that incorrectly ran recovery.
+ */
+ if (exist_incr && !exist_isrc)
WT_RET_MSG(session, EINVAL,
"Incremental backup after running recovery "
- "is not allowed.");
+ "is not allowed");
/*
* If we have a backup file and metadata and turtle files,
* we want to recreate the metadata from the backup.
*/
if (exist_backup) {
- WT_RET(__wt_msg(session, "Both %s and %s exist. "
- "Recreating metadata from backup.",
+ WT_RET(__wt_msg(session,
+ "Both %s and %s exist; recreating metadata from "
+ "backup",
WT_METADATA_TURTLE, WT_METADATA_BACKUP));
WT_RET(__wt_remove_if_exists(session, WT_METAFILE));
WT_RET(__wt_remove_if_exists(
@@ -242,7 +241,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
WT_DECL_ITEM(buf);
WT_DECL_RET;
- WT_FH *fh;
+ WT_FSTREAM *fs;
bool exist, match;
*valuep = NULL;
@@ -253,24 +252,23 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
* the turtle file, and that means returning the default configuration
* string for the metadata file.
*/
- WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist));
if (!exist)
return (strcmp(key, WT_METAFILE_URI) == 0 ?
__metadata_config(session, valuep) : WT_NOTFOUND);
- WT_RET(__wt_open(session, WT_METADATA_TURTLE,
- WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh));
+ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs));
/* Search for the key. */
WT_ERR(__wt_scr_alloc(session, 512, &buf));
for (match = false;;) {
- WT_ERR(__wt_getline(session, buf, fh));
+ WT_ERR(__wt_getline(session, fs, buf));
if (buf->size == 0)
WT_ERR(WT_NOTFOUND);
if (strcmp(key, buf->data) == 0)
match = true;
/* Key matched: read the subsequent line for the value. */
- WT_ERR(__wt_getline(session, buf, fh));
+ WT_ERR(__wt_getline(session, fs, buf));
if (buf->size == 0)
WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE));
if (match)
@@ -280,7 +278,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
/* Copy the value for the caller. */
WT_ERR(__wt_strdup(session, buf->data, valuep));
-err: WT_TRET(__wt_close(session, &fh));
+err: WT_TRET(__wt_fclose(session, &fs));
__wt_scr_free(session, &buf);
if (ret != 0)
@@ -295,38 +293,34 @@ err: WT_TRET(__wt_close(session, &fh));
int
__wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_FH *fh;
- WT_DECL_ITEM(buf);
+ WT_FSTREAM *fs;
WT_DECL_RET;
int vmajor, vminor, vpatch;
const char *version;
- fh = NULL;
+ fs = NULL;
/*
* Create the turtle setup file: we currently re-write it from scratch
* every time.
*/
- WT_RET(__wt_open(session, WT_METADATA_TURTLE_SET,
- WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh));
+ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET,
+ WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
version = wiredtiger_version(&vmajor, &vminor, &vpatch);
- WT_ERR(__wt_scr_alloc(session, 2 * 1024, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
+ WT_ERR(__wt_fprintf(session, fs,
"%s\n%s\n%s\n" "major=%d,minor=%d,patch=%d\n%s\n%s\n",
WT_METADATA_VERSION_STR, version,
WT_METADATA_VERSION, vmajor, vminor, vpatch,
key, value));
- WT_ERR(__wt_write(session, fh, 0, buf->size, buf->data));
- /* Flush the handle and rename the file into place. */
- ret = __wt_sync_handle_and_rename(
- session, &fh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);
+ /* Flush the stream and rename the file into place. */
+ ret = __wt_sync_and_rename(
+ session, &fs, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);
/* Close any file handle left open, remove any temporary file. */
-err: WT_TRET(__wt_close(session, &fh));
+err: WT_TRET(__wt_fclose(session, &fs));
WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));
- __wt_scr_free(session, &buf);
return (ret);
}
diff --git a/src/os_common/filename.c b/src/os_common/filename.c
index dfd67284948..5f174288350 100644
--- a/src/os_common/filename.c
+++ b/src/os_common/filename.c
@@ -60,9 +60,9 @@ __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name)
{
bool exist;
- WT_RET(__wt_exist(session, name, &exist));
+ WT_RET(__wt_fs_exist(session, name, &exist));
if (exist)
- WT_RET(__wt_remove(session, name));
+ WT_RET(__wt_fs_remove(session, name));
return (0);
}
@@ -78,7 +78,7 @@ __wt_rename_and_sync_directory(
bool same_directory;
/* Rename the source file to the target. */
- WT_RET(__wt_rename(session, from, to));
+ WT_RET(__wt_fs_rename(session, from, to));
/*
* Flush the backing directory to guarantee the rename. My reading of
@@ -89,7 +89,7 @@ __wt_rename_and_sync_directory(
* with specific mount options. Flush both of the from/to directories
* until it's a performance problem.
*/
- WT_RET(__wt_directory_sync(session, from));
+ WT_RET(__wt_fs_directory_sync(session, from));
/*
* In almost all cases, we're going to be renaming files in the same
@@ -101,29 +101,7 @@ __wt_rename_and_sync_directory(
(fp != NULL && tp != NULL &&
fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
- return (same_directory ? 0 : __wt_directory_sync(session, to));
-}
-
-/*
- * __wt_sync_handle_and_rename --
- * Sync and close a handle, and swap it into place.
- */
-int
-__wt_sync_handle_and_rename(
- WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to)
-{
- WT_DECL_RET;
- WT_FH *fh;
-
- fh = *fhp;
- *fhp = NULL;
-
- /* Flush to disk and close the handle. */
- ret = __wt_fsync(session, fh, true);
- WT_TRET(__wt_close(session, &fh));
- WT_RET(ret);
-
- return (__wt_rename_and_sync_directory(session, from, to));
+ return (same_directory ? 0 : __wt_fs_directory_sync(session, to));
}
/*
@@ -160,10 +138,9 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
WT_ERR(__wt_remove_if_exists(session, tmp->data));
/* Open the from and temporary file handles. */
- WT_ERR(__wt_open(session, from,
- WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh));
- WT_ERR(__wt_open(session, tmp->data,
- WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh));
+ WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
+ WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR,
+ WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh));
/*
* Allocate a copy buffer. Don't use a scratch buffer, this thing is
@@ -182,7 +159,10 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
/* Close the from handle, then swap the temporary file into place. */
WT_ERR(__wt_close(session, &ffh));
- ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to);
+ WT_ERR(__wt_fsync(session, tfh, true));
+ WT_ERR(__wt_close(session, &tfh));
+
+ ret = __wt_rename_and_sync_directory(session, tmp->data, to);
err: WT_TRET(__wt_close(session, &ffh));
WT_TRET(__wt_close(session, &tfh));
diff --git a/src/os_posix/os_errno.c b/src/os_common/os_errno.c
index a0f1202c6ef..a8e56b7f1aa 100644
--- a/src/os_posix/os_errno.c
+++ b/src/os_common/os_errno.c
@@ -23,24 +23,8 @@ __wt_errno(void)
}
/*
- * __wt_map_error_rdonly --
- * Map an error into a WiredTiger error code specific for
- * read-only operation which intercepts based on certain types
- * of failures.
- */
-int
-__wt_map_error_rdonly(int error)
-{
- if (error == ENOENT)
- return (WT_NOTFOUND);
- else if (error == EACCES)
- return (WT_PERM_DENIED);
- return (error);
-}
-
-/*
* __wt_strerror --
- * POSIX implementation of WT_SESSION.strerror and wiredtiger_strerror.
+ * WT_SESSION.strerror and wiredtiger_strerror.
*/
const char *
__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
@@ -69,3 +53,28 @@ __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
/* Defeated. */
return ("Unable to return error string");
}
+
+/*
+ * __wt_ext_map_windows_error --
+ * Extension API call to map a Windows system error to a POSIX/ANSI error.
+ */
+int
+__wt_ext_map_windows_error(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error)
+{
+ WT_UNUSED(wt_api);
+ WT_UNUSED(wt_session);
+
+ /*
+ * This extension API only makes sense in Windows builds, but it's hard
+ * to exclude it otherwise (there's no way to return an error, anyway).
+ * Call an underlying function on Windows, else panic so callers figure
+ * out what they're doing wrong.
+ */
+#ifdef _WIN32
+ return (__wt_map_windows_error(windows_error));
+#else
+ WT_UNUSED(windows_error);
+ return (WT_PANIC);
+#endif
+}
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
index b16b2e24bfa..81e4cc14ccb 100644
--- a/src/os_common/os_fhandle.c
+++ b/src/os_common/os_fhandle.c
@@ -9,20 +9,89 @@
#include "wt_internal.h"
/*
- * __wt_handle_search --
- * Search for a matching handle.
+ * __fhandle_method_finalize --
+ * Confirm the required WT_FILE_HANDLE methods are configured (EINVAL
+ * if not), so custom file systems with incomplete implementations
+ * fail at open rather than through a NULL required-method pointer.
+ */
+static int
+__fhandle_method_finalize(
+ WT_SESSION_IMPL *session, WT_FILE_HANDLE *handle, bool readonly)
+{
+#define WT_HANDLE_METHOD_REQ(name) \
+ if (handle->name == NULL) \
+ WT_RET_MSG(session, EINVAL, \
+ "a WT_FILE_HANDLE.%s method must be configured", #name)
+
+ WT_HANDLE_METHOD_REQ(close);
+ /* not required: fadvise */
+ /* not required: fallocate */
+ /* not required: fallocate_nolock */
+ WT_HANDLE_METHOD_REQ(fh_lock);
+ /* not required: map */
+ /* not required: map_discard */
+ /* not required: map_preload */
+ /* not required: map_unmap */
+ WT_HANDLE_METHOD_REQ(fh_read);
+ WT_HANDLE_METHOD_REQ(fh_size);
+ if (!readonly)
+ WT_HANDLE_METHOD_REQ(fh_sync);
+ /* not required: sync_nowait */
+ if (!readonly) {
+ WT_HANDLE_METHOD_REQ(fh_truncate);
+ WT_HANDLE_METHOD_REQ(fh_write);
+ }
+
+ return (0);
+}
+
+#ifdef HAVE_DIAGNOSTIC
+/*
+ * __wt_handle_is_open --
+ * Return if there's an open handle matching a name.
*/
bool
-__wt_handle_search(WT_SESSION_IMPL *session,
- const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp)
+__wt_handle_is_open(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ uint64_t bucket, hash;
+ bool found;
+
+ conn = S2C(session);
+ found = false;
+
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+
+ __wt_spin_lock(session, &conn->fh_lock);
+
+ TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+ if (strcmp(name, fh->name) == 0) {
+ found = true;
+ break;
+ }
+
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ return (found);
+}
+#endif
+
+/*
+ * __handle_search --
+ * Search for a matching handle.
+ */
+static bool
+__handle_search(
+ WT_SESSION_IMPL *session, const char *name, WT_FH *newfh, WT_FH **fhp)
{
WT_CONNECTION_IMPL *conn;
WT_FH *fh;
uint64_t bucket, hash;
bool found;
- if (fhp != NULL)
- *fhp = NULL;
+ *fhp = NULL;
conn = S2C(session);
found = false;
@@ -33,15 +102,13 @@ __wt_handle_search(WT_SESSION_IMPL *session,
__wt_spin_lock(session, &conn->fh_lock);
/*
- * If we already have the file open, optionally increment the reference
- * count and return a pointer.
+ * If we already have the file open, increment the reference count and
+ * return a pointer.
*/
TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
if (strcmp(name, fh->name) == 0) {
- if (increment_ref)
- ++fh->ref;
- if (fhp != NULL)
- *fhp = fh;
+ ++fh->ref;
+ *fhp = fh;
found = true;
break;
}
@@ -49,13 +116,11 @@ __wt_handle_search(WT_SESSION_IMPL *session,
/* If we don't find a match, optionally add a new entry. */
if (!found && newfh != NULL) {
newfh->name_hash = hash;
- WT_CONN_FILE_INSERT(conn, newfh, bucket);
+ WT_FILE_HANDLE_INSERT(conn, newfh, bucket);
(void)__wt_atomic_add32(&conn->open_file_count, 1);
- if (increment_ref)
- ++newfh->ref;
- if (fhp != NULL)
- *fhp = newfh;
+ ++newfh->ref;
+ *fhp = newfh;
}
__wt_spin_unlock(session, &conn->fh_lock);
@@ -68,8 +133,8 @@ __wt_handle_search(WT_SESSION_IMPL *session,
* Optionally output a verbose message on handle open.
*/
static inline int
-__open_verbose(WT_SESSION_IMPL *session,
- const char *name, uint32_t file_type, uint32_t flags)
+__open_verbose(
+ WT_SESSION_IMPL *session, const char *name, int file_type, u_int flags)
{
#ifdef HAVE_VERBOSE
WT_DECL_RET;
@@ -85,19 +150,19 @@ __open_verbose(WT_SESSION_IMPL *session,
*/
switch (file_type) {
- case WT_FILE_TYPE_CHECKPOINT:
+ case WT_OPEN_FILE_TYPE_CHECKPOINT:
file_type_tag = "checkpoint";
break;
- case WT_FILE_TYPE_DATA:
+ case WT_OPEN_FILE_TYPE_DATA:
file_type_tag = "data";
break;
- case WT_FILE_TYPE_DIRECTORY:
+ case WT_OPEN_FILE_TYPE_DIRECTORY:
file_type_tag = "directory";
break;
- case WT_FILE_TYPE_LOG:
+ case WT_OPEN_FILE_TYPE_LOG:
file_type_tag = "log";
break;
- case WT_FILE_TYPE_REGULAR:
+ case WT_OPEN_FILE_TYPE_REGULAR:
file_type_tag = "regular";
break;
default:
@@ -115,18 +180,16 @@ __open_verbose(WT_SESSION_IMPL *session,
}
WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create");
+ WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO");
WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive");
WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed");
WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly");
- WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append");
- WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read");
- WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write");
if (tmp->size != 0)
WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
ret = __wt_verbose(session, WT_VERB_FILEOPS,
- "%s: handle-open: type %s%s",
+ "%s: file-open: type %s%s",
name, file_type_tag, tmp->size == 0 ? "" : (char *)tmp->data);
err: __wt_scr_free(session, &tmp);
@@ -146,17 +209,19 @@ err: __wt_scr_free(session, &tmp);
*/
int
__wt_open(WT_SESSION_IMPL *session,
- const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp)
+ const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_FH *fh;
+ WT_FILE_SYSTEM *file_system;
bool lock_file, open_called;
char *path;
WT_ASSERT(session, file_type != 0); /* A file type is required. */
conn = S2C(session);
+ file_system = conn->file_system;
fh = NULL;
open_called = false;
path = NULL;
@@ -164,21 +229,12 @@ __wt_open(WT_SESSION_IMPL *session,
WT_RET(__open_verbose(session, name, file_type, flags));
/* Check if the handle is already open. */
- if (__wt_handle_search(session, name, true, NULL, &fh)) {
- /*
- * XXX
- * The in-memory implementation has to reset the file offset
- * when a file is re-opened (which obviously also depends on
- * in-memory configurations never opening a file in more than
- * one thread at a time). This needs to be fixed.
- */
- if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1)
- fh->off = 0;
+ if (__handle_search(session, name, NULL, &fh)) {
*fhp = fh;
return (0);
}
- /* Allocate a structure and set the name. */
+ /* Allocate and initialize the handle. */
WT_ERR(__wt_calloc_one(session, &fh));
WT_ERR(__wt_strdup(session, name, &fh->name));
@@ -200,17 +256,21 @@ __wt_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_filename(session, name, &path));
/* Call the underlying open function. */
- WT_ERR(conn->handle_open(
- session, fh, path == NULL ? name : path, file_type, flags));
+ WT_ERR(file_system->fs_open_file(file_system, &session->iface,
+ path == NULL ? name : path, file_type, flags, &fh->handle));
open_called = true;
+ WT_ERR(__fhandle_method_finalize(
+ session, fh->handle, LF_ISSET(WT_OPEN_READONLY)));
+
/*
* Repeat the check for a match: if there's no match, link our newly
* created handle onto the database's list of files.
*/
- if (__wt_handle_search(session, name, true, fh, fhp)) {
+ if (__handle_search(session, name, fh, fhp)) {
err: if (open_called)
- WT_TRET(fh->fh_close(session, fh));
+ WT_TRET(fh->handle->close(
+ fh->handle, (WT_SESSION *)session));
if (fh != NULL) {
__wt_free(session, fh->name);
__wt_free(session, fh);
@@ -242,7 +302,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
/* Track handle-close as a file operation, so open and close match. */
WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: handle-close", fh->name));
+ session, WT_VERB_FILEOPS, "%s: file-close", fh->name));
/*
* If the reference count hasn't gone to 0, or if it's an in-memory
@@ -252,20 +312,20 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
*/
__wt_spin_lock(session, &conn->fh_lock);
WT_ASSERT(session, fh->ref > 0);
- if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) {
+ if ((fh->ref > 0 && --fh->ref > 0)) {
__wt_spin_unlock(session, &conn->fh_lock);
return (0);
}
/* Remove from the list. */
bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_FILE_REMOVE(conn, fh, bucket);
+ WT_FILE_HANDLE_REMOVE(conn, fh, bucket);
(void)__wt_atomic_sub32(&conn->open_file_count, 1);
__wt_spin_unlock(session, &conn->fh_lock);
/* Discard underlying resources. */
- ret = fh->fh_close(session, fh);
+ ret = fh->handle->close(fh->handle, (WT_SESSION *)session);
__wt_free(session, fh->name);
__wt_free(session, fh);
@@ -287,18 +347,13 @@ __wt_close_connection_close(WT_SESSION_IMPL *session)
conn = S2C(session);
while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) {
- /*
- * In-memory configurations will have open files, but the ref
- * counts should be zero.
- */
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) {
+ if (fh->ref != 0) {
ret = EBUSY;
__wt_errx(session,
"Connection has open file handles: %s", fh->name);
}
fh->ref = 1;
- F_CLR(fh, WT_FH_IN_MEMORY);
WT_TRET(__wt_close(session, &fh));
}
diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c
index 260514eac66..09c2e08db83 100644
--- a/src/os_common/os_fs_inmemory.c
+++ b/src/os_common/os_fs_inmemory.c
@@ -8,475 +8,596 @@
#include "wt_internal.h"
-static int __im_handle_size(WT_SESSION_IMPL *, WT_FH *, wt_off_t *);
-
/*
- * In-memory information.
+ * File system interface for in-memory implementation.
*/
typedef struct {
+ WT_FILE_SYSTEM iface;
+
+ TAILQ_HEAD(__wt_fhhash_inmem,
+ __wt_file_handle_inmem) fhhash[WT_HASH_ARRAY_SIZE];
+ TAILQ_HEAD(__wt_fh_inmem_qh, __wt_file_handle_inmem) fhqh;
+
WT_SPINLOCK lock;
-} WT_IM;
+} WT_FILE_SYSTEM_INMEM;
+
+static int __im_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
/*
- * __im_directory_list --
- * Get a list of files from a directory, in-memory version.
+ * __im_handle_search --
+ * Return a matching handle, if one exists.
*/
-static int
-__im_directory_list(WT_SESSION_IMPL *session, const char *dir,
- const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+static WT_FILE_HANDLE_INMEM *
+__im_handle_search(WT_FILE_SYSTEM *file_system, const char *name)
{
- WT_UNUSED(session);
- WT_UNUSED(dir);
- WT_UNUSED(prefix);
- WT_UNUSED(flags);
- WT_UNUSED(dirlist);
- WT_UNUSED(countp);
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ uint64_t bucket, hash;
- WT_RET_MSG(session, ENOTSUP, "directory-list");
-}
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
-/*
- * __im_directory_sync --
- * Flush a directory to ensure file creation is durable.
- */
-static int
-__im_directory_sync(WT_SESSION_IMPL *session, const char *path)
-{
- WT_UNUSED(session);
- WT_UNUSED(path);
- return (0);
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH(im_fh, &im_fs->fhhash[bucket], hashq)
+ if (strcmp(im_fh->iface.name, name) == 0)
+ break;
+
+ return (im_fh);
}
/*
- * __im_file_exist --
- * Return if the file exists.
+ * __im_handle_remove --
+ * Destroy an in-memory file handle. Should only happen on remove or
+ * shutdown.
*/
static int
-__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+__im_handle_remove(WT_SESSION_IMPL *session,
+ WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh)
{
- *existp = __wt_handle_search(session, name, false, NULL, NULL);
+ WT_FILE_HANDLE *fhp;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ uint64_t bucket;
+
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+
+ if (im_fh->ref != 0)
+ WT_RET_MSG(session, EBUSY,
+ "%s: file-remove", im_fh->iface.name);
+
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
+
+ /* Clean up private information. */
+ __wt_buf_free(session, &im_fh->buf);
+
+ /* Clean up public information. */
+ fhp = (WT_FILE_HANDLE *)im_fh;
+ __wt_free(session, fhp->name);
+
+ __wt_free(session, im_fh);
+
return (0);
}
/*
- * __im_file_remove --
- * POSIX remove.
+ * __im_fs_directory_list --
+ * Return the directory contents.
*/
static int
-__im_file_remove(WT_SESSION_IMPL *session, const char *name)
+__im_fs_directory_list(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
WT_DECL_RET;
- WT_FH *fh;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ size_t dirallocsz, len;
+ uint32_t count;
+ char *name, **entries;
+
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ dirallocsz = 0;
+ len = strlen(directory);
+ entries = NULL;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ count = 0;
+ TAILQ_FOREACH(im_fh, &im_fs->fhqh, q) {
+ name = im_fh->iface.name;
+ if (strncmp(name, directory, len) != 0 ||
+ (prefix != NULL && !WT_PREFIX_MATCH(name + len, prefix)))
+ continue;
+
+ WT_ERR(__wt_realloc_def(
+ session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(session, name, &entries[count]));
+ ++count;
+ }
- if (__wt_handle_search(session, name, true, NULL, &fh)) {
- WT_ASSERT(session, fh->ref == 1);
+ *dirlistp = entries;
+ *countp = count;
+
+err: __wt_spin_unlock(session, &im_fs->lock);
+ if (ret == 0)
+ return (0);
- /* Force a discard of the handle. */
- F_CLR(fh, WT_FH_IN_MEMORY);
- ret = __wt_close(session, &fh);
+ if (entries != NULL) {
+ while (count > 0)
+ __wt_free(session, entries[--count]);
+ __wt_free(session, entries);
}
- return (ret);
+
+ WT_RET_MSG(session, ret,
+ "%s: directory-list, prefix \"%s\"",
+ directory, prefix == NULL ? "" : prefix);
}
/*
- * __im_file_rename --
- * POSIX rename.
+ * __im_fs_directory_list_free --
+ * Free memory returned by __im_fs_directory_list.
*/
static int
-__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+__im_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, char **dirlist, uint32_t count)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh;
- uint64_t bucket, hash;
- char *to_name;
-
- conn = S2C(session);
-
- /* We'll need a copy of the target name. */
- WT_RET(__wt_strdup(session, to, &to_name));
+ WT_SESSION_IMPL *session;
- __wt_spin_lock(session, &conn->fh_lock);
+ WT_UNUSED(file_system);
- /* Make sure the target name isn't active. */
- hash = __wt_hash_city64(to, strlen(to));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(to, fh->name) == 0)
- WT_ERR(EPERM);
+ session = (WT_SESSION_IMPL *)wt_session;
- /* Find the source name. */
- hash = __wt_hash_city64(from, strlen(from));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(from, fh->name) == 0)
- break;
- if (fh == NULL)
- WT_ERR(ENOENT);
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
+}
- /* Remove source from the list. */
- WT_CONN_FILE_REMOVE(conn, fh, bucket);
+/*
+ * __im_fs_exist --
+ * Return if the file exists.
+ */
+static int
+__im_fs_exist(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, bool *existp)
+{
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- /* Swap the names. */
- __wt_free(session, fh->name);
- fh->name = to_name;
- to_name = NULL;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- /* Put source back on the list. */
- hash = __wt_hash_city64(to, strlen(to));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_FILE_INSERT(conn, fh, bucket);
+ __wt_spin_lock(session, &im_fs->lock);
- if (0) {
-err: __wt_free(session, to_name);
- }
- __wt_spin_unlock(session, &conn->fh_lock);
+ *existp = __im_handle_search(file_system, name) != NULL;
- return (ret);
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (0);
}
/*
- * __im_file_size --
- * Get the size of a file in bytes, by file name.
+ * __im_fs_remove --
+ * POSIX remove.
*/
static int
-__im_file_size(
- WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+__im_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
{
WT_DECL_RET;
- WT_FH *fh;
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(silent);
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- im = S2C(session)->inmemory;
- __wt_spin_lock(session, &im->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- if (__wt_handle_search(session, name, true, NULL, &fh)) {
- WT_ERR(__im_handle_size(session, fh, sizep));
- WT_ERR(__wt_close(session, &fh));
- } else
- ret = ENOENT;
+ ret = ENOENT;
+ if ((im_fh = __im_handle_search(file_system, name)) != NULL)
+ ret = __im_handle_remove(session, file_system, im_fh);
-err: __wt_spin_unlock(session, &im->lock);
+ __wt_spin_unlock(session, &im_fs->lock);
return (ret);
}
/*
- * __im_handle_advise --
- * POSIX fadvise.
+ * __im_fs_rename --
+ * POSIX rename.
*/
static int
-__im_handle_advise(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+__im_fs_rename(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *from, const char *to)
{
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(advice);
- return (ENOTSUP);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ uint64_t bucket;
+ char *copy;
+
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ ret = ENOENT;
+ if ((im_fh = __im_handle_search(file_system, from)) != NULL) {
+ WT_ERR(__wt_strdup(session, to, &copy));
+ __wt_free(session, im_fh->iface.name);
+ im_fh->iface.name = copy;
+
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
+ im_fh->name_hash = __wt_hash_city64(to, strlen(to));
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
+ }
+
+err: __wt_spin_unlock(session, &im_fs->lock);
+ return (ret);
}
/*
- * __im_handle_close --
- * ANSI C close/fclose.
+ * __im_fs_size --
+ * Get the size of a file in bytes, by file name.
*/
static int
-__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+__im_fs_size(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
- __wt_buf_free(session, &fh->buf);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- return (0);
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ /* Search for the handle, then get its size. */
+ if ((im_fh = __im_handle_search(file_system, name)) == NULL)
+ ret = ENOENT;
+ else
+ *sizep = (wt_off_t)im_fh->buf.size;
+
+ __wt_spin_unlock(session, &im_fs->lock);
+
+ return (ret);
}
/*
- * __im_handle_getc --
- * ANSI C fgetc.
+ * __im_file_close --
+ * ANSI C close.
*/
static int
-__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+__im_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- im = S2C(session)->inmemory;
- __wt_spin_lock(session, &im->lock);
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (fh->off >= fh->buf.size)
- *chp = EOF;
- else
- *chp = ((char *)fh->buf.data)[fh->off++];
+ __wt_spin_lock(session, &im_fs->lock);
+
+ --im_fh->ref;
+
+ __wt_spin_unlock(session, &im_fs->lock);
- __wt_spin_unlock(session, &im->lock);
return (0);
}
/*
- * __im_handle_lock --
+ * __im_file_lock --
* Lock/unlock a file.
*/
static int
-__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+__im_file_lock(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- WT_UNUSED(session);
- WT_UNUSED(fh);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
WT_UNUSED(lock);
return (0);
}
/*
- * __im_handle_printf --
- * ANSI C vfprintf.
- */
-static int
-__im_handle_printf(
- WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
-{
- va_list ap_copy;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_IM *im;
- size_t len;
-
- im = S2C(session)->inmemory;
-
- /* Build the string we're writing. */
- WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp));
- for (;;) {
- va_copy(ap_copy, ap);
- len = (size_t)vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy);
- va_end(ap_copy);
- if (len < tmp->memsize) {
- tmp->data = tmp->mem;
- tmp->size = len;
- break;
- }
- WT_ERR(__wt_buf_extend(session, tmp, len + 1));
- }
-
- __wt_spin_lock(session, &im->lock);
-
- /* Grow the handle's buffer as necessary. */
- WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len));
-
- /* Copy the data into place and update the offset. */
- memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len);
- fh->off += len;
-
-err: __wt_spin_unlock(session, &im->lock);
-
- __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
- * __im_handle_read --
+ * __im_file_read --
* POSIX pread.
*/
static int
-__im_handle_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+__im_file_read(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
WT_DECL_RET;
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
size_t off;
- im = S2C(session)->inmemory;
- __wt_spin_lock(session, &im->lock);
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
off = (size_t)offset;
- if (off < fh->buf.size) {
- len = WT_MIN(len, fh->buf.size - off);
- memcpy(buf, (uint8_t *)fh->buf.mem + off, len);
- fh->off = off + len;
+ if (off < im_fh->buf.size) {
+ len = WT_MIN(len, im_fh->buf.size - off);
+ memcpy(buf, (uint8_t *)im_fh->buf.mem + off, len);
} else
ret = WT_ERROR;
- __wt_spin_unlock(session, &im->lock);
+ __wt_spin_unlock(session, &im_fs->lock);
if (ret == 0)
return (0);
WT_RET_MSG(session, WT_ERROR,
"%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
"offset %" WT_SIZET_FMT,
- fh->name, len, off);
+ file_handle->name, len, off);
}
/*
- * __im_handle_size --
+ * __im_file_size --
* Get the size of a file in bytes, by file handle.
*/
static int
-__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+__im_file_size(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
- WT_UNUSED(session);
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ *sizep = (wt_off_t)im_fh->buf.size;
+
+ __wt_spin_unlock(session, &im_fs->lock);
- /*
- * XXX hack - MongoDB assumes that any file with content will have a
- * non-zero size. In memory tables generally are zero-sized, make
- * MongoDB happy.
- */
- *sizep = fh->buf.size == 0 ? 1024 : (wt_off_t)fh->buf.size;
return (0);
}
/*
- * __im_handle_sync --
- * POSIX fflush/fsync.
+ * __im_file_sync --
+ * In-memory sync.
*/
static int
-__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+__im_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_UNUSED(session);
- WT_UNUSED(fh);
-
- /*
- * Callers attempting asynchronous flush handle ENOTSUP returns, and
- * won't make further attempts.
- */
- return (block ? 0 : ENOTSUP);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
+ return (0);
}
/*
- * __im_handle_truncate --
+ * __im_file_truncate --
* POSIX ftruncate.
*/
static int
-__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
+__im_file_truncate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
WT_DECL_RET;
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
size_t off;
- im = S2C(session)->inmemory;
- __wt_spin_lock(session, &im->lock);
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
/*
- * Grow the buffer as necessary, clear any new space in the file,
- * and reset the file's data length.
+ * Grow the buffer as necessary, clear any new space in the file, and
+ * reset the file's data length.
*/
off = (size_t)offset;
- WT_ERR(__wt_buf_grow(session, &fh->buf, off));
- if (fh->buf.size < off)
- memset((uint8_t *)
- fh->buf.data + fh->buf.size, 0, off - fh->buf.size);
- fh->buf.size = off;
+ WT_ERR(__wt_buf_grow(session, &im_fh->buf, off));
+ if (im_fh->buf.size < off)
+ memset((uint8_t *)im_fh->buf.data + im_fh->buf.size,
+ 0, off - im_fh->buf.size);
+ im_fh->buf.size = off;
-err: __wt_spin_unlock(session, &im->lock);
+err: __wt_spin_unlock(session, &im_fs->lock);
return (ret);
}
/*
- * __im_handle_write --
+ * __im_file_write --
* POSIX pwrite.
*/
static int
-__im_handle_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+__im_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ wt_off_t offset, size_t len, const void *buf)
{
WT_DECL_RET;
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
size_t off;
- im = S2C(session)->inmemory;
- __wt_spin_lock(session, &im->lock);
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
off = (size_t)offset;
- WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024));
+ WT_ERR(__wt_buf_grow(session, &im_fh->buf, off + len + 1024));
- memcpy((uint8_t *)fh->buf.data + off, buf, len);
- if (off + len > fh->buf.size)
- fh->buf.size = off + len;
- fh->off = off + len;
+ memcpy((uint8_t *)im_fh->buf.data + off, buf, len);
+ if (off + len > im_fh->buf.size)
+ im_fh->buf.size = off + len;
-err: __wt_spin_unlock(session, &im->lock);
+err: __wt_spin_unlock(session, &im_fs->lock);
if (ret == 0)
return (0);
WT_RET_MSG(session, ret,
"%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at "
"offset %" WT_SIZET_FMT,
- fh->name, len, off);
+ file_handle->name, len, off);
}
/*
- * __im_handle_open --
+ * __im_file_open --
* POSIX fopen/open.
*/
static int
-__im_handle_open(WT_SESSION_IMPL *session,
- WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags)
+__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ WT_FILE_HANDLE **file_handlep)
{
- WT_UNUSED(session);
- WT_UNUSED(path);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ uint64_t bucket, hash;
+
WT_UNUSED(file_type);
WT_UNUSED(flags);
- fh->off = 0;
- F_SET(fh, WT_FH_IN_MEMORY);
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- fh->fh_advise = __im_handle_advise;
- fh->fh_close = __im_handle_close;
- fh->fh_getc = __im_handle_getc;
- fh->fh_lock = __im_handle_lock;
- fh->fh_printf = __im_handle_printf;
- fh->fh_read = __im_handle_read;
- fh->fh_size = __im_handle_size;
- fh->fh_sync = __im_handle_sync;
- fh->fh_truncate = __im_handle_truncate;
- fh->fh_write = __im_handle_write;
+ __wt_spin_lock(session, &im_fs->lock);
- return (0);
+ /*
+ * First search the file queue, if we find it, assert there's only a
+ * single reference, in-memory only supports a single handle on any
+ * file, for now.
+ */
+ im_fh = __im_handle_search(file_system, name);
+ if (im_fh != NULL) {
+
+ if (im_fh->ref != 0)
+ WT_ERR_MSG(session, EBUSY,
+ "%s: file-open: already open", name);
+
+ im_fh->ref = 1;
+
+ *file_handlep = (WT_FILE_HANDLE *)im_fh;
+
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (0);
+ }
+
+ /* The file hasn't been opened before, create a new one. */
+ WT_ERR(__wt_calloc_one(session, &im_fh));
+
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)im_fh;
+ file_handle->file_system = file_system;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
+
+ /* Initialize private information. */
+ im_fh->ref = 1;
+
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ im_fh->name_hash = hash;
+ WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
+
+ file_handle->close = __im_file_close;
+ file_handle->fh_lock = __im_file_lock;
+ file_handle->fh_read = __im_file_read;
+ file_handle->fh_size = __im_file_size;
+ file_handle->fh_sync = __im_file_sync;
+ file_handle->fh_truncate = __im_file_truncate;
+ file_handle->fh_write = __im_file_write;
+
+ *file_handlep = file_handle;
+
+ if (0) {
+err: __wt_free(session, im_fh);
+ }
+
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (ret);
}
/*
- * __wt_os_inmemory --
- * Initialize an in-memory configuration.
+ * __im_terminate --
+ * Terminate an in-memory configuration.
*/
-int
-__wt_os_inmemory(WT_SESSION_IMPL *session)
+static int
+__im_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_IM *im;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- conn = S2C(session);
- im = NULL;
+ WT_UNUSED(file_system);
- /* Initialize the in-memory jump table. */
- conn->file_directory_list = __im_directory_list;
- conn->file_directory_sync = __im_directory_sync;
- conn->file_exist = __im_file_exist;
- conn->file_remove = __im_file_remove;
- conn->file_rename = __im_file_rename;
- conn->file_size = __im_file_size;
- conn->handle_open = __im_handle_open;
-
- /* Allocate an in-memory structure. */
- WT_RET(__wt_calloc_one(session, &im));
- WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O"));
- conn->inmemory = im;
+ session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- return (0);
+ while ((im_fh = TAILQ_FIRST(&im_fs->fhqh)) != NULL)
+ WT_TRET(__im_handle_remove(session, file_system, im_fh));
+
+ __wt_spin_destroy(session, &im_fs->lock);
+ __wt_free(session, im_fs);
-err: __wt_free(session, im);
return (ret);
}
/*
- * __wt_os_inmemory_cleanup --
- * Discard an in-memory configuration.
+ * __wt_os_inmemory --
+ * Initialize an in-memory configuration.
*/
int
-__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session)
+__wt_os_inmemory(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
- WT_IM *im;
+ WT_FILE_SYSTEM *file_system;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ u_int i;
- if ((im = S2C(session)->inmemory) == NULL)
- return (0);
- S2C(session)->inmemory = NULL;
+ WT_RET(__wt_calloc_one(session, &im_fs));
+
+ /* Initialize private information. */
+ TAILQ_INIT(&im_fs->fhqh);
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&im_fs->fhhash[i]);
- __wt_spin_destroy(session, &im->lock);
- __wt_free(session, im);
+ WT_ERR(__wt_spin_init(session, &im_fs->lock, "in-memory I/O"));
+
+ /* Initialize the in-memory jump table. */
+ file_system = (WT_FILE_SYSTEM *)im_fs;
+ file_system->fs_directory_list = __im_fs_directory_list;
+ file_system->fs_directory_list_free = __im_fs_directory_list_free;
+ file_system->fs_exist = __im_fs_exist;
+ file_system->fs_open_file = __im_file_open;
+ file_system->fs_remove = __im_fs_remove;
+ file_system->fs_rename = __im_fs_rename;
+ file_system->fs_size = __im_fs_size;
+ file_system->terminate = __im_terminate;
+
+ /* Switch the file system into place. */
+ S2C(session)->file_system = (WT_FILE_SYSTEM *)im_fs;
+
+ return (0);
+err: __wt_free(session, im_fs);
return (ret);
}
diff --git a/src/os_common/os_fs_stdio.c b/src/os_common/os_fs_stdio.c
deleted file mode 100644
index 9baba9b6945..00000000000
--- a/src/os_common/os_fs_stdio.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __stdio_handle_advise --
- * POSIX fadvise.
- */
-static int
-__stdio_handle_advise(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
-{
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(advice);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name);
-}
-
-/*
- * __stdio_handle_allocate --
- * POSIX fallocate.
- */
-static int
-__stdio_handle_allocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
-{
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name);
-}
-
-/*
- * __stdio_handle_close --
- * ANSI C close/fclose.
- */
-static int
-__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name);
-}
-
-/*
- * __stdio_handle_getc --
- * ANSI C fgetc.
- */
-static int
-__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
-{
- WT_UNUSED(chp);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name);
-}
-
-/*
- * __stdio_handle_lock --
- * Lock/unlock a file.
- */
-static int
-__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
-{
- WT_UNUSED(lock);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name);
-}
-
-/*
- * __stdio_handle_map --
- * Map a file.
- */
-static int
-__stdio_handle_map(WT_SESSION_IMPL *session,
- WT_FH *fh, void *p, size_t *lenp, void **mappingcookie)
-{
- WT_UNUSED(p);
- WT_UNUSED(lenp);
- WT_UNUSED(mappingcookie);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name);
-}
-
-/*
- * __stdio_handle_map_discard --
- * Discard a section of a mapped region.
- */
-static int
-__stdio_handle_map_discard(
- WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len)
-{
- WT_UNUSED(p);
- WT_UNUSED(len);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name);
-}
-
-/*
- * __stdio_handle_map_preload --
- * Preload a section of a mapped region.
- */
-static int
-__stdio_handle_map_preload(
- WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len)
-{
- WT_UNUSED(p);
- WT_UNUSED(len);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name);
-}
-
-/*
- * __stdio_handle_map_unmap --
- * Unmap a file.
- */
-static int
-__stdio_handle_map_unmap(WT_SESSION_IMPL *session,
- WT_FH *fh, void *p, size_t len, void **mappingcookie)
-{
- WT_UNUSED(p);
- WT_UNUSED(len);
- WT_UNUSED(mappingcookie);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name);
-}
-
-/*
- * __stdio_handle_printf --
- * ANSI C vfprintf.
- */
-static int
-__stdio_handle_printf(
- WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
-{
- if (vfprintf(fh->fp, fmt, ap) >= 0)
- return (0);
- WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
-}
-
-/*
- * __stdio_handle_read --
- * POSIX pread.
- */
-static int
-__stdio_handle_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
-{
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(buf);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name);
-}
-
-/*
- * __stdio_handle_size --
- * Get the size of a file in bytes, by file handle.
- */
-static int
-__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
-{
- WT_UNUSED(sizep);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name);
-}
-
-/*
- * __stdio_handle_sync --
- * POSIX fflush/fsync.
- */
-static int
-__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
-{
- WT_UNUSED(block);
-
- if (fflush(fh->fp) == 0)
- return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
-}
-
-/*
- * __stdio_handle_truncate --
- * POSIX ftruncate.
- */
-static int
-__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
-{
- WT_UNUSED(len);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name);
-}
-
-/*
- * __stdio_handle_write --
- * POSIX pwrite.
- */
-static int
-__stdio_handle_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
-{
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(buf);
- WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name);
-}
-
-/*
- * __stdio_func_init --
- * Initialize stdio functions.
- */
-static void
-__stdio_func_init(WT_FH *fh, const char *name, FILE *fp)
-{
- fh->name = name;
- fh->fp = fp;
-
- fh->fh_advise = __stdio_handle_advise;
- fh->fh_allocate = __stdio_handle_allocate;
- fh->fh_close = __stdio_handle_close;
- fh->fh_getc = __stdio_handle_getc;
- fh->fh_lock = __stdio_handle_lock;
- fh->fh_map = __stdio_handle_map;
- fh->fh_map_discard = __stdio_handle_map_discard;
- fh->fh_map_preload = __stdio_handle_map_preload;
- fh->fh_map_unmap = __stdio_handle_map_unmap;
- fh->fh_printf = __stdio_handle_printf;
- fh->fh_read = __stdio_handle_read;
- fh->fh_size = __stdio_handle_size;
- fh->fh_sync = __stdio_handle_sync;
- fh->fh_truncate = __stdio_handle_truncate;
- fh->fh_write = __stdio_handle_write;
-}
-
-/*
- * __wt_os_stdio --
- * Initialize the stdio configuration.
- */
-int
-__wt_os_stdio(WT_SESSION_IMPL *session)
-{
- __stdio_func_init(WT_STDERR(session), "stderr", stderr);
- __stdio_func_init(WT_STDOUT(session), "stdout", stdout);
-
- return (0);
-}
diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c
new file mode 100644
index 00000000000..0b199529e19
--- /dev/null
+++ b/src/os_common/os_fstream.c
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/* Buffer size for streamed reads/writes. */
+#define WT_STREAM_BUFSIZE 8192
+
+/*
+ * __fstream_close --
+ * Close a stream handle: flush buffered output unless the stream is
+ * flagged WT_STREAM_READ, close the underlying file handle, then free the
+ * stream's buffer and the stream structure itself. Returns ret, which
+ * collects any error from the flush and the close.
+ */
+static int
+__fstream_close(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
+{
+ WT_DECL_RET;
+
+ /* Streams not opened for reading may have buffered output to write. */
+ if (!F_ISSET(fstr, WT_STREAM_READ))
+ WT_TRET(fstr->fstr_flush(session, fstr));
+
+ /* The stream structure is freed here, callers must not reuse fstr. */
+ WT_TRET(__wt_close(session, &fstr->fh));
+ __wt_buf_free(session, &fstr->buf);
+ __wt_free(session, fstr);
+ return (ret);
+}
+
+/*
+ * __fstream_flush --
+ *	Write any buffered stream data to the underlying file.
+ */
+static int
+__fstream_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
+{
+	WT_ITEM *pending;
+
+	pending = &fstr->buf;
+
+	/* Nothing buffered, nothing to write. */
+	if (pending->size == 0)
+		return (0);
+
+	WT_RET(__wt_write(
+	    session, fstr->fh, fstr->off, pending->size, pending->data));
+
+	/* Step the file offset past the written bytes, empty the buffer. */
+	fstr->off += (wt_off_t)pending->size;
+	pending->size = 0;
+
+	return (0);
+}
+
+/*
+ * __fstream_flush_notsup --
+ * Stream flush unsupported: installed by __wt_fopen as the flush method
+ * of streams opened for reading, fails with ENOTSUP.
+ */
+static int
+__fstream_flush_notsup(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
+{
+ WT_RET_MSG(session, ENOTSUP, "%s: flush", fstr->name);
+}
+
+/*
+ * __fstream_getline --
+ * Get a line from a stream.
+ *
+ * Implementation of the POSIX getline or BSD fgetln functions (finding the
+ * function in a portable way is hard, it's simple enough to write it instead).
+ *
+ * Note: Unlike the standard getline calls, this function doesn't include the
+ * trailing newline character in the returned buffer and discards empty lines
+ * (so the caller's EOF marker is a returned line length of 0).
+ *
+ * The file is read in chunks of at most WT_STREAM_BUFSIZE bytes into the
+ * stream's own buffer (fstr->buf); the line is accumulated in the caller's
+ * buf, which is always NUL-terminated on a successful return.
+ */
+static int
+__fstream_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
+{
+ const char *p;
+ size_t len;
+ char c;
+
+ /*
+ * We always NUL-terminate the returned string (even if it's empty),
+ * make sure there's buffer space for a trailing NUL in all cases.
+ */
+ WT_RET(__wt_buf_init(session, buf, 100));
+
+ for (;;) {
+ /*
+ * Check if we need to refill the buffer: fstr->buf.data is the read
+ * cursor, the chunk is exhausted once the cursor's distance from
+ * fstr->buf.mem reaches the chunk size.
+ */
+ if (WT_PTRDIFF(fstr->buf.data, fstr->buf.mem) >=
+ fstr->buf.size) {
+ /* Read no more than what remains before the recorded file size. */
+ len = WT_MIN(WT_STREAM_BUFSIZE,
+ (size_t)(fstr->size - fstr->off));
+ if (len == 0)
+ break; /* EOF */
+ /*
+ * NOTE(review): this presumably leaves fstr->buf.data pointing
+ * at fstr->buf.mem so the cursor restarts at the head of the
+ * new chunk -- confirm against __wt_buf_initsize.
+ */
+ WT_RET(__wt_buf_initsize(session, &fstr->buf, len));
+ WT_RET(__wt_read(
+ session, fstr->fh, fstr->off, len, fstr->buf.mem));
+ fstr->off += (wt_off_t)len;
+ }
+
+ /* Consume one byte and advance the read cursor past it. */
+ c = *(p = fstr->buf.data);
+ fstr->buf.data = ++p;
+
+ /* Leave space for a trailing NUL. */
+ WT_RET(__wt_buf_extend(session, buf, buf->size + 2));
+ if (c == '\n') {
+ /* Skip empty lines, a newline only ends a non-empty line. */
+ if (buf->size == 0)
+ continue;
+ break;
+ }
+ ((char *)buf->mem)[buf->size++] = c;
+ }
+
+ /* Terminate the line; buf->size doesn't count the NUL. */
+ ((char *)buf->mem)[buf->size] = '\0';
+
+ return (0);
+}
+
+/*
+ * __fstream_getline_notsup --
+ * Stream getline unsupported: installed by __wt_fopen as the getline
+ * method of streams opened for append or write, fails with ENOTSUP.
+ */
+static int
+__fstream_getline_notsup(
+ WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
+{
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: getline", fstr->name);
+}
+
+/*
+ * __fstream_printf --
+ *	ANSI C vfprintf: format into the stream's buffer, flushing the stream
+ * once at least WT_STREAM_BUFSIZE bytes are buffered.
+ *
+ *	Returns 0 on success, EIO if the arguments can't be formatted, or any
+ * error from growing the buffer or flushing the stream.
+ */
+static int
+__fstream_printf(
+    WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+{
+	WT_ITEM *buf;
+	va_list ap_copy;
+	size_t len, space;
+	int n;
+	char *p;
+
+	buf = &fstr->buf;
+
+	for (;;) {
+		/* The arguments may be walked more than once, use a copy. */
+		va_copy(ap_copy, ap);
+		p = (char *)((uint8_t *)buf->mem + buf->size);
+		WT_ASSERT(session, buf->memsize >= buf->size);
+		space = buf->memsize - buf->size;
+		n = vsnprintf(p, space, fmt, ap_copy);
+		va_end(ap_copy);
+
+		/*
+		 * vsnprintf returns a negative value on an output or encoding
+		 * error. Don't convert that to an unsigned length: it would
+		 * never fit in the buffer and the extension size computed
+		 * below would wrap, so the loop could retry forever.
+		 */
+		if (n < 0)
+			WT_RET_MSG(session,
+			    EIO, "%s: printf: vsnprintf", fstr->name);
+		len = (size_t)n;
+
+		/* The output (and its trailing NUL) fit, account for it. */
+		if (len < space) {
+			buf->size += len;
+
+			return (buf->size >= WT_STREAM_BUFSIZE ?
+			    __wt_fflush(session, fstr) : 0);
+		}
+
+		/* The output was truncated: grow the buffer and try again. */
+		WT_RET(__wt_buf_extend(session, buf, buf->size + len + 1));
+	}
+}
+
+/*
+ * __fstream_printf_notsup --
+ * ANSI C vfprintf unsupported: installed by __wt_fopen as the printf
+ * method of streams opened for reading, fails with ENOTSUP.
+ */
+static int
+__fstream_printf_notsup(
+ WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+{
+ WT_UNUSED(fmt);
+ WT_UNUSED(ap);
+ WT_RET_MSG(session, ENOTSUP, "%s: printf", fstr->name);
+}
+
+/*
+ * __wt_fopen --
+ *	Open a named file and wrap it in a stream handle, returned in fstrp.
+ */
+int
+__wt_fopen(WT_SESSION_IMPL *session,
+    const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp)
+{
+	WT_DECL_RET;
+	WT_FH *file;
+	WT_FSTREAM *stream;
+
+	stream = NULL;
+	*fstrp = NULL;
+
+	/* Open the underlying file, the stream is layered on top of it. */
+	WT_RET(__wt_open(
+	    session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &file));
+
+	WT_ERR(__wt_calloc_one(session, &stream));
+	stream->flags = flags;
+	stream->name = file->name;
+	stream->fh = file;
+	stream->close = __fstream_close;
+
+	/* Appending streams start at the current end of the file. */
+	WT_ERR(__wt_filesize(session, file, &stream->size));
+	if (LF_ISSET(WT_STREAM_APPEND))
+		stream->off = stream->size;
+
+	/*
+	 * Install the stream methods: output streams can flush and print
+	 * but not read lines, input streams are the reverse.
+	 */
+	if (LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) {
+		stream->fstr_printf = __fstream_printf;
+		stream->fstr_getline = __fstream_getline_notsup;
+		stream->fstr_flush = __fstream_flush;
+	} else {
+		WT_ASSERT(session, LF_ISSET(WT_STREAM_READ));
+		stream->fstr_printf = __fstream_printf_notsup;
+		stream->fstr_getline = __fstream_getline;
+		stream->fstr_flush = __fstream_flush_notsup;
+	}
+
+	*fstrp = stream;
+	return (0);
+
+err:	WT_TRET(__wt_close(session, &file));
+	__wt_free(session, stream);
+	return (ret);
+}
diff --git a/src/os_common/os_fstream_stdio.c b/src/os_common/os_fstream_stdio.c
new file mode 100644
index 00000000000..eea2c80ff0e
--- /dev/null
+++ b/src/os_common/os_fstream_stdio.c
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __stdio_close --
+ * Close method for the standard streams: closing them is not supported,
+ * fails with ENOTSUP.
+ */
+static int
+__stdio_close(WT_SESSION_IMPL *session, WT_FSTREAM *fs)
+{
+ WT_RET_MSG(session, ENOTSUP, "%s: close", fs->name);
+}
+
+/*
+ * __stdio_flush --
+ *	Flush a standard stream with the C library's fflush.
+ */
+static int
+__stdio_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fs)
+{
+	/* On failure, report the error fflush left behind. */
+	if (fflush(fs->fp) != 0)
+		WT_RET_MSG(session, __wt_errno(), "%s: flush", fs->name);
+
+	return (0);
+}
+
+/*
+ * __stdio_getline --
+ * Getline method for the standard streams: reading lines from them is
+ * not supported, fails with ENOTSUP.
+ */
+static int
+__stdio_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fs, WT_ITEM *buf)
+{
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: getline", fs->name);
+}
+
+/*
+ * __stdio_printf --
+ *	Formatted output to a standard stream with ANSI C vfprintf.
+ */
+static int
+__stdio_printf(
+    WT_SESSION_IMPL *session, WT_FSTREAM *fs, const char *fmt, va_list ap)
+{
+	/* vfprintf signals failure with a negative return value. */
+	if (vfprintf(fs->fp, fmt, ap) < 0)
+		WT_RET_MSG(session, EIO, "%s: printf", fs->name);
+
+	return (0);
+}
+
+/*
+ * __stdio_init --
+ * Initialize stdio functions: bind a stream structure to a C library
+ * FILE and install the stdio implementations of the stream methods.
+ */
+static void
+__stdio_init(WT_FSTREAM *fs, const char *name, FILE *fp)
+{
+ /* The name pointer is stored as-is, not copied. */
+ fs->name = name;
+ fs->fp = fp;
+
+ /* Only flush and printf do real work, close and getline fail. */
+ fs->close = __stdio_close;
+ fs->fstr_flush = __stdio_flush;
+ fs->fstr_getline = __stdio_getline;
+ fs->fstr_printf = __stdio_printf;
+}
+
+/*
+ * __wt_os_stdio --
+ * Initialize the stdio configuration: set up the session's stderr and
+ * stdout streams on top of the C library's stderr and stdout. Always
+ * returns 0.
+ */
+int
+__wt_os_stdio(WT_SESSION_IMPL *session)
+{
+ __stdio_init(WT_STDERR(session), "stderr", stderr);
+ __stdio_init(WT_STDOUT(session), "stdout", stdout);
+
+ return (0);
+}
diff --git a/src/os_common/os_getline.c b/src/os_common/os_getline.c
deleted file mode 100644
index 01e11581edf..00000000000
--- a/src/os_common/os_getline.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_getline --
- * Get a line from a stream.
- *
- * Implementation of the POSIX getline or BSD fgetln functions (finding the
- * function in a portable way is hard, it's simple enough to write it instead).
- *
- * Note: Unlike the standard getline calls, this function doesn't include the
- * trailing newline character in the returned buffer and discards empty lines
- * (so the caller's EOF marker is a returned line length of 0).
- */
-int
-__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh)
-{
- int c;
-
- /*
- * We always NUL-terminate the returned string (even if it's empty),
- * make sure there's buffer space for a trailing NUL in all cases.
- */
- WT_RET(__wt_buf_init(session, buf, 100));
-
- for (;;) {
- WT_RET(fh->fh_getc(session, fh, &c));
- if (c == EOF)
- break;
-
- /* Leave space for a trailing NUL. */
- WT_RET(__wt_buf_extend(session, buf, buf->size + 2));
- if (c == '\n') {
- if (buf->size == 0)
- continue;
- break;
- }
- ((char *)buf->mem)[buf->size++] = (char)c;
- }
-
- ((char *)buf->mem)[buf->size] = '\0';
-
- return (0);
-}
diff --git a/src/os_common/os_init.c b/src/os_common/os_init.c
deleted file mode 100644
index 512216c52a5..00000000000
--- a/src/os_common/os_init.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_os_init --
- * Initialize the OS layer.
- */
-int
-__wt_os_init(WT_SESSION_IMPL *session)
-{
- return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ?
- __wt_os_inmemory(session) :
-#if defined(_MSC_VER)
- __wt_os_win(session));
-#else
- __wt_os_posix(session));
-#endif
-}
-
-/*
- * __wt_os_cleanup --
- * Clean up the OS layer.
- */
-int
-__wt_os_cleanup(WT_SESSION_IMPL *session)
-{
- return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ?
- __wt_os_inmemory_cleanup(session) :
-#if defined(_MSC_VER)
- __wt_os_win_cleanup(session));
-#else
- __wt_os_posix_cleanup(session));
-#endif
-}
diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c
index 02f12ec7311..768a1324cd8 100644
--- a/src/os_posix/os_dir.c
+++ b/src/os_posix/os_dir.c
@@ -15,30 +15,34 @@
* Get a list of files from a directory, POSIX version.
*/
int
-__wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir,
- const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+__wt_posix_directory_list(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
struct dirent *dp;
DIR *dirp;
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
size_t dirallocsz;
- u_int count, dirsz;
- bool match;
- char **entries, *path;
+ uint32_t count;
+ int tret;
+ char **entries;
- *dirlist = NULL;
- *countp = 0;
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
- WT_RET(__wt_filename(session, dir, &path));
+ *dirlistp = NULL;
+ *countp = 0;
dirp = NULL;
dirallocsz = 0;
- dirsz = 0;
entries = NULL;
- WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? -1 : 0), ret);
+ WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret);
if (ret != 0)
- WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path);
+ WT_RET_MSG(session, ret,
+ "%s: directory-list: opendir", directory);
for (count = 0; (dp = readdir(dirp)) != NULL;) {
/*
@@ -49,44 +53,57 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir,
continue;
/* The list of files is optionally filtered by a prefix. */
- match = false;
- if (prefix != NULL &&
- ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
- WT_PREFIX_MATCH(dp->d_name, prefix)) ||
- (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
- !WT_PREFIX_MATCH(dp->d_name, prefix))))
- match = true;
- if (prefix == NULL || match) {
- /*
- * We have a file name we want to return.
- */
- count++;
- if (count > dirsz) {
- dirsz += WT_DIR_ENTRY;
- WT_ERR(__wt_realloc_def(
- session, &dirallocsz, dirsz, &entries));
- }
- WT_ERR(__wt_strdup(
- session, dp->d_name, &entries[count-1]));
- }
+ if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix))
+ continue;
+
+ WT_ERR(__wt_realloc_def(
+ session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(session, dp->d_name, &entries[count]));
+ ++count;
}
- if (count > 0)
- *dirlist = entries;
+
+ *dirlistp = entries;
*countp = count;
-err: if (dirp != NULL)
- (void)closedir(dirp);
- __wt_free(session, path);
+err: if (dirp != NULL) {
+ WT_SYSCALL(closedir(dirp), tret);
+ if (tret != 0) {
+ __wt_err(session, tret,
+ "%s: directory-list: closedir", directory);
+ if (ret == 0)
+ ret = tret;
+ }
+ }
if (ret == 0)
return (0);
- if (*dirlist != NULL) {
- for (count = dirsz; count > 0; count--)
- __wt_free(session, entries[count]);
- __wt_free(session, entries);
- }
+ WT_TRET(__wt_posix_directory_list_free(
+ file_system, wt_session, entries, count));
+
WT_RET_MSG(session, ret,
"%s: directory-list, prefix \"%s\"",
- dir, prefix == NULL ? "" : prefix);
+ directory, prefix == NULL ? "" : prefix);
+}
+
+/*
+ * __wt_posix_directory_list_free --
+ *	Release a file-name list built by __wt_posix_directory_list.
+ */
+int
+__wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *wt_session, char **dirlist, uint32_t count)
+{
+	WT_SESSION_IMPL *session;
+	uint32_t i;
+
+	WT_UNUSED(file_system);
+
+	session = (WT_SESSION_IMPL *)wt_session;
+
+	/* There's nothing to free if no list was ever allocated. */
+	if (dirlist == NULL)
+		return (0);
+
+	/* Free the names from last to first, then the array holding them. */
+	for (i = count; i > 0; --i)
+		__wt_free(session, dirlist[i - 1]);
+	__wt_free(session, dirlist);
+	return (0);
}
diff --git a/src/os_posix/os_dlopen.c b/src/os_posix/os_dlopen.c
index 9a74eb4813d..ad1fcc90150 100644
--- a/src/os_posix/os_dlopen.c
+++ b/src/os_posix/os_dlopen.c
@@ -19,7 +19,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
WT_DLH *dlh;
WT_RET(__wt_calloc_one(session, &dlh));
- WT_ERR(__wt_strdup(session, path, &dlh->name));
+ WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
WT_ERR_MSG(
diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c
index 22879d36182..9e5d9519900 100644
--- a/src/os_posix/os_fallocate.c
+++ b/src/os_posix/os_fallocate.c
@@ -12,47 +12,28 @@
#include <linux/falloc.h>
#include <sys/syscall.h>
#endif
-/*
- * __wt_posix_handle_allocate_configure --
- * Configure POSIX file-extension behavior for a file handle.
- */
-void
-__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_UNUSED(session);
-
- fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
- fh->fallocate_requires_locking = false;
-
- /*
- * Check for the availability of some form of fallocate; in all cases,
- * start off requiring locking, we'll relax that requirement once we
- * know which system calls work with the handle's underlying filesystem.
- */
-#if defined(HAVE_FALLOCATE) || defined(HAVE_POSIX_FALLOCATE)
- fh->fallocate_available = WT_FALLOCATE_AVAILABLE;
- fh->fallocate_requires_locking = true;
-#endif
-#if defined(__linux__) && defined(SYS_fallocate)
- fh->fallocate_available = WT_FALLOCATE_AVAILABLE;
- fh->fallocate_requires_locking = true;
-#endif
-}
/*
* __posix_std_fallocate --
* Linux fallocate call.
*/
static int
-__posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_std_fallocate(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
{
#if defined(HAVE_FALLOCATE)
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
- WT_SYSCALL_RETRY(fallocate(fh->fd, 0, offset, len), ret);
+ WT_UNUSED(wt_session);
+
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, offset, len), ret);
return (ret);
#else
- WT_UNUSED(fh);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
WT_UNUSED(offset);
WT_UNUSED(len);
return (ENOTSUP);
@@ -64,10 +45,16 @@ __posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
* Linux fallocate call (system call version).
*/
static int
-__posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_sys_fallocate(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
{
#if defined(__linux__) && defined(SYS_fallocate)
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+
+ WT_UNUSED(wt_session);
+
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
/*
* Try the system call for fallocate even if the C library wrapper was
@@ -75,10 +62,11 @@ __posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
* Linux versions (RHEL 5.5), but not in the version of the C library.
* This allows it to work everywhere the kernel supports it.
*/
- WT_SYSCALL_RETRY(syscall(SYS_fallocate, fh->fd, 0, offset, len), ret);
+ WT_SYSCALL_RETRY(syscall(SYS_fallocate, pfh->fd, 0, offset, len), ret);
return (ret);
#else
- WT_UNUSED(fh);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
WT_UNUSED(offset);
WT_UNUSED(len);
return (ENOTSUP);
@@ -90,15 +78,22 @@ __posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
* POSIX fallocate call.
*/
static int
-__posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_posix_fallocate(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
{
#if defined(HAVE_POSIX_FALLOCATE)
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+
+ WT_UNUSED(wt_session);
- WT_SYSCALL_RETRY(posix_fallocate(fh->fd, offset, len), ret);
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, offset, len), ret);
return (ret);
#else
- WT_UNUSED(fh);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
WT_UNUSED(offset);
WT_UNUSED(len);
return (ENOTSUP);
@@ -106,67 +101,52 @@ __posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
}
/*
- * __wt_posix_handle_allocate --
+ * __wt_posix_file_fallocate --
* POSIX fallocate.
*/
int
-__wt_posix_handle_allocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+__wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
{
- WT_DECL_RET;
-
- switch (fh->fallocate_available) {
- /*
- * Check for already configured handles and make the configured call.
- */
- case WT_FALLOCATE_POSIX:
- if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name);
- case WT_FALLOCATE_STD:
- if ((ret = __posix_std_fallocate(fh, offset, len)) == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: fallocate", fh->name);
- case WT_FALLOCATE_SYS:
- if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name);
-
/*
- * Figure out what allocation call this system/filesystem supports, if
- * any.
+ * The first fallocate call: figure out what fallocate call this system
+ * supports, if any.
+ *
+ * The function is configured as a locking fallocate call, so we know
+ * we're single-threaded through here. Set the nolock function first,
+ * then publish the NULL replacement to ensure the handle functions are
+ * always correct.
+ *
+ * We've seen Linux systems where posix_fallocate has corrupted
+ * existing file data (even though that is explicitly disallowed
+ * by POSIX). FreeBSD and Solaris support posix_fallocate, and
+ * so far we've seen no problems leaving it unlocked. Check for
+ * fallocate (and the system call version of fallocate) first to
+ * avoid locking on Linux if at all possible.
*/
- case WT_FALLOCATE_AVAILABLE:
- /*
- * We've seen Linux systems where posix_fallocate has corrupted
- * existing file data (even though that is explicitly disallowed
- * by POSIX). FreeBSD and Solaris support posix_fallocate, and
- * so far we've seen no problems leaving it unlocked. Check for
- * fallocate (and the system call version of fallocate) first to
- * avoid locking on Linux if at all possible.
- */
- if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) {
- fh->fallocate_available = WT_FALLOCATE_STD;
- fh->fallocate_requires_locking = false;
- return (0);
- }
- if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) {
- fh->fallocate_available = WT_FALLOCATE_SYS;
- fh->fallocate_requires_locking = false;
- return (0);
- }
- if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) {
- fh->fallocate_available = WT_FALLOCATE_POSIX;
-#if !defined(__linux__)
- fh->fallocate_requires_locking = false;
+ if (__posix_std_fallocate(file_handle, wt_session, offset, len) == 0) {
+ file_handle->fh_allocate_nolock = __posix_std_fallocate;
+ WT_PUBLISH(file_handle->fh_allocate, NULL);
+ return (0);
+ }
+ if (__posix_sys_fallocate(file_handle, wt_session, offset, len) == 0) {
+ file_handle->fh_allocate_nolock = __posix_sys_fallocate;
+ WT_PUBLISH(file_handle->fh_allocate, NULL);
+ return (0);
+ }
+ if (__posix_posix_fallocate(
+ file_handle, wt_session, offset, len) == 0) {
+#if defined(__linux__)
+ file_handle->fh_allocate = __posix_posix_fallocate;
+ WT_WRITE_BARRIER();
+#else
+ file_handle->fh_allocate_nolock = __posix_posix_fallocate;
+ WT_PUBLISH(file_handle->fh_allocate, NULL);
#endif
- return (0);
- }
- /* FALLTHROUGH */
- case WT_FALLOCATE_NOT_AVAILABLE:
- default:
- fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
- return (ENOTSUP);
+ return (0);
}
- /* NOTREACHED */
+
+ file_handle->fh_allocate = NULL;
+ WT_WRITE_BARRIER();
+ return (ENOTSUP);
}
diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c
index 7d8f3b937b6..86fa2e8f117 100644
--- a/src/os_posix/os_fs.c
+++ b/src/os_posix/os_fs.c
@@ -1,9 +1,29 @@
/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
*
- * See the file LICENSE for redistribution information.
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
#include "wt_internal.h"
@@ -13,30 +33,11 @@
* Underlying support function to flush a file handle.
*/
static int
-__posix_sync(WT_SESSION_IMPL *session,
- int fd, const char *name, const char *func, bool block)
+__posix_sync(
+ WT_SESSION_IMPL *session, int fd, const char *name, const char *func)
{
WT_DECL_RET;
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
-#ifdef HAVE_SYNC_FILE_RANGE
- if (!block) {
- WT_SYSCALL_RETRY(sync_file_range(fd,
- (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func);
- }
-#else
- /*
- * Callers attempting asynchronous flush handle ENOTSUP returns, and
- * won't make further attempts.
- */
- if (!block)
- return (ENOTSUP);
-#endif
-
#if defined(F_FULLFSYNC)
/*
* OS X fsync documentation:
@@ -73,105 +74,82 @@ __posix_sync(WT_SESSION_IMPL *session,
#endif
}
+#ifdef __linux__
/*
* __posix_directory_sync --
* Flush a directory to ensure file creation is durable.
*/
static int
-__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
+__posix_directory_sync(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path)
{
-#ifdef __linux__
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
int fd, tret;
- const char *dir;
- char *copy;
- tret = 0;
- /*
- * POSIX 1003.1 does not require that fsync of a file handle ensures the
- * entry in the directory containing the file has also reached disk (and
- * there are historic Linux filesystems requiring this), do an explicit
- * fsync on a file descriptor for the directory to be sure.
- */
- copy = NULL;
- if (path == NULL || (dir = strrchr(path, '/')) == NULL)
- path = S2C(session)->home;
- else {
- /*
- * Copy the directory name, leaving the trailing slash in place,
- * so a path of "/foo" doesn't result in an empty string.
- */
- WT_RET(__wt_strndup(
- session, path, (size_t)(dir - path) + 1, &copy));
- path = copy;
- }
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
WT_SYSCALL_RETRY((
(fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
- WT_ERR_MSG(session, ret, "%s: directory-sync: open", path);
+ WT_RET_MSG(session, ret, "%s: directory-sync: open", path);
- ret = __posix_sync(session, fd, path, "directory-sync", true);
+ ret = __posix_sync(session, fd, path, "directory-sync");
- WT_SYSCALL_RETRY(close(fd), tret);
+ WT_SYSCALL(close(fd), tret);
if (tret != 0) {
__wt_err(session, tret, "%s: directory-sync: close", path);
if (ret == 0)
ret = tret;
}
-err: __wt_free(session, copy);
return (ret);
-#else
- WT_UNUSED(session);
- WT_UNUSED(path);
- return (0);
-#endif
}
+#endif
/*
- * __posix_file_exist --
+ * __posix_fs_exist --
* Return if the file exists.
*/
static int
-__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+__posix_fs_exist(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, bool *existp)
{
struct stat sb;
WT_DECL_RET;
- char *path;
+ WT_SESSION_IMPL *session;
- WT_RET(__wt_filename(session, name, &path));
- name = path;
+ WT_UNUSED(file_system);
- WT_SYSCALL_RETRY(stat(name, &sb), ret);
- if (ret == 0)
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
*existp = true;
- else if (ret == ENOENT) {
+ return (0);
+ }
+ if (ret == ENOENT) {
*existp = false;
- ret = 0;
- } else
- __wt_err(session, ret, "%s: file-exist: stat", name);
-
- __wt_free(session, path);
- return (ret);
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-exist: stat", name);
}
/*
- * __posix_file_remove --
+ * __posix_fs_remove --
* Remove a file.
*/
static int
-__posix_file_remove(WT_SESSION_IMPL *session, const char *name)
+__posix_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
{
WT_DECL_RET;
- char *path;
+ WT_SESSION_IMPL *session;
-#ifdef HAVE_DIAGNOSTIC
- if (__wt_handle_search(session, name, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-remove: file has open handles", name);
-#endif
+ WT_UNUSED(file_system);
- WT_RET(__wt_filename(session, name, &path));
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* ISO C doesn't require remove return -1 on failure or set errno (note
@@ -180,35 +158,26 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name)
* where we're not doing any special checking for standards compliance,
* using unlink may be marginally safer.
*/
- WT_SYSCALL_RETRY(unlink(path), ret);
- __wt_free(session, path);
+ WT_SYSCALL(unlink(name), ret);
if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
}
/*
- * __posix_file_rename --
+ * __posix_fs_rename --
* Rename a file.
*/
static int
-__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+__posix_fs_rename(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *from, const char *to)
{
WT_DECL_RET;
- char *from_path, *to_path;
-
-#ifdef HAVE_DIAGNOSTIC
- if (__wt_handle_search(session, from, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", from);
- if (__wt_handle_search(session, to, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", to);
-#endif
+ WT_SESSION_IMPL *session;
- from_path = to_path = NULL;
- WT_ERR(__wt_filename(session, from, &from_path));
- WT_ERR(__wt_filename(session, to, &to_path));
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* ISO C doesn't require rename return -1 on failure or set errno (note
@@ -217,143 +186,114 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
* with the wrong errno (if errno is garbage), or the generic WT_ERROR
* return (if errno is 0), but we've done the best we can.
*/
- WT_SYSCALL_RETRY(rename(from_path, to_path) != 0 ? -1 : 0, ret);
-
-err: __wt_free(session, from_path);
- __wt_free(session, to_path);
+ WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
}
/*
- * __posix_file_size --
+ * __posix_fs_size --
* Get the size of a file in bytes, by file name.
*/
static int
-__posix_file_size(
- WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+__posix_fs_size(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
struct stat sb;
WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, name, &path));
- name = path;
+ WT_SESSION_IMPL *session;
- /*
- * Optionally don't log errors on ENOENT; some callers of this function
- * expect failure in that case and don't want an error message logged.
- */
- WT_SYSCALL_RETRY(stat(name, &sb), ret);
- if (ret == 0)
- *sizep = sb.st_size;
- else if (ret != ENOENT || !silent)
- __wt_err(session, ret, "%s: file-size: stat", name);
+ WT_UNUSED(file_system);
- __wt_free(session, path);
+ session = (WT_SESSION_IMPL *)wt_session;
- return (ret);
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-size: stat", name);
}
+#if defined(HAVE_POSIX_FADVISE)
/*
- * __posix_handle_advise --
+ * __posix_file_advise --
* POSIX fadvise.
*/
static int
-__posix_handle_advise(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+__posix_file_advise(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ wt_off_t offset, wt_off_t len, int advice)
{
-#if defined(HAVE_POSIX_FADVISE)
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- /*
- * Refuse pre-load when direct I/O is configured for the file, the
- * kernel cache isn't interesting.
- */
- if (advice == POSIX_MADV_WILLNEED && fh->direct_io)
- return (ENOTSUP);
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret);
+ WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret);
if (ret == 0)
return (0);
/*
* Treat EINVAL as not-supported, some systems don't support some flags.
- * Quietly fail, callers expect not-supported failures.
+ * Quietly fail, callers expect not-supported failures, and reset the
+ * handle method to prevent future calls.
*/
- if (ret == EINVAL)
+ if (ret == EINVAL) {
+ file_handle->fh_advise = NULL;
return (ENOTSUP);
+ }
- WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name);
-#else
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(advice);
+ WT_RET_MSG(session, ret,
+ "%s: handle-advise: posix_fadvise", file_handle->name);
- /* Quietly fail, callers expect not-supported failures. */
- return (ENOTSUP);
-#endif
}
+#endif
/*
- * __posix_handle_close --
- * ANSI C close/fclose.
+ * __posix_file_close --
+ * ANSI C close.
*/
static int
-__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+__posix_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- if (fh->fp == NULL) {
- WT_SYSCALL_RETRY(close(fh->fd), ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name);
- }
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- /* If the stream was opened for writing, flush the file. */
- if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "%s: handle-close: fflush", fh->name);
+ /* Close the file handle. */
+ if (pfh->fd != -1) {
+ WT_SYSCALL(close(pfh->fd), ret);
+ if (ret != 0)
+ __wt_err(session, ret,
+ "%s: handle-close: close", file_handle->name);
}
- /* Close the file. */
- if (fclose(fh->fp) != 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "%s: handle-close: fclose", fh->name);
- }
+ __wt_free(session, file_handle->name);
+ __wt_free(session, pfh);
return (ret);
}
/*
- * __posix_handle_getc --
- * ANSI C fgetc.
- */
-static int
-__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
-{
- if (fh->fp == NULL)
- WT_RET_MSG(session,
- ENOTSUP, "%s: handle-getc: no stream configured", fh->name);
-
- *chp = fgetc(fh->fp);
- if (*chp != EOF || !ferror(fh->fp))
- return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name);
-}
-
-/*
- * __posix_handle_lock --
+ * __posix_file_lock --
* Lock/unlock a file.
*/
static int
-__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+__posix_file_lock(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
struct flock fl;
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
/*
* WiredTiger requires this function be able to acquire locks past
@@ -369,44 +309,32 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
fl.l_type = lock ? F_WRLCK : F_UNLCK;
fl.l_whence = SEEK_SET;
- WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
+ WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
if (ret == 0)
return (0);
- WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name);
-}
-
-/*
- * __posix_handle_printf --
- * ANSI C vfprintf.
- */
-static int
-__posix_handle_printf(
- WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
-{
- if (fh->fp == NULL)
- WT_RET_MSG(session, ENOTSUP,
- "%s: vfprintf: no stream configured", fh->name);
-
- if (vfprintf(fh->fp, fmt, ap) >= 0)
- return (0);
- WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+ WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name);
}
/*
- * __posix_handle_read --
+ * __posix_file_read --
* POSIX pread.
*/
static int
-__posix_handle_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+__posix_file_read(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
size_t chunk;
ssize_t nr;
uint8_t *addr;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
- !fh->direct_io ||
+ !pfh->direct_io ||
S2C(session)->buffer_alignment == 0 ||
(!((uintptr_t)buf &
(uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
@@ -416,79 +344,122 @@ __posix_handle_read(
/* Break reads larger than 1GB into 1GB chunks. */
for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0)
+ if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0)
WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
"%s: handle-read: pread: failed to read %"
WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
+ file_handle->name, chunk, (uintmax_t)offset);
}
return (0);
}
/*
- * __posix_handle_size --
+ * __posix_file_size --
* Get the size of a file in bytes, by file handle.
*/
static int
-__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+__posix_file_size(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
struct stat sb;
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret);
+ WT_SYSCALL(fstat(pfh->fd, &sb), ret);
if (ret == 0) {
*sizep = sb.st_size;
return (0);
}
- WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name);
+ WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name);
}
/*
- * __posix_handle_sync --
- * POSIX fflush/fsync.
+ * __posix_file_sync --
+ * POSIX fsync.
*/
static int
-__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+__posix_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- if (fh->fp == NULL)
- return (__posix_sync(
- session, fh->fd, fh->name, "handle-sync", block));
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- if (fflush(fh->fp) == 0)
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ return (
+ __posix_sync(session, pfh->fd, file_handle->name, "handle-sync"));
+}
+
+#ifdef HAVE_SYNC_FILE_RANGE
+/*
+ * __posix_file_sync_nowait --
+ * Linux sync_file_range: start write-out of the file without waiting.
+ */
+static int
+__posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
+{
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL_RETRY(sync_file_range(pfh->fd,
+ (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
+ if (ret == 0)
return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+ WT_RET_MSG(session, ret,
+ "%s: handle-sync-nowait: sync_file_range", file_handle->name);
}
+#endif
/*
- * __posix_handle_truncate --
+ * __posix_file_truncate --
* POSIX ftruncate.
*/
static int
-__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+__posix_file_truncate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
{
WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret);
+ WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret);
if (ret == 0)
return (0);
- WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name);
+ WT_RET_MSG(session, ret,
+ "%s: handle-truncate: ftruncate", file_handle->name);
}
/*
- * __posix_handle_write --
+ * __posix_file_write --
* POSIX pwrite.
*/
static int
-__posix_handle_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+__posix_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ wt_off_t offset, size_t len, const void *buf)
{
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
size_t chunk;
ssize_t nw;
const uint8_t *addr;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
- !fh->direct_io ||
+ !pfh->direct_io ||
S2C(session)->buffer_alignment == 0 ||
(!((uintptr_t)buf &
(uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
@@ -498,21 +469,21 @@ __posix_handle_write(WT_SESSION_IMPL *session,
/* Break writes larger than 1GB into 1GB chunks. */
for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0)
+ if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0)
WT_RET_MSG(session, __wt_errno(),
"%s: handle-write: pwrite: failed to write %"
WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
+ file_handle->name, chunk, (uintmax_t)offset);
}
return (0);
}
/*
- * __posix_handle_open_cloexec --
+ * __posix_open_file_cloexec --
* Prevent child access to file handles.
*/
static inline int
-__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
+__posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
{
#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
int f;
@@ -537,28 +508,35 @@ __posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
}
/*
- * __posix_handle_open --
+ * __posix_open_file --
* Open a file handle.
*/
static int
-__posix_handle_open(WT_SESSION_IMPL *session,
- WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags)
+__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ WT_FILE_HANDLE **file_handlep)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
mode_t mode;
- int f, fd, tret;
- bool direct_io;
- const char *stream_mode;
+ int f;
+ WT_UNUSED(file_system);
+
+ *file_handlep = NULL;
+
+ session = (WT_SESSION_IMPL *)wt_session;
conn = S2C(session);
- direct_io = false;
+
+ WT_RET(__wt_calloc_one(session, &pfh));
/* Set up error handling. */
- fh->fd = fd = -1;
- fh->fp = NULL;
+ pfh->fd = -1;
- if (file_type == WT_FILE_TYPE_DIRECTORY) {
+ if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) {
f = O_RDONLY;
#ifdef O_CLOEXEC
/*
@@ -569,10 +547,10 @@ __posix_handle_open(WT_SESSION_IMPL *session,
f |= O_CLOEXEC;
#endif
WT_SYSCALL_RETRY((
- (fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
+ (pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
- WT_ERR(__posix_handle_open_cloexec(session, fd, name));
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
goto directory_open;
}
@@ -598,28 +576,20 @@ __posix_handle_open(WT_SESSION_IMPL *session,
f |= O_CLOEXEC;
#endif
#ifdef O_DIRECT
- /*
- * Direct I/O: file-type is a flag from the set of possible flags stored
- * in the connection handle during configuration, check for a match.
- * Also, "direct_io=checkpoint" configures direct I/O for readonly data
- * files.
- */
- if (FLD_ISSET(conn->direct_io, file_type) ||
- (LF_ISSET(WT_OPEN_READONLY) &&
- file_type == WT_FILE_TYPE_DATA &&
- FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) {
+ /* Direct I/O. */
+ if (LF_ISSET(WT_OPEN_DIRECTIO)) {
f |= O_DIRECT;
- direct_io = true;
- }
+ pfh->direct_io = true;
+ } else
+ pfh->direct_io = false;
#endif
- fh->direct_io = direct_io;
#ifdef O_NOATIME
/* Avoid updating metadata for read-only workloads. */
- if (file_type == WT_FILE_TYPE_DATA)
+ if (file_type == WT_OPEN_FILE_TYPE_DATA)
f |= O_NOATIME;
#endif
- if (file_type == WT_FILE_TYPE_LOG &&
+ if (file_type == WT_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
f |= O_DSYNC;
@@ -631,115 +601,122 @@ __posix_handle_open(WT_SESSION_IMPL *session,
#endif
}
- WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
+ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret,
- direct_io ?
+ pfh->direct_io ?
"%s: handle-open: open: failed with direct I/O configured, "
"some filesystem types do not support direct I/O" :
"%s: handle-open: open", name);
- WT_ERR(__posix_handle_open_cloexec(session, fd, name));
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
- /* Disable read-ahead on trees: it slows down random read workloads. */
#if defined(HAVE_POSIX_FADVISE)
- if (file_type == WT_FILE_TYPE_DATA) {
- WT_SYSCALL_RETRY(
- posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret);
+ /*
+ * Disable read-ahead on trees: it slows down random read workloads.
+ * Ignore fadvise when doing direct I/O, the kernel cache isn't
+ * interesting.
+ */
+ if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) {
+ WT_SYSCALL(
+ posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret);
if (ret != 0)
WT_ERR_MSG(session, ret,
"%s: handle-open: posix_fadvise", name);
}
#endif
- /* Optionally configure a stdio stream API. */
- switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) {
- case WT_STREAM_APPEND:
- stream_mode = "a";
- F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
- break;
- case WT_STREAM_READ:
- stream_mode = "r";
- break;
- case WT_STREAM_WRITE:
- stream_mode = "w";
- F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
- break;
- case 0:
- default:
- stream_mode = NULL;
- break;
- }
- if (stream_mode != NULL) {
- if ((fh->fp = fdopen(fd, stream_mode)) == NULL)
- WT_ERR_MSG(session, __wt_errno(),
- "%s: handle-open: fdopen", name);
- if (LF_ISSET(WT_STREAM_LINE_BUFFER))
- __wt_stream_set_line_buffer(fh->fp);
- }
-
directory_open:
- fh->fd = fd;
-
- /* Configure fallocate calls. */
- __wt_posix_handle_allocate_configure(session, fh);
-
- fh->fh_advise = __posix_handle_advise;
- fh->fh_allocate = __wt_posix_handle_allocate;
- fh->fh_close = __posix_handle_close;
- fh->fh_getc = __posix_handle_getc;
- fh->fh_lock = __posix_handle_lock;
- fh->fh_map = __wt_posix_map;
- fh->fh_map_discard = __wt_posix_map_discard;
- fh->fh_map_preload = __wt_posix_map_preload;
- fh->fh_map_unmap = __wt_posix_map_unmap;
- fh->fh_printf = __posix_handle_printf;
- fh->fh_read = __posix_handle_read;
- fh->fh_size = __posix_handle_size;
- fh->fh_sync = __posix_handle_sync;
- fh->fh_truncate = __posix_handle_truncate;
- fh->fh_write = __posix_handle_write;
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)pfh;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
+
+ file_handle->close = __posix_file_close;
+#if defined(HAVE_POSIX_FADVISE)
+ /*
+ * Ignore fadvise when doing direct I/O, the kernel cache isn't
+ * interesting.
+ */
+ if (!pfh->direct_io)
+ file_handle->fh_advise = __posix_file_advise;
+#endif
+ file_handle->fh_allocate = __wt_posix_file_fallocate;
+ file_handle->fh_lock = __posix_file_lock;
+#ifdef WORDS_BIGENDIAN
+ /*
+ * The underlying objects are little-endian, mapping objects isn't
+ * currently supported on big-endian systems.
+ */
+#else
+ file_handle->fh_map = __wt_posix_map;
+#ifdef HAVE_POSIX_MADVISE
+ file_handle->fh_map_discard = __wt_posix_map_discard;
+ file_handle->fh_map_preload = __wt_posix_map_preload;
+#endif
+ file_handle->fh_unmap = __wt_posix_unmap;
+#endif
+ file_handle->fh_read = __posix_file_read;
+ file_handle->fh_size = __posix_file_size;
+ file_handle->fh_sync = __posix_file_sync;
+#ifdef HAVE_SYNC_FILE_RANGE
+ file_handle->fh_sync_nowait = __posix_file_sync_nowait;
+#endif
+ file_handle->fh_truncate = __posix_file_truncate;
+ file_handle->fh_write = __posix_file_write;
+
+ *file_handlep = file_handle;
return (0);
-err: if (fd != -1) {
- WT_SYSCALL_RETRY(close(fd), tret);
- if (tret != 0)
- __wt_err(session, tret, "%s: handle-open: close", name);
- }
+err: WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
return (ret);
}
/*
- * __wt_os_posix --
- * Initialize a POSIX configuration.
+ * __posix_terminate --
+ * Terminate a POSIX configuration.
*/
-int
-__wt_os_posix(WT_SESSION_IMPL *session)
+static int
+__posix_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = S2C(session);
+ WT_UNUSED(file_system);
- /* Initialize the POSIX jump table. */
- conn->file_directory_list = __wt_posix_directory_list;
- conn->file_directory_sync = __posix_directory_sync;
- conn->file_exist = __posix_file_exist;
- conn->file_remove = __posix_file_remove;
- conn->file_rename = __posix_file_rename;
- conn->file_size = __posix_file_size;
- conn->handle_open = __posix_handle_open;
+ session = (WT_SESSION_IMPL *)wt_session;
+ __wt_free(session, file_system);
return (0);
}
/*
- * __wt_os_posix_cleanup --
- * Discard a POSIX configuration.
+ * __wt_os_posix --
+ * Initialize a POSIX configuration.
*/
int
-__wt_os_posix_cleanup(WT_SESSION_IMPL *session)
+__wt_os_posix(WT_SESSION_IMPL *session)
{
- WT_UNUSED(session);
+ WT_CONNECTION_IMPL *conn;
+ WT_FILE_SYSTEM *file_system;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_calloc_one(session, &file_system));
+
+ /* Initialize the POSIX jump table. */
+ file_system->fs_directory_list = __wt_posix_directory_list;
+ file_system->fs_directory_list_free = __wt_posix_directory_list_free;
+#ifdef __linux__
+ file_system->fs_directory_sync = __posix_directory_sync;
+#endif
+ file_system->fs_exist = __posix_fs_exist;
+ file_system->fs_open_file = __posix_open_file;
+ file_system->fs_remove = __posix_fs_remove;
+ file_system->fs_rename = __posix_fs_rename;
+ file_system->fs_size = __posix_fs_size;
+ file_system->terminate = __posix_terminate;
+
+ /* Switch it into place. */
+ conn->file_system = file_system;
return (0);
}
diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c
index e161e268f6d..b33f6d82e34 100644
--- a/src/os_posix/os_map.c
+++ b/src/os_posix/os_map.c
@@ -13,23 +13,26 @@
* Map a file into memory.
*/
int
-__wt_posix_map(WT_SESSION_IMPL *session,
- WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie)
+__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
+ void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
{
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
size_t len;
wt_off_t file_size;
void *map;
- WT_UNUSED(mappingcookie);
+ WT_UNUSED(mapped_cookiep);
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)fh;
/*
* Mapping isn't possible if direct I/O configured for the file, the
* Linux open(2) documentation says applications should avoid mixing
* mmap(2) of files with direct I/O to the same files.
*/
- if (fh->direct_io)
+ if (pfh->direct_io)
return (ENOTSUP);
/*
@@ -37,7 +40,7 @@ __wt_posix_map(WT_SESSION_IMPL *session,
* underneath us, our caller needs to ensure consistency of the mapped
* region vs. any other file activity.
*/
- WT_RET(__wt_filesize(session, fh, &file_size));
+ WT_RET(fh->fh_size(fh, wt_session, &file_size));
len = (size_t)file_size;
(void)__wt_verbose(session, WT_VERB_HANDLEOPS,
@@ -49,43 +52,48 @@ __wt_posix_map(WT_SESSION_IMPL *session,
MAP_NOCORE |
#endif
MAP_PRIVATE,
- fh->fd, (wt_off_t)0)) == MAP_FAILED)
+ pfh->fd, (wt_off_t)0)) == MAP_FAILED)
WT_RET_MSG(session,
__wt_errno(), "%s: memory-map: mmap", fh->name);
- *(void **)mapp = map;
+ *(void **)mapped_regionp = map;
*lenp = len;
return (0);
}
#ifdef HAVE_POSIX_MADVISE
/*
- * __posix_map_preload_madvise --
+ * __wt_posix_map_preload --
* Cause a section of a memory map to be faulted in.
*/
-static int
-__posix_map_preload_madvise(
- WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
+int
+__wt_posix_map_preload(WT_FILE_HANDLE *fh,
+ WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
{
WT_BM *bm;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
void *blk;
+ WT_UNUSED(mapped_cookie);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
conn = S2C(session);
bm = S2BT(session)->bm;
/* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
- size += WT_PTRDIFF(p, blk);
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
/* XXX proxy for "am I doing a scan?" -- manual read-ahead */
if (F_ISSET(session, WT_SESSION_NO_CACHE)) {
/* Read in 2MB blocks every 1MB of data. */
- if (((uintptr_t)((uint8_t *)blk + size) &
+ if (((uintptr_t)((uint8_t *)blk + length) &
(uintptr_t)((1<<20) - 1)) < (uintptr_t)blk)
return (0);
- size = WT_MIN(WT_MAX(20 * size, 2 << 20),
+ length = WT_MIN(WT_MAX(20 * length, 2 << 20),
WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
}
@@ -93,10 +101,12 @@ __posix_map_preload_madvise(
* Manual pages aren't clear on whether alignment is required for the
* size, so we will be conservative.
*/
- size &= ~(size_t)(conn->page_size - 1);
+ length &= ~(size_t)(conn->page_size - 1);
+ if (length <= (size_t)conn->page_size)
+ return (0);
- if (size <= (size_t)conn->page_size ||
- (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0)
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
+ if (ret == 0)
return (0);
WT_RET_MSG(session, ret,
@@ -105,46 +115,31 @@ __posix_map_preload_madvise(
}
#endif
-/*
- * __wt_posix_map_preload --
- * Cause a section of a memory map to be faulted in.
- */
-int
-__wt_posix_map_preload(
- WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
-
-#ifdef HAVE_POSIX_MADVISE
- return (__posix_map_preload_madvise(session, fh, p, size));
-#else
- WT_UNUSED(fh);
- WT_UNUSED(p);
- WT_UNUSED(size);
- return (ENOTSUP);
-#endif
-}
-
#ifdef HAVE_POSIX_MADVISE
/*
- * __posix_map_discard_madvise --
+ * __wt_posix_map_discard --
* Discard a chunk of the memory map.
*/
-static int
-__posix_map_discard_madvise(
- WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
+int
+__wt_posix_map_discard(WT_FILE_HANDLE *fh,
+ WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
void *blk;
+ WT_UNUSED(mapped_cookie);
+
+ session = (WT_SESSION_IMPL *)wt_session;
conn = S2C(session);
/* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
- size += WT_PTRDIFF(p, blk);
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
- if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0)
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret);
+ if (ret == 0)
return (0);
WT_RET_MSG(session, ret,
@@ -154,41 +149,23 @@ __posix_map_discard_madvise(
#endif
/*
- * __wt_posix_map_discard --
- * Discard a chunk of the memory map.
- */
-int
-__wt_posix_map_discard(
- WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
-
-#ifdef HAVE_POSIX_MADVISE
- return (__posix_map_discard_madvise(session, fh, p, size));
-#else
- WT_UNUSED(fh);
- WT_UNUSED(p);
- WT_UNUSED(size);
- return (ENOTSUP);
-#endif
-}
-
-/*
- * __wt_posix_map_unmap --
+ * __wt_posix_unmap --
* Remove a memory mapping.
*/
int
-__wt_posix_map_unmap(WT_SESSION_IMPL *session,
- WT_FH *fh, void *map, size_t len, void **mappingcookie)
+__wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
+ void *mapped_region, size_t len, void *mapped_cookie)
{
- WT_UNUSED(mappingcookie);
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(mapped_cookie);
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+ session = (WT_SESSION_IMPL *)wt_session;
(void)__wt_verbose(session, WT_VERB_HANDLEOPS,
"%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
- if (munmap(map, len) == 0)
+ if (munmap(mapped_region, len) == 0)
return (0);
WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c
index 35a23622ddc..e57a308c9b0 100644
--- a/src/os_posix/os_thread.c
+++ b/src/os_posix/os_thread.c
@@ -34,7 +34,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
{
WT_DECL_RET;
- WT_SYSCALL_RETRY(pthread_join(tid, NULL), ret);
+ WT_SYSCALL(pthread_join(tid, NULL), ret);
if (ret == 0)
return (0);
diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c
index 0e5a1cdadfb..b1b22a8e684 100644
--- a/src/os_posix/os_time.c
+++ b/src/os_posix/os_time.c
@@ -18,14 +18,14 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
WT_DECL_RET;
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ WT_SYSCALL(clock_gettime(CLOCK_REALTIME, tsp), ret);
if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
struct timeval v;
- WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
+ WT_SYSCALL(gettimeofday(&v, NULL), ret);
if (ret == 0) {
tsp->tv_sec = v.tv_sec;
tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c
index 64eae60983c..dccacc1e446 100644
--- a/src/os_win/os_dir.c
+++ b/src/os_win/os_dir.c
@@ -13,38 +13,46 @@
* Get a list of files from a directory, MSVC version.
*/
int
-__wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir,
- const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+__wt_win_directory_list(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
+ DWORD windows_error;
HANDLE findhandle;
WIN32_FIND_DATA finddata;
WT_DECL_ITEM(pathbuf);
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
size_t dirallocsz, pathlen;
- u_int count, dirsz;
- bool match;
- char **entries, *path;
+ uint32_t count;
+ char *dir_copy, **entries;
- *dirlist = NULL;
- *countp = 0;
+ WT_UNUSED(file_system);
- WT_RET(__wt_filename(session, dir, &path));
+ session = (WT_SESSION_IMPL *)wt_session;
- pathlen = strlen(path);
- if (path[pathlen - 1] == '\\')
- path[pathlen - 1] = '\0';
- WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
- WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path));
+ *dirlistp = NULL;
+ *countp = 0;
findhandle = INVALID_HANDLE_VALUE;
dirallocsz = 0;
- dirsz = 0;
entries = NULL;
+ WT_ERR(__wt_strdup(session, directory, &dir_copy));
+ pathlen = strlen(dir_copy);
+ if (dir_copy[pathlen - 1] == '\\')
+ dir_copy[pathlen - 1] = '\0';
+ WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
+ WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy));
+
findhandle = FindFirstFileA(pathbuf->data, &finddata);
- if (findhandle == INVALID_HANDLE_VALUE)
- WT_ERR_MSG(session, __wt_getlasterror(),
- "%s: directory-list: FindFirstFile", pathbuf->data);
+ if (findhandle == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: directory-list: FindFirstFile: %s",
+ pathbuf->data, __wt_formatmessage(session, windows_error));
+ WT_ERR(__wt_map_windows_error(windows_error));
+ }
count = 0;
do {
@@ -56,46 +64,63 @@ __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir,
continue;
/* The list of files is optionally filtered by a prefix. */
- match = false;
if (prefix != NULL &&
- ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
- WT_PREFIX_MATCH(finddata.cFileName, prefix)) ||
- (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
- !WT_PREFIX_MATCH(finddata.cFileName, prefix))))
- match = true;
- if (prefix == NULL || match) {
- /*
- * We have a file name we want to return.
- */
- count++;
- if (count > dirsz) {
- dirsz += WT_DIR_ENTRY;
- WT_ERR(__wt_realloc_def(session,
- &dirallocsz, dirsz, &entries));
- }
- WT_ERR(__wt_strdup(session,
- finddata.cFileName, &entries[count - 1]));
- }
+ !WT_PREFIX_MATCH(finddata.cFileName, prefix))
+ continue;
+
+ WT_ERR(__wt_realloc_def(
+ session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(
+ session, finddata.cFileName, &entries[count]));
+ ++count;
} while (FindNextFileA(findhandle, &finddata) != 0);
- if (count > 0)
- *dirlist = entries;
+
+ *dirlistp = entries;
*countp = count;
err: if (findhandle != INVALID_HANDLE_VALUE)
- (void)FindClose(findhandle);
- __wt_free(session, path);
+ if (FindClose(findhandle) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: directory-list: FindClose: %s",
+ pathbuf->data,
+ __wt_formatmessage(session, windows_error));
+ if (ret == 0)
+ ret = __wt_map_windows_error(windows_error);
+ }
+
+ __wt_free(session, dir_copy);
__wt_scr_free(session, &pathbuf);
if (ret == 0)
return (0);
- if (*dirlist != NULL) {
- for (count = dirsz; count > 0; count--)
- __wt_free(session, entries[count]);
- __wt_free(session, entries);
- }
+ WT_TRET(__wt_win_directory_list_free(
+ file_system, wt_session, entries, count));
WT_RET_MSG(session, ret,
"%s: directory-list, prefix \"%s\"",
- dir, prefix == NULL ? "" : prefix);
+ directory, prefix == NULL ? "" : prefix);
+}
+
+/*
+ * __wt_win_directory_list_free --
+ * Free memory returned by __wt_win_directory_list, Windows version.
+ */
+int
+__wt_win_directory_list_free(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, char **dirlist, uint32_t count)
+{
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
}
diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c
index ce949e4ea5f..3da47bf23a3 100644
--- a/src/os_win/os_dlopen.c
+++ b/src/os_win/os_dlopen.c
@@ -15,19 +15,22 @@
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
+ DWORD windows_error;
WT_DECL_RET;
WT_DLH *dlh;
WT_RET(__wt_calloc_one(session, &dlh));
- WT_ERR(__wt_strdup(session, path, &dlh->name));
+ WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
/* NULL means load from the current binary */
if (path == NULL) {
if (GetModuleHandleExA(
0, NULL, (HMODULE *)&dlh->handle) == FALSE) {
- ret = __wt_getlasterror();
- WT_ERR_MSG(session, ret,
- "GetModuleHandleEx(%s): %s", path, 0);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "GetModuleHandleEx: %s: %s",
+ dlh->name, __wt_formatmessage(session, windows_error));
+ WT_ERR(__wt_map_windows_error(windows_error));
}
} else {
// TODO: load dll here
@@ -50,14 +53,20 @@ int
__wt_dlsym(WT_SESSION_IMPL *session,
WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
{
+ DWORD windows_error;
void *sym;
*(void **)sym_ret = NULL;
sym = GetProcAddress(dlh->handle, name);
- if (sym == NULL && fail)
- WT_RET_MSG(session, __wt_getlasterror(),
- "GetProcAddress(%s in %s)", name, dlh->name);
+ if (sym == NULL && fail) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "GetProcAddress: %s in %s: %s",
+ name, dlh->name,
+ __wt_formatmessage(session, windows_error));
+ WT_RET(__wt_map_windows_error(windows_error));
+ }
*(void **)sym_ret = sym;
return (0);
@@ -70,11 +79,14 @@ __wt_dlsym(WT_SESSION_IMPL *session,
int
__wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
{
+ DWORD windows_error;
WT_DECL_RET;
if (FreeLibrary(dlh->handle) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret, "FreeLibrary: %s", dlh->name);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session, "FreeLibrary: %s: %s",
+ dlh->name, __wt_formatmessage(session, windows_error));
+ ret = __wt_map_windows_error(windows_error);
}
__wt_free(session, dlh->name);
diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c
deleted file mode 100644
index f3fffd5ef42..00000000000
--- a/src/os_win/os_errno.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-static const int windows_error_offset = -29000;
-
-/*
- * __wt_map_error_to_windows_error --
- * Return a negative integer, an encoded Windows error
- * Standard C errors are positive integers from 0 - ~200
- * Windows errors are from 0 - 15999 according to the documentation
- */
-static DWORD
-__wt_map_error_to_windows_error(int error)
-{
- /*
- * Ensure we do not exceed the error range
- * Also validate we do not get any COM errors
- * (which are negative integers)
- */
- WT_ASSERT(NULL, error < 0);
-
- return (error + -(windows_error_offset));
-}
-
-/*
- * __wt_map_windows_error_to_error --
- * Return a positive integer, a decoded Windows error
- */
-static int
-__wt_map_windows_error_to_error(DWORD winerr)
-{
- return (winerr + windows_error_offset);
-}
-
-/*
- * __wt_map_error_rdonly --
- * Map an error into a WiredTiger error code specific for
- * read-only operation which intercepts based on certain types
- * of failures.
- */
-int
-__wt_map_error_rdonly(int error)
-{
- if (error == ERROR_FILE_NOT_FOUND)
- return (WT_NOTFOUND);
- else if (error == ERROR_ACCESS_DENIED)
- return (WT_PERM_DENIED);
- return (error);
-}
-
-/*
- * __wt_errno --
- * Return errno, or WT_ERROR if errno not set.
- */
-int
-__wt_errno(void)
-{
- /*
- * Check for 0:
- * It's easy to introduce a problem by calling the wrong error function,
- * for example, this function when the MSVC function set the C runtime
- * error value. Handle gracefully and always return an error.
- */
- return (errno == 0 ? WT_ERROR : errno);
-}
-
-/*
- * __wt_getlasterror --
- * Return GetLastError, or WT_ERROR if error not set.
- */
-int
-__wt_getlasterror(void)
-{
- /*
- * Called when we know an error occurred, and we want the system
- * error code.
- */
- DWORD err = GetLastError();
-
- /*
- * Check for ERROR_SUCCESS:
- * It's easy to introduce a problem by calling the wrong error function,
- * for example, this function when the MSVC function set the C runtime
- * error value. Handle gracefully and always return an error.
- */
- return (err == ERROR_SUCCESS ?
- WT_ERROR : __wt_map_windows_error_to_error(err));
-}
-
-/*
- * __wt_strerror --
- * Windows implementation of WT_SESSION.strerror and wiredtiger_strerror.
- */
-const char *
-__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
-{
- DWORD lasterror;
- const char *p;
- char buf[512];
-
- /*
- * Check for a WiredTiger or POSIX constant string, no buffer needed.
- */
- if ((p = __wt_wiredtiger_error(error)) != NULL)
- return (p);
-
- /*
- * When called from wiredtiger_strerror, write a passed-in buffer.
- * When called from WT_SESSION.strerror, write the session's buffer.
- *
- * Check for Windows errors.
- */
- if (error < 0) {
- error = __wt_map_error_to_windows_error(error);
-
- lasterror = FormatMessageA(
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL,
- error,
- 0, /* let system choose the correct LANGID */
- buf,
- sizeof(buf),
- NULL);
-
- if (lasterror != 0 && session == NULL &&
- snprintf(errbuf, errlen, "%s", buf) > 0)
- return (errbuf);
- if (lasterror != 0 && session != NULL &&
- __wt_buf_fmt(session, &session->err, "%s", buf) == 0)
- return (session->err.data);
- }
-
- /* Fallback to a generic message. */
- if (session == NULL &&
- snprintf(errbuf, errlen, "error return: %d", error) > 0)
- return (errbuf);
- if (session != NULL && __wt_buf_fmt(
- session, &session->err, "error return: %d", error) == 0)
- return (session->err.data);
-
- /* Defeated. */
- return ("Unable to return error string");
-}
diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c
index 4ac613fc9f9..5daba124e90 100644
--- a/src/os_win/os_fs.c
+++ b/src/os_win/os_fs.c
@@ -9,34 +9,21 @@
#include "wt_internal.h"
/*
- * __win_directory_sync --
- * Flush a directory to ensure a file creation is durable.
- */
-static int
-__win_directory_sync(WT_SESSION_IMPL *session, const char *path)
-{
- WT_UNUSED(session);
- WT_UNUSED(path);
- return (0);
-}
-
-/*
- * __win_file_exist --
+ * __win_fs_exist --
* Return if the file exists.
*/
static int
-__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+__win_fs_exist(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, bool *existp)
{
WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, name, &path));
+ WT_SESSION_IMPL *session;
- ret = GetFileAttributesA(path);
+ WT_UNUSED(file_system);
- __wt_free(session, path);
+ session = (WT_SESSION_IMPL *)wt_session;
- if (ret != INVALID_FILE_ATTRIBUTES)
+ if (GetFileAttributesA(name) != INVALID_FILE_ATTRIBUTES)
*existp = true;
else
*existp = false;
@@ -45,57 +32,44 @@ __win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
}
/*
- * __win_file_remove --
+ * __win_fs_remove --
* Remove a file.
*/
static int
-__win_file_remove(WT_SESSION_IMPL *session, const char *name)
+__win_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
{
- WT_DECL_RET;
- char *path;
+ DWORD windows_error;
+ WT_SESSION_IMPL *session;
-#ifdef HAVE_DIAGNOSTIC
- if (__wt_handle_search(session, name, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-remove: file has open handles", name);
-#endif
+ WT_UNUSED(file_system);
- WT_RET(__wt_filename(session, name, &path));
- name = path;
+ session = (WT_SESSION_IMPL *)wt_session;
if (DeleteFileA(name) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret, "%s: file-remove: DeleteFileA", name);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: file-remove: DeleteFileA: %s",
+ name, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
-
- __wt_free(session, path);
- return (ret);
+ return (0);
}
/*
- * __win_file_rename --
+ * __win_fs_rename --
* Rename a file.
*/
static int
-__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+__win_fs_rename(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *from, const char *to)
{
- WT_DECL_RET;
- char *from_path, *to_path;
+ DWORD windows_error;
+ WT_SESSION_IMPL *session;
-#ifdef HAVE_DIAGNOSTIC
- if (__wt_handle_search(session, from, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", from);
- if (__wt_handle_search(session, to, false, NULL, NULL))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", to);
-#endif
+ WT_UNUSED(file_system);
- from_path = to_path = NULL;
- WT_ERR(__wt_filename(session, from, &from_path));
- from = from_path;
- WT_ERR(__wt_filename(session, to, &to_path));
- to = to_path;
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* Check if file exists since Windows does not override the file if
@@ -103,184 +77,114 @@ __win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
*/
if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES)
if (DeleteFileA(to) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s to %s: file-rename: rename", from, to);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: file-rename: DeleteFileA: %s",
+ to, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
- if (ret == 0 && MoveFileA(from, to) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s to %s: file-rename: rename", from, to);
+ if (MoveFileA(from, to) == FALSE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s to %s: file-rename: MoveFileA: %s",
+ from, to, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
-err: __wt_free(session, from_path);
- __wt_free(session, to_path);
- return (ret);
+ return (0);
}
/*
- * __win_file_size --
+ * __wt_win_fs_size --
* Get the size of a file in bytes, by file name.
*/
-static int
-__win_file_size(
- WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+int
+__wt_win_fs_size(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
+ DWORD windows_error;
WIN32_FILE_ATTRIBUTE_DATA data;
- WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, name, &path));
+ WT_SESSION_IMPL *session;
- ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data);
+ WT_UNUSED(file_system);
- __wt_free(session, path);
+ session = (WT_SESSION_IMPL *)wt_session;
- if (ret != 0) {
+ if (GetFileAttributesExA(name, GetFileExInfoStandard, &data) != 0) {
*sizep =
((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow;
return (0);
}
- /*
- * Some callers of this function expect failure if the file doesn't
- * exist, and don't want an error message logged.
- */
- ret = __wt_getlasterror();
- if (!silent)
- WT_RET_MSG(session, ret,
- "%s: file-size: GetFileAttributesEx", name);
- return (ret);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: file-size: GetFileAttributesEx: %s",
+ name, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
/*
- * __win_handle_advise --
- * MSVC fadvise.
+ * __win_file_close --
+ * ANSI C close.
*/
static int
-__win_handle_advise(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+__win_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(offset);
- WT_UNUSED(len);
- WT_UNUSED(advice);
-
- /* Quietly fail, callers expect not-supported failures. */
- return (ENOTSUP);
-}
+ DWORD windows_error;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
-/*
- * __win_handle_allocate_configure --
- * Configure fallocate behavior for a file handle.
- */
-static void
-__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_UNUSED(session);
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
/*
- * fallocate on Windows would be implemented using SetEndOfFile, which
- * can also truncate the file. WiredTiger expects fallocate to ignore
- * requests to truncate the file which Windows does not do, so we don't
- * support the call.
+ * Close the primary and secondary handles.
+ *
+ * We don't open Windows system handles when opening directories for
+ * flushing, as it's not necessary (or possible) to flush a directory
+ * on Windows. Confirm the file handle is open before closing it.
*/
- fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
- fh->fallocate_requires_locking = false;
-}
-
-/*
- * __win_handle_allocate --
- * Allocate space for a file handle.
- */
-static int
-__win_handle_allocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
-{
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(offset);
- WT_UNUSED(len);
-
- WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name);
- return (ENOTSUP);
-}
-
-/*
- * __win_handle_close --
- * Close a file handle.
- */
-static int
-__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_DECL_RET;
-
- if (fh->filehandle != INVALID_HANDLE_VALUE) {
- /*
- * We don't open Windows system handles when opening directories
- * for flushing, as it is not necessary (or possible) to flush
- * a directory on Windows. Confirm the file handle is set before
- * attempting to close it.
- */
- if (CloseHandle(fh->filehandle) == 0) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: handle-close: CloseHandle", fh->name);
- }
+ if (win_fh->filehandle != INVALID_HANDLE_VALUE &&
+ CloseHandle(win_fh->filehandle) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-close: CloseHandle: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ ret = __wt_map_windows_error(windows_error);
}
- if (fh->fp != NULL) {
- /* If the stream was opened for writing, flush the file. */
- if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) {
- ret = __wt_errno();
- __wt_err(session,
- ret, "%s: handle-close: fflush", fh->name);
- }
- /* Close the file, closing all the underlying handles. */
- if (fclose(fh->fp) != 0) {
- ret = __wt_errno();
- __wt_err(session,
- ret, "%s: handle-close: fclose", fh->name);
- }
+ if (win_fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
+ CloseHandle(win_fh->filehandle_secondary) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-close: secondary: CloseHandle: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ ret = __wt_map_windows_error(windows_error);
}
- /* Close the secondary handle. */
- if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
- CloseHandle(fh->filehandle_secondary) == 0) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: handle-close: secondary: CloseHandle", fh->name);
- }
+ __wt_free(session, file_handle->name);
+ __wt_free(session, win_fh);
return (ret);
}
/*
- * __win_handle_getc --
- * ANSI C fgetc.
- */
-static int
-__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
-{
- if (fh->fp == NULL)
- WT_RET_MSG(session,
- ENOTSUP, "%s: handle-getc: no stream configured", fh->name);
-
- *chp = fgetc(fh->fp);
- if (*chp != EOF || !ferror(fh->fp))
- return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name);
-}
-
-/*
- * __win_handle_lock --
+ * __win_file_lock --
* Lock/unlock a file.
*/
static int
-__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+__win_file_lock(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- WT_DECL_RET;
+ DWORD windows_error;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* WiredTiger requires this function be able to acquire locks past
@@ -293,54 +197,48 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
* This is useful to coordinate adding records to the end of a file.
*/
if (lock) {
- if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: handle-lock: LockFile", fh->name);
+ if (LockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-lock: LockFile: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
} else
- if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: handle-lock: UnlockFile", fh->name);
+ if (UnlockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-lock: UnlockFile: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
- return (ret);
-}
-
-/*
- * __win_handle_printf --
- * ANSI C vfprintf.
- */
-static int
-__win_handle_printf(
- WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
-{
- if (fh->fp == NULL)
- WT_RET_MSG(session, ENOTSUP,
- "%s: vfprintf: no stream configured", fh->name);
-
- if (vfprintf(fh->fp, fmt, ap) >= 0)
- return (0);
- WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+ return (0);
}
/*
- * __win_handle_read --
+ * __win_file_read --
* Read a chunk.
*/
static int
-__win_handle_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+__win_file_read(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
- DWORD chunk, nr;
+ DWORD chunk, nr, windows_error;
uint8_t *addr;
OVERLAPPED overlapped = { 0 };
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
nr = 0;
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
- !fh->direct_io ||
+ !win_fh->direct_io ||
S2C(session)->buffer_alignment == 0 ||
(!((uintptr_t)buf &
(uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
@@ -353,44 +251,61 @@ __win_handle_read(
overlapped.Offset = UINT32_MAX & offset;
overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
- if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped))
- WT_RET_MSG(session,
- nr == 0 ? WT_ERROR : __wt_getlasterror(),
+ if (!ReadFile(
+ win_fh->filehandle, addr, chunk, &nr, &overlapped)) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
"%s: handle-read: ReadFile: failed to read %lu "
- "bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
+ "bytes at offset %" PRIuMAX ": %s",
+ file_handle->name, chunk, (uintmax_t)offset,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
+ }
}
return (0);
}
/*
- * __win_handle_size --
+ * __win_file_size --
* Get the size of a file in bytes, by file handle.
*/
static int
-__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+__win_file_size(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
+ DWORD windows_error;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
LARGE_INTEGER size;
- if (GetFileSizeEx(fh->filehandle, &size) != 0) {
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ if (GetFileSizeEx(win_fh->filehandle, &size) != 0) {
*sizep = size.QuadPart;
return (0);
}
- WT_RET_MSG(session,
- __wt_getlasterror(), "%s: handle-size: GetFileSizeEx", fh->name);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-size: GetFileSizeEx: %s",
+ file_handle->name, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
/*
- * __win_handle_sync --
- * MSVC fflush/fsync.
+ * __win_file_sync --
+ * MSVC fsync.
*/
static int
-__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+__win_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
+ DWORD windows_error;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* We don't open Windows system handles when opening directories
@@ -398,76 +313,87 @@ __win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
* a directory on Windows. Confirm the file handle is set before
* attempting to sync it.
*/
- if (fh->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE)
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE)
return (0);
- if (fh->fp == NULL) {
- /*
- * Callers attempting asynchronous flush handle ENOTSUP returns,
- * and won't make further attempts.
- */
- if (!block)
- return (ENOTSUP);
-
- if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE)
- WT_RET_MSG(session, __wt_getlasterror(),
- "%s handle-sync: FlushFileBuffers error", fh->name);
- return (0);
+ if (FlushFileBuffers(win_fh->filehandle) == FALSE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s handle-sync: FlushFileBuffers: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
-
- if (fflush(fh->fp) == 0)
- return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+ return (0);
}
/*
- * __win_handle_truncate --
+ * __win_file_truncate --
* Truncate a file.
*/
static int
-__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+__win_file_truncate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
{
- WT_DECL_RET;
+ DWORD windows_error;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
LARGE_INTEGER largeint;
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
largeint.QuadPart = len;
- if (fh->filehandle_secondary == INVALID_HANDLE_VALUE)
+ if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE)
WT_RET_MSG(session, EINVAL,
- "%s: handle-truncate: read-only", fh->name);
-
- if (SetFilePointerEx(
- fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE)
- WT_RET_MSG(session, __wt_getlasterror(),
- "%s: handle-truncate: SetFilePointerEx", fh->name);
+ "%s: handle-truncate: read-only", file_handle->name);
+
+ if (SetFilePointerEx(win_fh->filehandle_secondary,
+ largeint, NULL, FILE_BEGIN) == FALSE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-truncate: SetFilePointerEx: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
+ }
- if (SetEndOfFile(fh->filehandle_secondary) == FALSE) {
+ if (SetEndOfFile(win_fh->filehandle_secondary) == FALSE) {
if (GetLastError() == ERROR_USER_MAPPED_FILE)
return (EBUSY);
- WT_RET_MSG(session, __wt_getlasterror(),
- "%s: handle-truncate: SetEndOfFile error", fh->name);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-truncate: SetEndOfFile: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
return (0);
}
/*
- * __win_handle_write --
+ * __win_file_write --
* Write a chunk.
*/
static int
-__win_handle_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+__win_file_write(WT_FILE_HANDLE *file_handle,
+ WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
{
- DWORD chunk;
- DWORD nw;
+ DWORD chunk, nw, windows_error;
const uint8_t *addr;
OVERLAPPED overlapped = { 0 };
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
nw = 0;
/* Assert direct I/O is aligned and a multiple of the alignment. */
WT_ASSERT(session,
- !fh->direct_io ||
+ !win_fh->direct_io ||
S2C(session)->buffer_alignment == 0 ||
(!((uintptr_t)buf &
(uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
@@ -480,38 +406,51 @@ __win_handle_write(WT_SESSION_IMPL *session,
overlapped.Offset = UINT32_MAX & offset;
overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
- if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped))
- WT_RET_MSG(session, __wt_getlasterror(),
+ if (!WriteFile(
+ win_fh->filehandle, addr, chunk, &nw, &overlapped)) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
"%s: handle-write: WriteFile: failed to write %lu "
- "bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
+ "bytes at offset %" PRIuMAX ": %s",
+ file_handle->name, chunk, (uintmax_t)offset,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
+ }
}
return (0);
}
/*
- * __win_handle_open --
+ * __win_open_file --
* Open a file handle.
*/
static int
-__win_handle_open(WT_SESSION_IMPL *session,
- WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags)
+__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ WT_FILE_HANDLE **file_handlep)
{
- DWORD dwCreationDisposition;
- HANDLE filehandle, filehandle_secondary;
+ DWORD dwCreationDisposition, windows_error;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
int desired_access, f;
- bool direct_io;
- const char *stream_mode;
+ WT_UNUSED(file_system);
+
+ *file_handlep = NULL;
+
+ session = (WT_SESSION_IMPL *)wt_session;
conn = S2C(session);
- direct_io = false;
+
+ WT_RET(__wt_calloc_one(session, &win_fh));
+
+ win_fh->direct_io = false;
/* Set up error handling. */
- fh->filehandle = fh->filehandle_secondary =
- filehandle = filehandle_secondary = INVALID_HANDLE_VALUE;
- fh->fp = NULL;
+ win_fh->filehandle =
+ win_fh->filehandle_secondary = INVALID_HANDLE_VALUE;
/*
* Opening a file handle on a directory is only to support filesystems
@@ -519,7 +458,7 @@ __win_handle_open(WT_SESSION_IMPL *session,
* require that functionality: create an empty WT_FH structure with
* invalid handles.
*/
- if (file_type == WT_FILE_TYPE_DIRECTORY)
+ if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY)
goto directory_open;
desired_access = GENERIC_READ;
@@ -544,47 +483,44 @@ __win_handle_open(WT_SESSION_IMPL *session,
} else
dwCreationDisposition = OPEN_EXISTING;
- /*
- * direct_io means no OS file caching. This requires aligned buffer
- * allocations like O_DIRECT.
- */
- if (FLD_ISSET(conn->direct_io, file_type) ||
- (LF_ISSET(WT_OPEN_READONLY) &&
- file_type == WT_FILE_TYPE_DATA &&
- FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) {
+ /* Direct I/O. */
+ if (LF_ISSET(WT_OPEN_DIRECTIO)) {
f |= FILE_FLAG_NO_BUFFERING;
- direct_io = true;
+ win_fh->direct_io = true;
}
- fh->direct_io = direct_io;
/* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
if (FLD_ISSET(conn->write_through, file_type))
f |= FILE_FLAG_WRITE_THROUGH;
- if (file_type == WT_FILE_TYPE_LOG &&
+ if (file_type == WT_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
f |= FILE_FLAG_WRITE_THROUGH;
/* Disable read-ahead on trees: it slows down random read workloads. */
- if (file_type == WT_FILE_TYPE_DATA)
+ if (file_type == WT_OPEN_FILE_TYPE_DATA)
f |= FILE_FLAG_RANDOM_ACCESS;
- filehandle = CreateFileA(name, desired_access,
+ win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, dwCreationDisposition, f, NULL);
- if (filehandle == INVALID_HANDLE_VALUE) {
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
if (LF_ISSET(WT_OPEN_CREATE) &&
GetLastError() == ERROR_FILE_EXISTS)
- filehandle = CreateFileA(name, desired_access,
+ win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, f, NULL);
- if (filehandle == INVALID_HANDLE_VALUE)
- WT_ERR_MSG(session, __wt_getlasterror(),
- direct_io ?
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ win_fh->direct_io ?
"%s: handle-open: CreateFileA: failed with direct "
"I/O configured, some filesystem types do not "
- "support direct I/O" :
- "%s: handle-open: CreateFileA", name);
+ "support direct I/O: %s" :
+ "%s: handle-open: CreateFileA: %s",
+ name, __wt_formatmessage(session, windows_error));
+ WT_ERR(__wt_map_windows_error(windows_error));
+ }
}
/*
@@ -593,78 +529,64 @@ __win_handle_open(WT_SESSION_IMPL *session,
* pointer.
*/
if (!LF_ISSET(WT_OPEN_READONLY)) {
- filehandle_secondary = CreateFileA(name, desired_access,
+ win_fh->filehandle_secondary = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, f, NULL);
- if (filehandle_secondary == INVALID_HANDLE_VALUE)
- WT_ERR_MSG(session, __wt_getlasterror(),
- "%s: handle-open: CreateFileA: secondary", name);
- }
-
- /* Optionally configure a stdio stream API. */
- switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) {
- case WT_STREAM_APPEND:
- f = _O_APPEND | _O_TEXT;
- stream_mode = "a";
- F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
- break;
- case WT_STREAM_READ:
- f = _O_RDONLY | _O_TEXT;
- stream_mode = "r";
- break;
- case WT_STREAM_WRITE:
- f = _O_TEXT;
- stream_mode = "w";
- F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
- break;
- case 0:
- default:
- stream_mode = NULL;
- break;
+ if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: handle-open: CreateFileA: secondary: %s",
+ name, __wt_formatmessage(session, windows_error));
+ WT_ERR(__wt_map_windows_error(windows_error));
+ }
}
- if (stream_mode != NULL) {
- if ((fh->fp = fopen(name, stream_mode)) == NULL)
- WT_ERR_MSG(session, __wt_errno(),
- "%s: handle-open: fopen", name);
- if (LF_ISSET(WT_STREAM_LINE_BUFFER))
- __wt_stream_set_line_buffer(fh->fp);
- }
+directory_open:
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)win_fh;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
- /* Configure fallocate/posix_fallocate calls. */
- __win_handle_allocate_configure(session, fh);
+ file_handle->close = __win_file_close;
+ file_handle->fh_lock = __win_file_lock;
+#ifdef WORDS_BIGENDIAN
+ /*
+ * The underlying objects are little-endian, mapping objects isn't
+ * currently supported on big-endian systems.
+ */
+#else
+ file_handle->fh_map = __wt_win_map;
+ file_handle->fh_unmap = __wt_win_unmap;
+#endif
+ file_handle->fh_read = __win_file_read;
+ file_handle->fh_size = __win_file_size;
+ file_handle->fh_sync = __win_file_sync;
+ file_handle->fh_truncate = __win_file_truncate;
+ file_handle->fh_write = __win_file_write;
-directory_open:
- fh->filehandle = filehandle;
- fh->filehandle_secondary = filehandle_secondary;
-
- fh->fh_advise = __win_handle_advise;
- fh->fh_allocate = __win_handle_allocate;
- fh->fh_close = __win_handle_close;
- fh->fh_getc = __win_handle_getc;
- fh->fh_lock = __win_handle_lock;
- fh->fh_map = __wt_win_map;
- fh->fh_map_discard = __wt_win_map_discard;
- fh->fh_map_preload = __wt_win_map_preload;
- fh->fh_map_unmap = __wt_win_map_unmap;
- fh->fh_printf = __win_handle_printf;
- fh->fh_read = __win_handle_read;
- fh->fh_size = __win_handle_size;
- fh->fh_sync = __win_handle_sync;
- fh->fh_truncate = __win_handle_truncate;
- fh->fh_write = __win_handle_write;
+ *file_handlep = file_handle;
return (0);
-err: if (filehandle != INVALID_HANDLE_VALUE)
- (void)CloseHandle(filehandle);
- if (filehandle_secondary != INVALID_HANDLE_VALUE)
- (void)CloseHandle(filehandle_secondary);
-
+err: WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session));
return (ret);
}
/*
+ * __win_terminate --
+ * Discard a Windows configuration.
+ */
+static int
+__win_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
+{
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_free(session, file_system);
+ return (0);
+}
+
+/*
* __wt_os_win --
* Initialize a MSVC configuration.
*/
@@ -672,29 +594,24 @@ int
__wt_os_win(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ WT_FILE_SYSTEM *file_system;
conn = S2C(session);
- /* Initialize the POSIX jump table. */
- conn->file_directory_list = __wt_win_directory_list;
- conn->file_directory_sync = __win_directory_sync;
- conn->file_exist = __win_file_exist;
- conn->file_remove = __win_file_remove;
- conn->file_rename = __win_file_rename;
- conn->file_size = __win_file_size;
- conn->handle_open = __win_handle_open;
+ WT_RET(__wt_calloc_one(session, &file_system));
- return (0);
-}
+ /* Initialize the Windows jump table. */
+ file_system->fs_directory_list = __wt_win_directory_list;
+ file_system->fs_directory_list_free = __wt_win_directory_list_free;
+ file_system->fs_exist = __win_fs_exist;
+ file_system->fs_open_file = __win_open_file;
+ file_system->fs_remove = __win_fs_remove;
+ file_system->fs_rename = __win_fs_rename;
+ file_system->fs_size = __wt_win_fs_size;
+ file_system->terminate = __win_terminate;
-/*
- * __wt_os_win_cleanup --
- * Discard a POSIX configuration.
- */
-int
-__wt_os_win_cleanup(WT_SESSION_IMPL *session)
-{
- WT_UNUSED(session);
+ /* Switch it into place. */
+ conn->file_system = file_system;
return (0);
}
diff --git a/src/os_win/os_getenv.c b/src/os_win/os_getenv.c
index 9b297ac3a74..fe228328ee6 100644
--- a/src/os_win/os_getenv.c
+++ b/src/os_win/os_getenv.c
@@ -15,22 +15,22 @@
int
__wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
{
- WT_DECL_RET;
- DWORD size;
+ DWORD size, windows_error;
*envp = NULL;
- size = GetEnvironmentVariableA(variable, NULL, 0);
- if (size <= 1)
+ if ((size = GetEnvironmentVariableA(variable, NULL, 0)) <= 1)
return (WT_NOTFOUND);
- WT_RET(__wt_calloc(session, 1, size, envp));
+ WT_RET(__wt_malloc(session, (size_t)size, envp));
- ret = GetEnvironmentVariableA(variable, *envp, size);
/* We expect the number of bytes not including nul terminator. */
- if ((ret + 1) != size)
- WT_RET_MSG(session, __wt_getlasterror(),
- "GetEnvironmentVariableA failed: %s", variable);
+ if (GetEnvironmentVariableA(variable, *envp, size) == size - 1)
+ return (0);
- return (0);
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "GetEnvironmentVariableA: %s: %s",
+ variable, __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c
index b043f9c9923..8f5b289062d 100644
--- a/src/os_win/os_map.c
+++ b/src/os_win/os_map.c
@@ -13,106 +13,99 @@
* Map a file into memory.
*/
int
-__wt_win_map(WT_SESSION_IMPL *session,
- WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie)
+__wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
{
- WT_DECL_RET;
+ DWORD windows_error;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
size_t len;
wt_off_t file_size;
- void *map;
+ void *map, *mapped_cookie;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
/*
* There's no locking here to prevent the underlying file from changing
* underneath us, our caller needs to ensure consistency of the mapped
* region vs. any other file activity.
*/
- WT_RET(__wt_filesize(session, fh, &file_size));
+ WT_RET(__wt_win_fs_size(file_handle->file_system,
+ wt_session, file_handle->name, &file_size));
len = (size_t)file_size;
(void)__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
-
- *mappingcookie =
- CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL);
- if (*mappingcookie == NULL)
- WT_RET_MSG(session, __wt_getlasterror(),
- "%s: memory-map: CreateFileMappingA", fh->name);
+ "%s: memory-map: %" WT_SIZET_FMT " bytes", file_handle->name, len);
+
+ mapped_cookie = CreateFileMappingA(
+ win_fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (mapped_cookie == NULL) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: memory-map: CreateFileMappingA: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
+ }
if ((map =
- MapViewOfFile(*mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) {
+ MapViewOfFile(mapped_cookie, FILE_MAP_READ, 0, 0, len)) == NULL) {
/* Retrieve the error before cleaning up. */
- ret = __wt_getlasterror();
- CloseHandle(*mappingcookie);
- *mappingcookie = NULL;
+ windows_error = __wt_getlasterror();
+
+ (void)CloseHandle(mapped_cookie);
- WT_RET_MSG(session, ret,
- "%s: memory-map: MapViewOfFile", fh->name);
+ __wt_errx(session,
+ "%s: memory-map: MapViewOfFile: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
- *(void **)mapp = map;
+ *(void **)mapped_cookiep = mapped_cookie;
+ *(void **)mapped_regionp = map;
*lenp = len;
return (0);
}
/*
- * __wt_win_map_preload --
- * Cause a section of a memory map to be faulted in.
- */
-int
-__wt_win_map_preload(
- WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
-{
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(p);
- WT_UNUSED(size);
-
- return (ENOTSUP);
-}
-
-/*
- * __wt_win_map_discard --
- * Discard a chunk of the memory map.
- */
-int
-__wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
-{
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(p);
- WT_UNUSED(size);
-
- return (ENOTSUP);
-}
-
-/*
- * __wt_win_map_unmap --
+ * __wt_win_unmap --
* Remove a memory mapping.
*/
int
-__wt_win_map_unmap(WT_SESSION_IMPL *session,
- WT_FH *fh, void *map, size_t len, void **mappingcookie)
+__wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ void *mapped_region, size_t length, void *mapped_cookie)
{
+ DWORD windows_error;
WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
- (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
-
- WT_ASSERT(session, *mappingcookie != NULL);
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (UnmapViewOfFile(map) == 0) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: memory-unmap: UnmapViewOfFile", fh->name);
+ (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: memory-unmap: %" WT_SIZET_FMT " bytes",
+ file_handle->name, length);
+
+ if (UnmapViewOfFile(mapped_region) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: memory-unmap: UnmapViewOfFile: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ ret = __wt_map_windows_error(windows_error);
}
- if (CloseHandle(*mappingcookie) == 0) {
- ret = __wt_getlasterror();
- __wt_err(session, ret,
- "%s: memory-unmap: CloseHandle", fh->name);
+ if (CloseHandle(*(void **)mapped_cookie) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session,
+ "%s: memory-unmap: CloseHandle: %s",
+ file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ ret = __wt_map_windows_error(windows_error);
}
- *mappingcookie = NULL;
-
return (ret);
}
diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c
index af4a5035076..8645fdaccb3 100644
--- a/src/os_win/os_mtx_cond.c
+++ b/src/os_win/os_mtx_cond.c
@@ -45,10 +45,10 @@ int
__wt_cond_wait_signal(
WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled)
{
- DWORD err, milliseconds;
- WT_DECL_RET;
- uint64_t milliseconds64;
+ BOOL sleepret;
+ DWORD milliseconds, windows_error;
bool locked;
+ uint64_t milliseconds64;
locked = false;
@@ -88,33 +88,35 @@ __wt_cond_wait_signal(
if (milliseconds == 0)
milliseconds = 1;
- ret = SleepConditionVariableCS(
+ sleepret = SleepConditionVariableCS(
&cond->cond, &cond->mtx, milliseconds);
} else
- ret = SleepConditionVariableCS(
+ sleepret = SleepConditionVariableCS(
&cond->cond, &cond->mtx, INFINITE);
/*
* SleepConditionVariableCS returns non-zero on success, 0 on timeout
- * or failure. Check for timeout, else convert to a WiredTiger error
- * value and fail.
+ * or failure.
*/
- if (ret == 0) {
- if ((err = GetLastError()) == ERROR_TIMEOUT)
+ if (sleepret == 0) {
+ windows_error = __wt_getlasterror();
+ if (windows_error == ERROR_TIMEOUT) {
*signalled = false;
- else
- ret = __wt_getlasterror();
- } else
- ret = 0;
+ sleepret = 1;
+ }
+ }
(void)__wt_atomic_subi32(&cond->waiters, 1);
if (locked)
LeaveCriticalSection(&cond->mtx);
- if (ret == 0)
+ if (sleepret != 0)
return (0);
- WT_RET_MSG(session, ret, "SleepConditionVariableCS");
+
+ __wt_errx(session, "SleepConditionVariableCS: %s",
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
/*
diff --git a/src/os_win/os_path.c b/src/os_win/os_path.c
index e9532de2b38..220752ce7a1 100644
--- a/src/os_win/os_path.c
+++ b/src/os_win/os_path.c
@@ -19,7 +19,7 @@ __wt_absolute_path(const char *path)
* Check for a drive name (for example, "D:"), allow both forward and
* backward slashes.
*/
- if (strlen(path) >= 3 && isalpha(path[0]) && path[1] == ':')
+ if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':')
path += 2;
return (path[0] == '/' || path[0] == '\\');
}
diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c
index 94c5a8b0ab2..a34dff776b6 100644
--- a/src/os_win/os_thread.c
+++ b/src/os_win/os_thread.c
@@ -21,7 +21,7 @@ __wt_thread_create(WT_SESSION_IMPL *session,
if (*tidret != 0)
return (0);
- WT_RET_MSG(session, __wt_errno, "thread create: _beginthreadex");
+ WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex");
}
/*
@@ -31,19 +31,24 @@ __wt_thread_create(WT_SESSION_IMPL *session,
int
__wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
{
- WT_DECL_RET;
+ DWORD windows_error;
- if ((ret = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0)
- /*
- * If we fail to wait, we will leak handles so do not continue
- */
- WT_PANIC_RET(session,
- ret == WAIT_FAILED ? __wt_getlasterror() : ret,
- "thread join: WaitForSingleObject");
+ if ((windows_error =
+ WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) {
+ if (windows_error == WAIT_FAILED)
+ windows_error = __wt_getlasterror();
+ __wt_errx(session, "thread join: WaitForSingleObject: %s",
+ __wt_formatmessage(session, windows_error));
+
+ /* If we fail to wait, we will leak handles, do not continue. */
+ return (WT_PANIC);
+ }
if (CloseHandle(tid) == 0) {
- WT_RET_MSG(session,
- __wt_getlasterror(), "thread join: CloseHandle");
+ windows_error = __wt_getlasterror();
+ __wt_errx(session, "thread join: CloseHandle: %s",
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
}
return (0);
diff --git a/src/os_win/os_winerr.c b/src/os_win/os_winerr.c
new file mode 100644
index 00000000000..70499580c48
--- /dev/null
+++ b/src/os_win/os_winerr.c
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_getlasterror --
+ *	Return the value of GetLastError, replaced by a generic Windows error
+ * when the system error code is not set, so the result is never
+ * ERROR_SUCCESS.
+ */
+DWORD
+__wt_getlasterror(void)
+{
+	DWORD last_error;
+
+	last_error = GetLastError();
+
+	/*
+	 * An unset system error code usually means the wrong error function
+	 * was called, for example, this one after an MSVC function that set
+	 * the C runtime error value instead. Don't report success from an
+	 * error path: substitute a generic failure.
+	 */
+	if (last_error != ERROR_SUCCESS)
+		return (last_error);
+	return (ERROR_INVALID_PARAMETER);
+}
+
+/*
+ * __wt_map_windows_error --
+ *	Map Windows errors to POSIX/ANSI errors.
+ *
+ * The table is searched linearly for an entry matching windows_error and the
+ * paired POSIX error is returned; WT_ERROR is returned when the table has no
+ * entry for the Windows error.
+ */
+int
+__wt_map_windows_error(DWORD windows_error)
+{
+	static const struct {
+		/*
+		 * Stored as a DWORD so the comparison with the argument is
+		 * between identical unsigned types.
+		 */
+		DWORD windows_error;
+		int posix_error;
+	} list[] = {
+		{ ERROR_ACCESS_DENIED, EACCES },
+		{ ERROR_ALREADY_EXISTS, EEXIST },
+		{ ERROR_ARENA_TRASHED, EFAULT },
+		{ ERROR_BAD_COMMAND, EFAULT },
+		{ ERROR_BAD_ENVIRONMENT, EFAULT },
+		{ ERROR_BAD_FORMAT, EFAULT },
+		{ ERROR_BAD_NETPATH, ENOENT },
+		{ ERROR_BAD_NET_NAME, ENOENT },
+		{ ERROR_BAD_PATHNAME, ENOENT },
+		{ ERROR_BROKEN_PIPE, EPIPE },
+		{ ERROR_CANNOT_MAKE, EACCES },
+		{ ERROR_CHILD_NOT_COMPLETE, ECHILD },
+		{ ERROR_CURRENT_DIRECTORY, EACCES },
+		{ ERROR_DIRECT_ACCESS_HANDLE, EBADF },
+		{ ERROR_DIR_NOT_EMPTY, ENOTEMPTY },
+		{ ERROR_DISK_FULL, ENOSPC },
+		{ ERROR_DRIVE_LOCKED, EACCES },
+		{ ERROR_FAIL_I24, EACCES },
+		{ ERROR_FILENAME_EXCED_RANGE, ENOENT },
+		{ ERROR_FILE_EXISTS, EEXIST },
+		{ ERROR_FILE_NOT_FOUND, ENOENT },
+		{ ERROR_GEN_FAILURE, EFAULT },
+		{ ERROR_INVALID_ACCESS, EACCES },
+		{ ERROR_INVALID_BLOCK, EFAULT },
+		{ ERROR_INVALID_DATA, EFAULT },
+		{ ERROR_INVALID_DRIVE, ENOENT },
+		{ ERROR_INVALID_FUNCTION, EINVAL },
+		{ ERROR_INVALID_HANDLE, EBADF },
+		{ ERROR_INVALID_PARAMETER, EINVAL },
+		{ ERROR_INVALID_TARGET_HANDLE, EBADF },
+		{ ERROR_LOCK_FAILED, EBUSY },
+		{ ERROR_LOCK_VIOLATION, EBUSY },
+		{ ERROR_MAX_THRDS_REACHED, EAGAIN },
+		{ ERROR_NEGATIVE_SEEK, EINVAL },
+		{ ERROR_NESTING_NOT_ALLOWED, EAGAIN },
+		{ ERROR_NETWORK_ACCESS_DENIED, EACCES },
+		{ ERROR_NOT_ENOUGH_MEMORY, ENOMEM },
+		{ ERROR_NOT_ENOUGH_QUOTA, ENOMEM },
+		{ ERROR_NOT_LOCKED, EACCES },
+		{ ERROR_NOT_READY, EBUSY },
+		{ ERROR_NOT_SAME_DEVICE, EXDEV },
+		{ ERROR_NO_DATA, EPIPE },
+		{ ERROR_NO_MORE_FILES, EMFILE },
+		{ ERROR_NO_PROC_SLOTS, EAGAIN },
+		{ ERROR_PATH_NOT_FOUND, ENOENT },
+		{ ERROR_READ_FAULT, EFAULT },
+		{ ERROR_RETRY, EINTR },
+		{ ERROR_SEEK_ON_DEVICE, EACCES },
+		{ ERROR_SHARING_VIOLATION, EBUSY },
+		{ ERROR_TOO_MANY_OPEN_FILES, EMFILE },
+		{ ERROR_WAIT_NO_CHILDREN, ECHILD },
+		{ ERROR_WRITE_FAULT, EFAULT },
+		{ ERROR_WRITE_PROTECT, EACCES },
+	};
+	size_t i;	/* Unsigned, to match the element count it is compared to. */
+
+	for (i = 0; i < WT_ELEMENTS(list); ++i)
+		if (windows_error == list[i].windows_error)
+			return (list[i].posix_error);
+
+	/* Untranslatable error, go generic. */
+	return (WT_ERROR);
+}
+
+/*
+ * __wt_formatmessage --
+ *	Windows error formatting: return a message string for a Windows error
+ * code, built in the session's error buffer, or a fixed fallback string when
+ * no message can be produced.
+ */
+const char *
+__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error)
+{
+	DWORD nchars;
+	const DWORD bufsize = 512;
+
+	/*
+	 * !!!
+	 * This function MUST handle a NULL session handle.
+	 *
+	 * Without a session there is no error buffer to format into; the same
+	 * fallback is used if the buffer can't be sized or the system can't
+	 * format the message.
+	 */
+	if (session != NULL &&
+	    __wt_buf_initsize(session, &session->err, bufsize) == 0) {
+		nchars = FormatMessageA(
+		    FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+		    NULL, windows_error,
+		    0,	/* Let system choose the correct LANGID. */
+		    session->err.mem, bufsize, NULL);
+		if (nchars != 0)
+			return (session->err.data);
+	}
+
+	return ("Unable to format Windows error string");
+}
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 26123f6b66d..b49946bb10e 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -299,13 +299,13 @@ static int __rec_cell_build_ovfl(WT_SESSION_IMPL *,
WT_RECONCILE *, WT_KV *, uint8_t, uint64_t);
static int __rec_cell_build_val(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, uint64_t);
-static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
+static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
static int __rec_col_fix_slvg(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *);
-static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
+ WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
+static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
static int __rec_col_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_col_var(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *);
+ WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *,
WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t);
static int __rec_destroy_session(WT_SESSION_IMPL *);
@@ -383,24 +383,27 @@ __wt_reconcile(WT_SESSION_IMPL *session,
mod->last_oldest_id = oldest_id;
/* Initialize the reconciliation structure for each new run. */
- WT_RET(__rec_write_init(
- session, ref, flags, salvage, &session->reconcile));
+ if ((ret = __rec_write_init(
+ session, ref, flags, salvage, &session->reconcile)) != 0) {
+ WT_TRET(__wt_fair_unlock(session, &page->page_lock));
+ return (ret);
+ }
r = session->reconcile;
/* Reconcile the page. */
switch (page->type) {
case WT_PAGE_COL_FIX:
if (salvage != NULL)
- ret = __rec_col_fix_slvg(session, r, page, salvage);
+ ret = __rec_col_fix_slvg(session, r, ref, salvage);
else
- ret = __rec_col_fix(session, r, page);
+ ret = __rec_col_fix(session, r, ref);
break;
case WT_PAGE_COL_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __rec_col_int(session, r, page));
+ ret = __rec_col_int(session, r, ref));
break;
case WT_PAGE_COL_VAR:
- ret = __rec_col_var(session, r, page, salvage);
+ ret = __rec_col_var(session, r, ref, salvage);
break;
case WT_PAGE_ROW_INT:
WT_WITH_PAGE_INDEX(session,
@@ -630,12 +633,12 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
*/
switch (page->type) {
case WT_PAGE_COL_INT:
- WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT,
- 1, mod->mod_multi_entries, false, &next));
+ WT_RET(__wt_page_alloc(session,
+ WT_PAGE_COL_INT, mod->mod_multi_entries, false, &next));
break;
case WT_PAGE_ROW_INT:
- WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT,
- WT_RECNO_OOB, mod->mod_multi_entries, false, &next));
+ WT_RET(__wt_page_alloc(session,
+ WT_PAGE_ROW_INT, mod->mod_multi_entries, false, &next));
break;
WT_ILLEGAL_VALUE(session);
}
@@ -1038,6 +1041,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
bool append_origv, skipped;
*updp = NULL;
+ append = NULL; /* -Wconditional-uninitialized */
btree = S2BT(session);
page = r->page;
@@ -2425,7 +2429,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
break;
case SPLIT_TRACKING_RAW:
- WT_ILLEGAL_VALUE(session);
+ return (__wt_illegal_value(session, NULL));
}
/*
@@ -2465,7 +2469,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
WT_SESSION *wt_session;
size_t corrected_page_size, extra_skip, len, result_len;
uint64_t recno;
- uint32_t entry, i, result_slots, slots;
+ uint32_t entry, i, max_image_slot, result_slots, slots;
bool last_block;
uint8_t *dsk_start;
@@ -2525,7 +2529,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
if (dsk->type == WT_PAGE_COL_VAR)
recno = last->recno;
- entry = slots = 0;
+ entry = max_image_slot = slots = 0;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
++entry;
@@ -2575,6 +2579,15 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
dsk->type == WT_PAGE_COL_VAR)
r->raw_recnos[slots] = recno;
r->raw_entries[slots] = entry;
+
+ /*
+ * Don't create an image so large that any future update will
+ * cause a split in memory. Use half of the maximum size so
+ * we split very compressible pages that have reached the
+ * maximum size in memory into two equal blocks.
+ */
+ if (len > (size_t)btree->maxmempage / 2)
+ max_image_slot = slots;
}
/*
@@ -2634,21 +2647,32 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
ret = compressor->compress_raw(compressor, wt_session,
r->page_size_orig, btree->split_pct,
WT_BLOCK_COMPRESS_SKIP + extra_skip,
- (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP,
- r->raw_offsets, slots,
+ (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets,
+ no_more_rows || max_image_slot == 0 ? slots : max_image_slot,
(uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP,
- result_len, no_more_rows, &result_len, &result_slots);
+ result_len,
+ no_more_rows || max_image_slot != 0,
+ &result_len, &result_slots);
switch (ret) {
case EAGAIN:
/*
- * The compression function wants more rows; accumulate and
- * retry.
+ * The compression function wants more rows, accumulate and
+ * retry if possible.
*
- * Reset the resulting slots count, just in case the compression
- * function modified it before giving up.
+ * First, reset the resulting slots count, just in case the
+ * compression function modified it before giving up.
*/
result_slots = 0;
- break;
+
+ /*
+ * If the image is too large and there are more rows to gather,
+ * act as if the compression engine gave up on this chunk of
+ * data. That doesn't make sense (we flagged the engine that we
+ * wouldn't give it any more rows, but it's a possible return).
+ */
+ if (no_more_rows || max_image_slot == 0)
+ break;
+ /* FALLTHROUGH */
case 0:
/*
* If the compression function returned zero result slots, it's
@@ -2936,7 +2960,6 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* wrote anything, or there's a remaindered block of data.
*/
break;
- WT_ILLEGAL_VALUE(session);
}
/*
@@ -3307,6 +3330,8 @@ supd_check_complete:
}
bnd->entries = r->entries;
+
+#ifdef HAVE_VERBOSE
/* Output a verbose message if we create a page without many entries */
if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6)
WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
@@ -3316,6 +3341,7 @@ supd_check_complete:
r->entries, r->page->memory_footprint, r->bnd_next,
F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint",
r->bnd_state));
+#endif
WT_ERR(__wt_bt_write(session,
buf, addr, &addr_size, false, bnd->already_compressed));
@@ -3431,7 +3457,7 @@ __rec_update_las(WT_SESSION_IMPL *session,
case WT_PAGE_ROW_LEAF:
if (list->ins == NULL) {
slot = WT_ROW_SLOT(page, list->rip);
- upd = page->pg_row_upd[slot];
+ upd = page->modify->mod_row_update[slot];
} else
upd = list->ins->upd;
break;
@@ -3504,6 +3530,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
r = cbulk->reconcile;
r->is_bulk_load = true;
+ recno = WT_RECNO_OOB; /* -Werror=maybe-uninitialized */
switch (btree->type) {
case BTREE_COL_FIX:
case BTREE_COL_VAR:
@@ -3512,7 +3539,6 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
case BTREE_ROW:
recno = WT_RECNO_OOB;
break;
- WT_ILLEGAL_VALUE(session);
}
return (__rec_split_init(
@@ -3546,7 +3572,6 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
break;
case BTREE_ROW:
break;
- WT_ILLEGAL_VALUE(session);
}
WT_RET(__rec_split_finish(session, r));
@@ -3787,7 +3812,7 @@ __rec_vtype(WT_ADDR *addr)
* Reconcile a column-store internal page.
*/
static int
-__rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+__rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
WT_ADDR *addr;
WT_BTREE *btree;
@@ -3795,11 +3820,12 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_CHILD_STATE state;
WT_DECL_RET;
WT_KV *val;
- WT_PAGE *child;
+ WT_PAGE *child, *page;
WT_REF *ref;
bool hazard;
btree = S2BT(session);
+ page = pageref->page;
child = NULL;
hazard = false;
@@ -3807,12 +3833,12 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
vpack = &_vpack;
WT_RET(__rec_split_init(
- session, r, page, page->pg_intl_recno, btree->maxintlpage));
+ session, r, page, pageref->ref_recno, btree->maxintlpage));
/* For each entry in the in-memory page... */
WT_INTL_FOREACH_BEGIN(session, page, ref) {
/* Update the starting record number in case we split. */
- r->recno = ref->key.recno;
+ r->recno = ref->ref_recno;
/*
* Modified child.
@@ -3886,7 +3912,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
} else
__rec_cell_build_addr(session, r,
addr->addr, addr->size,
- __rec_vtype(addr), ref->key.recno);
+ __rec_vtype(addr), ref->ref_recno);
WT_CHILD_RELEASE_ERR(session, hazard, ref);
/* Boundary: split or write the page. */
@@ -3951,31 +3977,34 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* Reconcile a fixed-width, column-store leaf page.
*/
static int
-__rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+__rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
WT_BTREE *btree;
WT_INSERT *ins;
+ WT_PAGE *page;
WT_UPDATE *upd;
uint64_t recno;
uint32_t entry, nrecs;
btree = S2BT(session);
+ page = pageref->page;
WT_RET(__rec_split_init(
- session, r, page, page->pg_fix_recno, btree->maxleafpage));
+ session, r, page, pageref->ref_recno, btree->maxleafpage));
+
+ /* Copy the original, disk-image bytes into place. */
+ memcpy(r->first_free, page->pg_fix_bitf,
+ __bitstr_size((size_t)page->pg_fix_entries * btree->bitcnt));
/* Update any changes to the original on-page data items. */
WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd));
if (upd != NULL)
- __bit_setv_recno(page, WT_INSERT_RECNO(ins),
- btree->bitcnt, ((uint8_t *)WT_UPDATE_DATA(upd))[0]);
+ __bit_setv(r->first_free,
+ WT_INSERT_RECNO(ins) - pageref->ref_recno,
+ btree->bitcnt, *(uint8_t *)WT_UPDATE_DATA(upd));
}
- /* Copy the updated, disk-image bytes into place. */
- memcpy(r->first_free, page->pg_fix_bitf,
- __bitstr_size((size_t)page->pg_fix_entries * btree->bitcnt));
-
/* Calculate the number of entries per page remainder. */
entry = page->pg_fix_entries;
nrecs = WT_FIX_BYTES_TO_ENTRIES(
@@ -4002,7 +4031,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* the last key on this page, we have to decrement it.
*/
if ((recno =
- page->modify->mod_split_recno) == WT_RECNO_OOB)
+ page->modify->mod_col_split_recno) == WT_RECNO_OOB)
break;
recno -= 1;
@@ -4032,7 +4061,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (nrecs > 0) {
__bit_setv(r->first_free, entry, btree->bitcnt,
upd == NULL ? 0 :
- ((uint8_t *)WT_UPDATE_DATA(upd))[0]);
+ *(uint8_t *)WT_UPDATE_DATA(upd));
--nrecs;
++entry;
++r->recno;
@@ -4076,13 +4105,15 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
*/
static int
__rec_col_fix_slvg(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
+ WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
WT_BTREE *btree;
+ WT_PAGE *page;
uint64_t page_start, page_take;
uint32_t entry, nrecs;
btree = S2BT(session);
+ page = pageref->page;
/*
* !!!
@@ -4097,7 +4128,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
* don't want to have to retrofit the code later.
*/
WT_RET(__rec_split_init(
- session, r, page, page->pg_fix_recno, btree->maxleafpage));
+ session, r, page, pageref->ref_recno, btree->maxleafpage));
/* We may not be taking all of the entries on the original page. */
page_take = salvage->take == 0 ? page->pg_fix_entries : salvage->take;
@@ -4220,7 +4251,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
static int
__rec_col_var(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
+ WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
WT_BTREE *btree;
@@ -4231,6 +4262,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_INSERT *ins;
WT_ITEM *last;
+ WT_PAGE *page;
WT_UPDATE *upd;
uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
uint32_t i, size;
@@ -4238,17 +4270,18 @@ __rec_col_var(WT_SESSION_IMPL *session,
const void *data;
btree = S2BT(session);
+ page = pageref->page;
last = r->last;
vpack = &_vpack;
+ WT_RET(__rec_split_init(
+ session, r, page, pageref->ref_recno, btree->maxleafpage));
+
WT_RET(__wt_scr_alloc(session, 0, &orig));
data = NULL;
size = 0;
upd = NULL;
- WT_RET(__rec_split_init(
- session, r, page, page->pg_var_recno, btree->maxleafpage));
-
/*
* The salvage code may be calling us to reconcile a page where there
* were missing records in the column-store name space. If taking the
@@ -4561,7 +4594,8 @@ compare: /*
* first key on the split page, that is, one larger than
* the last key on this page, we have to decrement it.
*/
- if ((n = page->modify->mod_split_recno) == WT_RECNO_OOB)
+ if ((n = page->
+ modify->mod_col_split_recno) == WT_RECNO_OOB)
break;
WT_ASSERT(session, n >= src_recno);
n -= 1;
@@ -4990,8 +5024,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
* Temporary buffers in which to instantiate any uninstantiated keys
* or value items we need.
*/
- WT_RET(__wt_scr_alloc(session, 0, &tmpkey));
- WT_RET(__wt_scr_alloc(session, 0, &tmpval));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpkey));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpval));
/* For each entry in the page... */
WT_ROW_FOREACH(page, rip, i) {
@@ -5151,7 +5185,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
* can't remove them from the in-memory
* tree; if an overflow key was deleted
* without being instantiated (for
- * example, cursor-based truncation, do
+ * example, cursor-based truncation), do
* it now.
*/
if (ikey == NULL)
@@ -5430,18 +5464,24 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
__wt_free(session, multi->key.ikey);
break;
}
- if (multi->disk_image == NULL) {
- if (multi->addr.reuse)
- multi->addr.addr = NULL;
- else {
- WT_RET(__wt_btree_block_free(session,
- multi->addr.addr, multi->addr.size));
- __wt_free(session, multi->addr.addr);
- }
- } else {
- __wt_free(session, multi->supd);
- __wt_free(session, multi->disk_image);
+
+ /*
+ * If the page was re-written free the backing disk blocks used
+ * in the previous write (unless the blocks were reused in this
+ * write). The page may instead have been a disk image with
+ * associated saved updates: ownership of the disk image is
+ * transferred when rewriting the page in-memory and there may
+ * not have been saved updates. We've gotten this wrong a few
+ * times, so use the existence of an address to confirm backing
+ * blocks we care about, and free any disk image/saved updates.
+ */
+ if (multi->addr.addr != NULL && !multi->addr.reuse) {
+ WT_RET(__wt_btree_block_free(
+ session, multi->addr.addr, multi->addr.size));
+ __wt_free(session, multi->addr.addr);
}
+ __wt_free(session, multi->supd);
+ __wt_free(session, multi->disk_image);
}
__wt_free(session, mod->mod_multi);
mod->mod_multi_entries = 0;
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 756f1fdcc6c..f250612d0ae 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -35,7 +35,7 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session,
* units of its happy place.
*/
if (FLD_ISSET(conn->direct_io,
- WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) {
+ WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) {
align = (int64_t)conn->buffer_alignment;
if (align != 0 && (cval.val < align || cval.val % align != 0))
WT_RET_MSG(session, EINVAL,
@@ -578,7 +578,7 @@ __create_table(WT_SESSION_IMPL *session,
WT_ERR(EEXIST);
exists = true;
}
- WT_RET_NOTFOUND_OK(ret);
+ WT_ERR_NOTFOUND_OK(ret);
WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval));
WT_ERR(__wt_config_subinit(session, &conf, &cval));
diff --git a/src/schema/schema_list.c b/src/schema/schema_list.c
index 5e9caf94b7a..79e3ef1da7c 100644
--- a/src/schema/schema_list.c
+++ b/src/schema/schema_list.c
@@ -20,6 +20,8 @@ __schema_add_table(WT_SESSION_IMPL *session,
WT_TABLE *table;
uint64_t bucket;
+ table = NULL; /* -Wconditional-uninitialized */
+
/* Make sure the metadata is open before getting other locks. */
WT_RET(__wt_metadata_cursor(session, NULL));
diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c
index e7ce4e42498..1554d021953 100644
--- a/src/schema/schema_open.c
+++ b/src/schema/schema_open.c
@@ -427,6 +427,8 @@ __schema_open_table(WT_SESSION_IMPL *session,
const char *tconfig;
char *tablename;
+ *tablep = NULL;
+
cursor = NULL;
table = NULL;
tablename = NULL;
@@ -527,6 +529,8 @@ __wt_schema_get_colgroup(WT_SESSION_IMPL *session,
const char *tablename, *tend;
u_int i;
+ if (tablep != NULL)
+ *tablep = NULL;
*colgroupp = NULL;
tablename = uri;
@@ -571,6 +575,8 @@ __wt_schema_get_index(WT_SESSION_IMPL *session,
const char *tablename, *tend;
u_int i;
+ if (tablep != NULL)
+ *tablep = NULL;
*indexp = NULL;
tablename = uri;
diff --git a/src/schema/schema_project.c b/src/schema/schema_project.c
index 4d29b2baa13..fd59539ae89 100644
--- a/src/schema/schema_project.c
+++ b/src/schema/schema_project.c
@@ -353,7 +353,8 @@ __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp,
/* Make sure the types are compatible. */
WT_ASSERT(session,
- tolower(pv.type) == tolower(vpv.type));
+ __wt_tolower((u_char)pv.type) ==
+ __wt_tolower((u_char)vpv.type));
pv.u = vpv.u;
len = __pack_size(session, &pv);
@@ -459,7 +460,8 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session,
WT_RET(__pack_next(&vpack, &vpv));
/* Make sure the types are compatible. */
WT_ASSERT(session,
- tolower(pv.type) == tolower(vpv.type));
+ __wt_tolower((u_char)pv.type) ==
+ __wt_tolower((u_char)vpv.type));
vpv.u = pv.u;
len = __pack_size(session, &vpv);
WT_RET(__wt_buf_grow(session,
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 21402ed9332..8f4d374fd22 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -55,7 +55,7 @@ __rename_file(
default:
WT_ERR(ret);
}
- WT_ERR(__wt_exist(session, newfile, &exist));
+ WT_ERR(__wt_fs_exist(session, newfile, &exist));
if (exist)
WT_ERR_MSG(session, EEXIST, "%s", newfile);
@@ -64,7 +64,7 @@ __rename_file(
WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));
/* Rename the underlying file. */
- WT_ERR(__wt_rename(session, filename, newfile));
+ WT_ERR(__wt_fs_rename(session, filename, newfile));
if (WT_META_TRACKING(session))
WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
diff --git a/src/schema/schema_stat.c b/src/schema/schema_stat.c
index d3d0605c60a..c204d6b1a24 100644
--- a/src/schema/schema_stat.c
+++ b/src/schema/schema_stat.c
@@ -69,6 +69,7 @@ __curstat_size_only(WT_SESSION_IMPL *session,
WT_ITEM namebuf;
wt_off_t filesize;
char *tableconf;
+ bool exist;
WT_CLEAR(namebuf);
*was_fast = false;
@@ -96,10 +97,11 @@ __curstat_size_only(WT_SESSION_IMPL *session,
* are concurrent schema level operations (for example drop). That is
* fine - failing here results in falling back to the slow path of
* opening the handle.
- * !!! Deliberately discard the return code from a failed call - the
- * error is flagged by not setting fast to true.
*/
- if (__wt_filesize_name(session, namebuf.data, true, &filesize) == 0) {
+ WT_ERR(__wt_fs_exist(session, namebuf.data, &exist));
+ if (exist) {
+ WT_ERR(__wt_fs_size(session, namebuf.data, &filesize));
+
/* Setup and populate the statistics structure */
__wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
cst->u.dsrc_stats.block_size = filesize;
diff --git a/src/session/session_api.c b/src/session/session_api.c
index eaa3781169b..77d1dc74c84 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -796,8 +796,8 @@ static int
__session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
WT_CURSOR *ref_cursor, const char *config)
{
- WT_CURSOR *firstcg;
WT_CONFIG_ITEM cval;
+ WT_CURSOR *firstcg;
WT_CURSOR_INDEX *cindex;
WT_CURSOR_JOIN *cjoin;
WT_CURSOR_TABLE *ctable;
@@ -805,15 +805,18 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
WT_INDEX *idx;
WT_SESSION_IMPL *session;
WT_TABLE *table;
+ bool nested;
uint64_t count;
uint32_t bloom_bit_count, bloom_hash_count;
uint8_t flags, range;
- count = 0;
- firstcg = NULL;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, join, config, cfg);
+
+ firstcg = NULL;
table = NULL;
+ nested = false;
+ count = 0;
if (!WT_PREFIX_MATCH(join_cursor->uri, "join:"))
WT_ERR_MSG(session, EINVAL, "not a join cursor");
@@ -828,19 +831,25 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
ctable = (WT_CURSOR_TABLE *)ref_cursor;
table = ctable->table;
firstcg = ctable->cg_cursors[0];
+ } else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) {
+ idx = NULL;
+ table = ((WT_CURSOR_JOIN *)ref_cursor)->table;
+ nested = true;
} else
- WT_ERR_MSG(session, EINVAL, "not an index or table cursor");
+ WT_ERR_MSG(session, EINVAL,
+ "ref_cursor must be an index, table or join cursor");
- if (!F_ISSET(firstcg, WT_CURSTD_KEY_SET))
+ if (firstcg != NULL && !F_ISSET(firstcg, WT_CURSTD_KEY_SET))
WT_ERR_MSG(session, EINVAL,
"requires reference cursor be positioned");
cjoin = (WT_CURSOR_JOIN *)join_cursor;
if (cjoin->table != table)
WT_ERR_MSG(session, EINVAL,
- "table for join cursor does not match table for index");
+ "table for join cursor does not match table for "
+ "ref_cursor");
if (F_ISSET(ref_cursor, WT_CURSTD_JOINED))
WT_ERR_MSG(session, EINVAL,
- "index cursor already used in a join");
+ "cursor already used in a join");
/* "ge" is the default */
range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ;
@@ -879,15 +888,20 @@ __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
WT_ERR_MSG(session, EINVAL,
"bloom_hash_count: value too large");
bloom_hash_count = (uint32_t)cval.val;
- if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)) {
- if (count == 0)
- WT_ERR_MSG(session, EINVAL,
- "count must be nonzero when strategy=bloom");
- if (cjoin->entries_next == 0)
- WT_ERR_MSG(session, EINVAL,
- "the first joined cursor cannot specify "
- "strategy=bloom");
- }
+ if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0)
+ WT_ERR_MSG(session, EINVAL,
+ "count must be nonzero when strategy=bloom");
+
+ WT_ERR(__wt_config_gets(session, cfg, "operation", &cval));
+ if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len))
+ LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION);
+
+ if (nested && (count != 0 || range != WT_CURJOIN_END_EQ ||
+ LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)))
+ WT_ERR_MSG(session, EINVAL,
+ "joining a nested join cursor is incompatible with "
+ "setting \"strategy\", \"compare\" or \"count\"");
+
WT_ERR(__wt_curjoin_join(session, cjoin, idx, ref_cursor, flags,
range, count, bloom_bit_count, bloom_hash_count));
/*
@@ -1106,7 +1120,7 @@ __session_truncate(WT_SESSION *wt_session,
if (!WT_STREQ(uri, "log:"))
WT_ERR_MSG(session, EINVAL,
"the truncate method should not specify any"
- "target after the log: URI prefix.");
+ "target after the log: URI prefix");
WT_ERR(__wt_log_truncate_files(session, start, cfg));
} else if (WT_PREFIX_MATCH(uri, "file:"))
WT_ERR(__wt_session_range_truncate(
@@ -1509,11 +1523,11 @@ err: WT_TRET(__wt_writeunlock(session, txn_global->nsnap_rwlock));
}
/*
- * __session_strerror --
+ * __wt_session_strerror --
* WT_SESSION->strerror method.
*/
-static const char *
-__session_strerror(WT_SESSION *wt_session, int error)
+const char *
+__wt_session_strerror(WT_SESSION *wt_session, int error)
{
WT_SESSION_IMPL *session;
@@ -1536,7 +1550,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
NULL,
__session_close,
__session_reconfigure,
- __session_strerror,
+ __wt_session_strerror,
__session_open_cursor,
__session_create,
__wt_session_compact,
@@ -1563,7 +1577,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
NULL,
__session_close,
__session_reconfigure,
- __session_strerror,
+ __wt_session_strerror,
__session_open_cursor,
__session_create_readonly,
__wt_session_compact_readonly,
@@ -1672,7 +1686,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
* __wt_hazard_close ensures the array is cleared - so it is safe to
* reset the starting size on each open.
*/
- session_ret->hazard_size = WT_HAZARD_INCR;
+ session_ret->hazard_size = 0;
/*
* Configuration: currently, the configuration for open_session is the
diff --git a/src/support/err.c b/src/support/err.c
index f64492f1561..93c0af37328 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -24,7 +24,7 @@ __handle_error_default(WT_EVENT_HANDLER *handler,
session = (WT_SESSION_IMPL *)wt_session;
WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg));
- WT_RET(__wt_fsync(session, WT_STDERR(session), true));
+ WT_RET(__wt_fflush(session, WT_STDERR(session)));
return (0);
}
@@ -42,7 +42,7 @@ __handle_message_default(WT_EVENT_HANDLER *handler,
session = (WT_SESSION_IMPL *)wt_session;
WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message));
- WT_RET(__wt_fsync(session, WT_STDOUT(session), true));
+ WT_RET(__wt_fflush(session, WT_STDOUT(session)));
return (0);
}
@@ -469,6 +469,9 @@ void
__wt_assert(WT_SESSION_IMPL *session,
int error, const char *file_name, int line_number, const char *fmt, ...)
WT_GCC_FUNC_ATTRIBUTE((format (printf, 5, 6)))
+#ifdef HAVE_DIAGNOSTIC
+ WT_GCC_FUNC_ATTRIBUTE((noreturn))
+#endif
{
va_list ap;
@@ -493,7 +496,10 @@ __wt_panic(WT_SESSION_IMPL *session)
F_SET(S2C(session), WT_CONN_PANIC);
__wt_err(session, WT_PANIC, "the process must exit and restart");
-#if !defined(HAVE_DIAGNOSTIC)
+#if defined(HAVE_DIAGNOSTIC)
+ __wt_abort(session); /* Drop core if testing. */
+ /* NOTREACHED */
+#else
/*
* Chaos reigns within.
* Reflect, repent, and reboot.
@@ -501,9 +507,6 @@ __wt_panic(WT_SESSION_IMPL *session)
*/
return (WT_PANIC);
#endif
-
- __wt_abort(session); /* Drop core if testing. */
- /* NOTREACHED */
}
/*
@@ -517,12 +520,12 @@ __wt_illegal_value(WT_SESSION_IMPL *session, const char *name)
name == NULL ? "" : name, name == NULL ? "" : ": ",
"encountered an illegal file format or internal value");
-#if !defined(HAVE_DIAGNOSTIC)
- return (__wt_panic(session));
-#endif
-
+#if defined(HAVE_DIAGNOSTIC)
__wt_abort(session); /* Drop core if testing. */
/* NOTREACHED */
+#else
+ return (__wt_panic(session));
+#endif
}
/*
diff --git a/src/support/global.c b/src/support/global.c
index e0d5bafeaa8..eba88bf2b20 100644
--- a/src/support/global.c
+++ b/src/support/global.c
@@ -111,11 +111,13 @@ void
__wt_attach(WT_SESSION_IMPL *session)
{
#ifdef HAVE_ATTACH
+ u_int i;
+
__wt_errx(session, "process ID %" PRIdMAX
": waiting for debugger...", (intmax_t)getpid());
/* Sleep forever, the debugger will interrupt us when it attaches. */
- for (;;)
+ for (i = 0; i < WT_MILLION; ++i)
__wt_sleep(10, 0);
#else
WT_UNUSED(session);
diff --git a/src/support/hash_city.c b/src/support/hash_city.c
index 7a700aa809c..8354532e820 100644
--- a/src/support/hash_city.c
+++ b/src/support/hash_city.c
@@ -85,6 +85,7 @@ static uint32_t UNALIGNED_LOAD32(const char *p) {
return (result);
}
+#ifdef WORDS_BIGENDIAN
#ifdef _MSC_VER
#include <stdlib.h>
@@ -132,7 +133,6 @@ static uint32_t UNALIGNED_LOAD32(const char *p) {
#endif
-#ifdef WORDS_BIGENDIAN
#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))
#else
diff --git a/src/support/hazard.c b/src/support/hazard.c
index 13e0eb3b9ac..dee85586a4d 100644
--- a/src/support/hazard.c
+++ b/src/support/hazard.c
@@ -121,7 +121,8 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
return (0);
}
- __wt_errx(session, "session %p: hazard pointer table full", session);
+ __wt_errx(session,
+ "session %p: hazard pointer table full", (void *)session);
#ifdef HAVE_DIAGNOSTIC
__hazard_dump(session);
#endif
@@ -176,7 +177,8 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
* because using a page we didn't have pinned down implies corruption.
*/
WT_PANIC_RET(session, EINVAL,
- "session %p: clear hazard pointer: %p: not found", session, page);
+ "session %p: clear hazard pointer: %p: not found",
+ (void *)session, (void *)page);
}
/*
@@ -204,7 +206,8 @@ __wt_hazard_close(WT_SESSION_IMPL *session)
return;
__wt_errx(session,
- "session %p: close hazard pointer table: table not empty", session);
+ "session %p: close hazard pointer table: table not empty",
+ (void *)session);
#ifdef HAVE_DIAGNOSTIC
__hazard_dump(session);
@@ -232,7 +235,7 @@ __wt_hazard_close(WT_SESSION_IMPL *session)
__wt_errx(session,
"session %p: close hazard pointer table: count didn't "
"match entries",
- session);
+ (void *)session);
}
#ifdef HAVE_DIAGNOSTIC
@@ -250,6 +253,7 @@ __hazard_dump(WT_SESSION_IMPL *session)
if (hp->page != NULL)
__wt_errx(session,
"session %p: hazard pointer %p: %s, line %d",
- session, hp->page, hp->file, hp->line);
+ (void *)session,
+ (void *)hp->page, hp->file, hp->line);
}
#endif
diff --git a/src/support/hex.c b/src/support/hex.c
index d42a84154ca..5c48ce8b74a 100644
--- a/src/support/hex.c
+++ b/src/support/hex.c
@@ -84,7 +84,7 @@ __wt_raw_to_esc_hex(
WT_RET(__wt_buf_init(session, to, size * 3 + 1));
for (p = from, t = to->mem, i = size; i > 0; --i, ++p)
- if (isprint((int)*p)) {
+ if (__wt_isprint((u_char)*p)) {
if (*p == '\\')
*t++ = '\\';
*t++ = *p;
diff --git a/src/support/huffman.c b/src/support/huffman.c
index 1e1aaeab5b5..05612cdbe80 100644
--- a/src/support/huffman.c
+++ b/src/support/huffman.c
@@ -230,19 +230,19 @@ set_codes(WT_FREQTREE_NODE *node,
* lower-order bits for consecutive numbering.
*/
if (len < MAX_CODE_LENGTH &&
- ((half = 1 << (remaining - 1)) < node->left->weight ||
- half < node->right->weight)) {
- pattern = pattern << remaining;
+ ((half = (uint16_t)(1 << (remaining - 1))) <
+ node->left->weight || half < node->right->weight)) {
+ pattern = (uint16_t)(pattern << remaining);
len = MAX_CODE_LENGTH;
}
if (len < MAX_CODE_LENGTH) {
- patternleft = (pattern << 1) | 0;
- patternright = (pattern << 1) | 1;
+ patternleft = (uint16_t)((pattern << 1) | 0);
+ patternright = (uint16_t)((pattern << 1) | 1);
len++;
} else { /* "low bit mode" */
patternleft = pattern;
- patternright = pattern + node->left->weight;
+ patternright = (uint16_t)(pattern + node->left->weight);
/* len unchanged */
}
@@ -284,12 +284,12 @@ make_table(WT_SESSION_IMPL *session, uint8_t *code2symbol,
* than necessary, we allocate (2 ^ max-code-length) of them.
*/
c = codes[i].pattern;
- shift = max_depth - len;
+ shift = (uint8_t)(max_depth - len);
c1 = (uint32_t)c << shift;
c2 = (uint32_t)(c + 1) << shift;
for (j = c1; j < c2; j++) {
WT_ASSERT(session, code2symbol[j] == 0);
- code2symbol[j] = i;
+ code2symbol[j] = (uint8_t)i;
}
}
}
@@ -694,7 +694,7 @@ __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg,
* used in the last byte, unless they're 0, in which case there are 8
* bits used in the last byte.
*/
- padding_info = (bitpos % 8) << (8 - WT_HUFFMAN_HEADER);
+ padding_info = (uint8_t)((bitpos % 8) << (8 - WT_HUFFMAN_HEADER));
((uint8_t *)tmp->mem)[0] |= padding_info;
/* Copy result of exact known size into caller's buffer. */
@@ -808,11 +808,12 @@ __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg,
valid += 8;
from_bytes--;
}
- pattern = valid >= max ? /* short patterns near end */
- (bits >> (valid - max)) : (bits << (max - valid));
+ pattern = (uint16_t)
+ (valid >= max ? /* short patterns near end */
+ (bits >> (valid - max)) : (bits << (max - valid)));
symbol = huffman->code2symbol[pattern & mask];
len = huffman->codes[symbol].length;
- valid -= len;
+ valid -= (uint8_t)len;
/*
* from_len_bits is the total number of input bits, reduced by
diff --git a/src/support/scratch.c b/src/support/scratch.c
index aea98dc49ef..69987ebc852 100644
--- a/src/support/scratch.c
+++ b/src/support/scratch.c
@@ -117,7 +117,7 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
p = (char *)((uint8_t *)buf->mem + buf->size);
WT_ASSERT(session, buf->memsize >= buf->size);
space = buf->memsize - buf->size;
- len = (size_t)vsnprintf(p, (size_t)space, fmt, ap);
+ len = (size_t)vsnprintf(p, space, fmt, ap);
va_end(ap);
/* Check if there was enough space. */
@@ -135,6 +135,64 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
}
/*
+ * __wt_buf_set_printable --
+ * Set the contents of the buffer to a printable representation of a
+ * byte string.
+ */
+const char *
+__wt_buf_set_printable(
+ WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf)
+{
+ if (__wt_raw_to_esc_hex(session, p, size, buf)) {
+ buf->data = "[Error]";
+ buf->size = strlen("[Error]");
+ }
+ return (buf->data);
+}
+
+/*
+ * __wt_buf_set_size --
+ * Set the contents of the buffer to a printable representation of a
+ * byte size.
+ */
+const char *
+__wt_buf_set_size(
+ WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf)
+{
+ WT_DECL_RET;
+
+ if (size >= WT_EXABYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "EB", size / WT_EXABYTE);
+ else if (size >= WT_PETABYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "PB", size / WT_PETABYTE);
+ else if (size >= WT_TERABYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "TB", size / WT_TERABYTE);
+ else if (size >= WT_GIGABYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "GB", size / WT_GIGABYTE);
+ else if (size >= WT_MEGABYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "MB", size / WT_MEGABYTE);
+ else if (size >= WT_KILOBYTE)
+ ret = __wt_buf_fmt(session, buf,
+ "%" PRIu64 "KB", size / WT_KILOBYTE);
+ else
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "B", size);
+
+ if (ret == 0 && exact && size >= WT_KILOBYTE)
+ ret = __wt_buf_catfmt(session, buf, " (%" PRIu64 ")", size);
+
+ if (ret != 0) {
+ buf->data = "[Error]";
+ buf->size = strlen("[Error]");
+ }
+ return (buf->data);
+}
+
+/*
* __wt_scr_alloc_func --
* Scratch buffer allocation function.
*/
diff --git a/src/support/stat.c b/src/support/stat.c
index 2f5609567da..d972f0c140f 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -43,7 +43,6 @@ static const char * const __stats_dsrc_desc[] = {
"btree: pages rewritten by compaction",
"btree: row-store internal pages",
"btree: row-store leaf pages",
- "cache: bytes currently in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -61,6 +60,7 @@ static const char * const __stats_dsrc_desc[] = {
"cache: page written requiring lookaside records",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
+ "cache: pages requested from the cache",
"cache: pages written from cache",
"cache: pages written requiring in-memory restoration",
"cache: unmodified pages evicted",
@@ -173,7 +173,6 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->btree_compact_rewrite = 0;
stats->btree_row_internal = 0;
stats->btree_row_leaf = 0;
- /* not clearing cache_bytes_inuse */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -191,6 +190,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_write_lookaside = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
+ stats->cache_pages_requested = 0;
stats->cache_write = 0;
stats->cache_write_restore = 0;
stats->cache_eviction_clean = 0;
@@ -300,7 +300,6 @@ __wt_stat_dsrc_aggregate_single(
to->btree_compact_rewrite += from->btree_compact_rewrite;
to->btree_row_internal += from->btree_row_internal;
to->btree_row_leaf += from->btree_row_leaf;
- to->cache_bytes_inuse += from->cache_bytes_inuse;
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
@@ -319,6 +318,7 @@ __wt_stat_dsrc_aggregate_single(
to->cache_write_lookaside += from->cache_write_lookaside;
to->cache_read += from->cache_read;
to->cache_read_lookaside += from->cache_read_lookaside;
+ to->cache_pages_requested += from->cache_pages_requested;
to->cache_write += from->cache_write;
to->cache_write_restore += from->cache_write_restore;
to->cache_eviction_clean += from->cache_eviction_clean;
@@ -433,7 +433,6 @@ __wt_stat_dsrc_aggregate(
WT_STAT_READ(from, btree_compact_rewrite);
to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
- to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -459,6 +458,8 @@ __wt_stat_dsrc_aggregate(
WT_STAT_READ(from, cache_write_lookaside);
to->cache_read += WT_STAT_READ(from, cache_read);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_pages_requested +=
+ WT_STAT_READ(from, cache_pages_requested);
to->cache_write += WT_STAT_READ(from, cache_write);
to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
@@ -547,17 +548,25 @@ static const char * const __stats_connection_desc[] = {
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
+ "cache: eviction calls to get a page",
+ "cache: eviction calls to get a page found queue empty",
+ "cache: eviction calls to get a page found queue empty after locking",
"cache: eviction currently operating in aggressive mode",
"cache: eviction server candidate queue empty when topping up",
"cache: eviction server candidate queue not empty when topping up",
"cache: eviction server evicting pages",
"cache: eviction server populating queue, but not evicting pages",
+ "cache: eviction server skipped very large page",
+ "cache: eviction server slept, because we did not make progress with eviction",
"cache: eviction server unable to reach eviction goal",
"cache: eviction worker thread evicting pages",
"cache: failed eviction of pages that exceeded the in-memory maximum",
"cache: files with active eviction walks",
"cache: files with new eviction walks started",
"cache: hazard pointer blocked page eviction",
+ "cache: hazard pointer check calls",
+ "cache: hazard pointer check entries walked",
+ "cache: hazard pointer maximum array length",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
"cache: internal pages evicted",
@@ -579,6 +588,7 @@ static const char * const __stats_connection_desc[] = {
"cache: pages queued for urgent eviction",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
+ "cache: pages requested from the cache",
"cache: pages seen by eviction walk",
"cache: pages selected for eviction unable to be evicted",
"cache: pages walked for eviction",
@@ -600,6 +610,7 @@ static const char * const __stats_connection_desc[] = {
"connection: pthread mutex condition wait calls",
"connection: pthread mutex shared lock read-lock calls",
"connection: pthread mutex shared lock write-lock calls",
+ "connection: total fsync I/Os",
"connection: total read I/Os",
"connection: total write I/Os",
"cursor: cursor create calls",
@@ -642,7 +653,9 @@ static const char * const __stats_connection_desc[] = {
"log: log server thread advances write LSN",
"log: log server thread write LSN walk skipped",
"log: log sync operations",
+ "log: log sync time duration (usecs)",
"log: log sync_dir operations",
+ "log: log sync_dir time duration (usecs)",
"log: log write operations",
"log: logging bytes consolidated",
"log: maximum log file size",
@@ -664,6 +677,9 @@ static const char * const __stats_connection_desc[] = {
"reconciliation: split objects currently awaiting free",
"session: open cursor count",
"session: open session count",
+ "thread-state: active filesystem fsync calls",
+ "thread-state: active filesystem read calls",
+ "thread-state: active filesystem write calls",
"thread-yield: page acquire busy blocked",
"thread-yield: page acquire eviction blocked",
"thread-yield: page acquire locked blocked",
@@ -680,6 +696,10 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction checkpoint total time (msecs)",
"transaction: transaction checkpoints",
"transaction: transaction failures due to cache overflow",
+ "transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
+ "transaction: transaction fsync calls for checkpoint before allocating the transaction ID",
+ "transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
+ "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)",
"transaction: transaction range of IDs currently pinned",
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
@@ -750,17 +770,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_get_ref = 0;
+ stats->cache_eviction_get_ref_empty = 0;
+ stats->cache_eviction_get_ref_empty2 = 0;
/* not clearing cache_eviction_aggressive_set */
stats->cache_eviction_queue_empty = 0;
stats->cache_eviction_queue_not_empty = 0;
stats->cache_eviction_server_evicting = 0;
stats->cache_eviction_server_not_evicting = 0;
+ stats->cache_eviction_server_toobig = 0;
+ stats->cache_eviction_server_slept = 0;
stats->cache_eviction_slow = 0;
stats->cache_eviction_worker_evicting = 0;
stats->cache_eviction_force_fail = 0;
/* not clearing cache_eviction_walks_active */
stats->cache_eviction_walks_started = 0;
stats->cache_eviction_hazard = 0;
+ stats->cache_hazard_checks = 0;
+ stats->cache_hazard_walks = 0;
+ stats->cache_hazard_max = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
stats->cache_eviction_internal = 0;
@@ -782,6 +810,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
+ stats->cache_pages_requested = 0;
stats->cache_eviction_pages_seen = 0;
stats->cache_eviction_fail = 0;
stats->cache_eviction_walk = 0;
@@ -803,6 +832,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cond_wait = 0;
stats->rwlock_read = 0;
stats->rwlock_write = 0;
+ stats->fsync_io = 0;
stats->read_io = 0;
stats->write_io = 0;
stats->cursor_create = 0;
@@ -845,7 +875,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->log_write_lsn = 0;
stats->log_write_lsn_skip = 0;
stats->log_sync = 0;
+ stats->log_sync_duration = 0;
stats->log_sync_dir = 0;
+ stats->log_sync_dir_duration = 0;
stats->log_writes = 0;
stats->log_slot_consolidated = 0;
/* not clearing log_max_filesize */
@@ -867,6 +899,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing rec_split_stashed_objects */
/* not clearing session_cursor_open */
/* not clearing session_open */
+ /* not clearing fsync_active */
+ /* not clearing read_active */
+ /* not clearing write_active */
stats->page_busy_blocked = 0;
stats->page_forcible_evict_blocked = 0;
stats->page_locked_blocked = 0;
@@ -883,6 +918,10 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_checkpoint_time_total */
stats->txn_checkpoint = 0;
stats->txn_fail_cache = 0;
+ stats->txn_checkpoint_fsync_post = 0;
+ stats->txn_checkpoint_fsync_pre = 0;
+ stats->txn_checkpoint_fsync_post_duration = 0;
+ stats->txn_checkpoint_fsync_pre_duration = 0;
/* not clearing txn_pinned_range */
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
@@ -904,6 +943,8 @@ void
__wt_stat_connection_aggregate(
WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to)
{
+ int64_t v;
+
to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app);
to->lsm_work_queue_manager +=
WT_STAT_READ(from, lsm_work_queue_manager);
@@ -944,6 +985,12 @@ __wt_stat_connection_aggregate(
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_get_ref +=
+ WT_STAT_READ(from, cache_eviction_get_ref);
+ to->cache_eviction_get_ref_empty +=
+ WT_STAT_READ(from, cache_eviction_get_ref_empty);
+ to->cache_eviction_get_ref_empty2 +=
+ WT_STAT_READ(from, cache_eviction_get_ref_empty2);
to->cache_eviction_aggressive_set +=
WT_STAT_READ(from, cache_eviction_aggressive_set);
to->cache_eviction_queue_empty +=
@@ -954,6 +1001,10 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_server_evicting);
to->cache_eviction_server_not_evicting +=
WT_STAT_READ(from, cache_eviction_server_not_evicting);
+ to->cache_eviction_server_toobig +=
+ WT_STAT_READ(from, cache_eviction_server_toobig);
+ to->cache_eviction_server_slept +=
+ WT_STAT_READ(from, cache_eviction_server_slept);
to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow);
to->cache_eviction_worker_evicting +=
WT_STAT_READ(from, cache_eviction_worker_evicting);
@@ -965,6 +1016,10 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_walks_started);
to->cache_eviction_hazard +=
WT_STAT_READ(from, cache_eviction_hazard);
+ to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks);
+ to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks);
+ if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max)
+ to->cache_hazard_max = v;
to->cache_inmem_splittable +=
WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -999,6 +1054,8 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_pages_requested +=
+ WT_STAT_READ(from, cache_pages_requested);
to->cache_eviction_pages_seen +=
WT_STAT_READ(from, cache_eviction_pages_seen);
to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
@@ -1021,6 +1078,7 @@ __wt_stat_connection_aggregate(
to->cond_wait += WT_STAT_READ(from, cond_wait);
to->rwlock_read += WT_STAT_READ(from, rwlock_read);
to->rwlock_write += WT_STAT_READ(from, rwlock_write);
+ to->fsync_io += WT_STAT_READ(from, fsync_io);
to->read_io += WT_STAT_READ(from, read_io);
to->write_io += WT_STAT_READ(from, write_io);
to->cursor_create += WT_STAT_READ(from, cursor_create);
@@ -1065,7 +1123,10 @@ __wt_stat_connection_aggregate(
to->log_write_lsn += WT_STAT_READ(from, log_write_lsn);
to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip);
to->log_sync += WT_STAT_READ(from, log_sync);
+ to->log_sync_duration += WT_STAT_READ(from, log_sync_duration);
to->log_sync_dir += WT_STAT_READ(from, log_sync_dir);
+ to->log_sync_dir_duration +=
+ WT_STAT_READ(from, log_sync_dir_duration);
to->log_writes += WT_STAT_READ(from, log_writes);
to->log_slot_consolidated +=
WT_STAT_READ(from, log_slot_consolidated);
@@ -1090,6 +1151,9 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, rec_split_stashed_objects);
to->session_cursor_open += WT_STAT_READ(from, session_cursor_open);
to->session_open += WT_STAT_READ(from, session_open);
+ to->fsync_active += WT_STAT_READ(from, fsync_active);
+ to->read_active += WT_STAT_READ(from, read_active);
+ to->write_active += WT_STAT_READ(from, write_active);
to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
to->page_forcible_evict_blocked +=
WT_STAT_READ(from, page_forcible_evict_blocked);
@@ -1115,6 +1179,14 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, txn_checkpoint_time_total);
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
+ to->txn_checkpoint_fsync_post +=
+ WT_STAT_READ(from, txn_checkpoint_fsync_post);
+ to->txn_checkpoint_fsync_pre +=
+ WT_STAT_READ(from, txn_checkpoint_fsync_pre);
+ to->txn_checkpoint_fsync_post_duration +=
+ WT_STAT_READ(from, txn_checkpoint_fsync_post_duration);
+ to->txn_checkpoint_fsync_pre_duration +=
+ WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration);
to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range);
to->txn_pinned_checkpoint_range +=
WT_STAT_READ(from, txn_pinned_checkpoint_range);
@@ -1126,9 +1198,11 @@ __wt_stat_connection_aggregate(
}
static const char * const __stats_join_desc[] = {
- ": accesses",
- ": actual count of items",
+ ": accesses to the main table",
": bloom filter false positives",
+ ": checks that conditions of membership are satisfied",
+ ": items inserted into a bloom filter",
+ ": items iterated",
};
int
@@ -1148,9 +1222,11 @@ __wt_stat_join_init_single(WT_JOIN_STATS *stats)
void
__wt_stat_join_clear_single(WT_JOIN_STATS *stats)
{
- stats->accesses = 0;
- stats->actual_count = 0;
+ stats->main_access = 0;
stats->bloom_false_positive = 0;
+ stats->membership_check = 0;
+ stats->bloom_insert = 0;
+ stats->iterated = 0;
}
void
@@ -1166,7 +1242,9 @@ void
__wt_stat_join_aggregate(
WT_JOIN_STATS **from, WT_JOIN_STATS *to)
{
- to->accesses += WT_STAT_READ(from, accesses);
- to->actual_count += WT_STAT_READ(from, actual_count);
+ to->main_access += WT_STAT_READ(from, main_access);
to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
+ to->membership_check += WT_STAT_READ(from, membership_check);
+ to->bloom_insert += WT_STAT_READ(from, bloom_insert);
+ to->iterated += WT_STAT_READ(from, iterated);
}
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 9d5975b2bc5..dd4384d9a9a 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -346,6 +346,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
if (WT_TXNID_LT(txn_global->last_running, last_running)) {
txn_global->last_running = last_running;
+#ifdef HAVE_VERBOSE
/* Output a verbose message about long-running transactions,
* but only when some progress is being made. */
if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
@@ -358,6 +359,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
oldest_session->lastop,
oldest_session->txn.snap_min));
}
+#endif
}
done: WT_TRET(__wt_writeunlock(session, txn_global->scan_rwlock));
@@ -522,7 +524,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (F_ISSET(txn, WT_TXN_SYNC_SET))
WT_RET_MSG(session, EINVAL,
- "Sync already set during begin_transaction.");
+ "Sync already set during begin_transaction");
if (WT_STRING_MATCH("background", cval.str, cval.len))
txn->txn_logsync = WT_LOG_BACKGROUND;
else if (WT_STRING_MATCH("off", cval.str, cval.len))
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 5c0c55963a3..51d26b9aed6 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -350,6 +350,7 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
static int
__txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
+ struct timespec fsync_start, fsync_stop;
struct timespec start, stop, verb_timer;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -359,6 +360,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_STATE *txn_state;
void *saved_meta_next;
u_int i;
+ uint64_t fsync_duration_usecs;
bool full, idle, logging, tracking;
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
@@ -425,7 +427,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* completion. Do it after flushing the pages to give the
* asynchronous flush as much time as possible before we wait.
*/
+ WT_ERR(__wt_epoch(session, &fsync_start));
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
+ WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre);
+ WT_STAT_FAST_CONN_INCRV(session,
+ txn_checkpoint_fsync_pre_duration, fsync_duration_usecs);
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
@@ -524,7 +532,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoints have to hit disk (it would be reasonable to configure for
* lazy checkpoints, but we don't support them yet).
*/
+ WT_ERR(__wt_epoch(session, &fsync_start));
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
+ WT_ERR(__wt_epoch(session, &fsync_stop));
+ fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
+ WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_post);
+ WT_STAT_FAST_CONN_INCRV(session,
+ txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
WT_ERR(__checkpoint_verbose_track(session,
"sync completed", &verb_timer));
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index da2670fb344..470515244f3 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -156,6 +156,7 @@ err: __wt_logrec_free(session, &logrec);
int
__wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
+ WT_DECL_RET;
WT_ITEM *logrec;
WT_TXN *txn;
WT_TXN_OP *op;
@@ -179,24 +180,25 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
switch (op->type) {
case WT_TXN_OP_BASIC:
- return (__txn_op_log(session, logrec, op, cbt));
+ ret = __txn_op_log(session, logrec, op, cbt);
+ break;
case WT_TXN_OP_INMEM:
case WT_TXN_OP_REF:
/* Nothing to log, we're done. */
- return (0);
+ break;
case WT_TXN_OP_TRUNCATE_COL:
- return (__wt_logop_col_truncate_pack(session, logrec,
+ ret = __wt_logop_col_truncate_pack(session, logrec,
op->fileid,
- op->u.truncate_col.start, op->u.truncate_col.stop));
+ op->u.truncate_col.start, op->u.truncate_col.stop);
+ break;
case WT_TXN_OP_TRUNCATE_ROW:
- return (__wt_logop_row_truncate_pack(session, txn->logrec,
+ ret = __wt_logop_row_truncate_pack(session, txn->logrec,
op->fileid,
&op->u.truncate_row.start, &op->u.truncate_row.stop,
- (uint32_t)op->u.truncate_row.mode));
- WT_ILLEGAL_VALUE(session);
+ (uint32_t)op->u.truncate_row.mode);
+ break;
}
-
- /* NOTREACHED */
+ return (ret);
}
/*
diff --git a/src/txn/txn_nsnap.c b/src/txn/txn_nsnap.c
index eddcca9248f..5b8fed23a9f 100644
--- a/src/txn/txn_nsnap.c
+++ b/src/txn/txn_nsnap.c
@@ -343,7 +343,7 @@ __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session,
if (!*has_create && !*has_drops)
WT_RET_MSG(session, EINVAL,
"WT_SESSION::snapshot API called without any drop or "
- "name option.");
+ "name option");
return (0);
}
diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c
index 1ea4dba1152..bd004e0f837 100644
--- a/src/txn/txn_recover.c
+++ b/src/txn/txn_recover.c
@@ -424,6 +424,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
false, WT_SESSION_NO_LOGGING, &session));
r.session = session;
+ F_SET(conn, WT_CONN_RECOVERING);
WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
@@ -566,6 +567,7 @@ err: WT_TRET(__recovery_free(&r));
WT_TRET(__wt_evict_destroy(session));
WT_TRET(session->iface.close(&session->iface, NULL));
+ F_CLR(conn, WT_CONN_RECOVERING);
return (ret);
}
diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c
index 190c0878f38..5dc9671fb45 100644
--- a/src/utilities/util_backup.c
+++ b/src/utilities/util_backup.c
@@ -23,7 +23,7 @@ append_target(WT_SESSION *session, const char *target, char **bufp)
static char *buf = NULL;
/* 20 bytes of slop */
- if (remain < strlen(target) + 20) {
+ if (buf == NULL || remain < strlen(target) + 20) {
len += strlen(target) + 512;
remain += strlen(target) + 512;
if ((buf = realloc(buf, len)) == NULL)
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index aedd9168fbd..da70aea35be 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -7,25 +7,21 @@
*/
#include "util.h"
+#include "util_dump.h"
-static int dump_config(WT_SESSION *, const char *, bool);
+static int dump_config(WT_SESSION *, const char *, bool, bool);
static int dump_json_begin(WT_SESSION *);
static int dump_json_end(WT_SESSION *);
static int dump_json_separator(WT_SESSION *);
-static int dump_json_table_begin(
- WT_SESSION *, WT_CURSOR *, const char *, const char *);
-static int dump_json_table_cg(
- WT_SESSION *, WT_CURSOR *, const char *, const char *, const char *);
-static int dump_json_table_config(WT_SESSION *, const char *);
static int dump_json_table_end(WT_SESSION *);
-static int dump_prefix(WT_SESSION *, bool);
+static int dump_prefix(WT_SESSION *, bool, bool);
static int dump_record(WT_CURSOR *, bool, bool);
-static int dump_suffix(WT_SESSION *);
-static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *);
+static int dump_suffix(WT_SESSION *, bool);
+static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *, bool);
static int dump_table_config_complex(
- WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, const char *);
+ WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, const char *, bool);
static int dup_json_string(const char *, char **);
-static int print_config(WT_SESSION *, const char *, char *[]);
+static int print_config(WT_SESSION *, const char *, char *[], bool, bool);
static int usage(void);
int
@@ -78,7 +74,9 @@ util_dump(WT_SESSION *session, int argc, char *argv[])
if (argc < 1 || (argc != 1 && !json))
return (usage());
- if (json && (ret = dump_json_begin(session)) != 0)
+ if (json &&
+ ((ret = dump_json_begin(session)) != 0 ||
+ (ret = dump_prefix(session, hex, json)) != 0))
goto err;
for (i = 0; i < argc; i++) {
@@ -91,9 +89,7 @@ util_dump(WT_SESSION *session, int argc, char *argv[])
if ((name = util_name(session, argv[i], "table")) == NULL)
goto err;
- if (json && dump_json_table_config(session, name) != 0)
- goto err;
- if (!json && dump_config(session, name, hex) != 0)
+ if (dump_config(session, name, hex, json) != 0)
goto err;
len =
@@ -142,7 +138,7 @@ err: ret = 1;
* Dump the config for the uri.
*/
static int
-dump_config(WT_SESSION *session, const char *uri, bool hex)
+dump_config(WT_SESSION *session, const char *uri, bool hex, bool json)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -162,9 +158,9 @@ dump_config(WT_SESSION *session, const char *uri, bool hex)
*/
cursor->set_key(cursor, uri);
if ((ret = cursor->search(cursor)) == 0) {
- if (dump_prefix(session, hex) != 0 ||
- dump_table_config(session, cursor, uri) != 0 ||
- dump_suffix(session) != 0)
+ if ((!json && dump_prefix(session, hex, json) != 0) ||
+ dump_table_config(session, cursor, uri, json) != 0 ||
+ dump_suffix(session, json) != 0)
ret = 1;
} else if (ret == WT_NOTFOUND)
ret = util_err(session, 0, "%s: No such object exists", uri);
@@ -217,225 +213,6 @@ dump_json_separator(WT_SESSION *session)
}
/*
- * dump_json_table_begin --
- * Output the JSON syntax that starts a table, along with its config.
- */
-static int
-dump_json_table_begin(
- WT_SESSION *session, WT_CURSOR *cursor, const char *uri, const char *config)
-{
- WT_DECL_RET;
- const char *name;
- char *jsonconfig;
-
- jsonconfig = NULL;
-
- /* Get the table name. */
- if ((name = strchr(uri, ':')) == NULL) {
- fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
- return (1);
- }
- ++name;
-
- if ((ret = dup_json_string(config, &jsonconfig)) != 0)
- return (util_cerr(cursor, "config dup", ret));
- if (printf(" \"%s\" : [\n {\n", uri) < 0)
- goto eio;
- if (printf(" \"config\" : \"%s\",\n", jsonconfig) < 0)
- goto eio;
-
- if ((ret = dump_json_table_cg(
- session, cursor, name, "colgroup:", "colgroups")) == 0) {
- if (printf(",\n") < 0)
- goto eio;
- ret = dump_json_table_cg(
- session, cursor, name, "index:", "indices");
- }
-
- if (printf("\n },\n {\n \"data\" : [") < 0)
- goto eio;
-
- if (0) {
-eio: ret = util_err(session, EIO, NULL);
- }
-
- free(jsonconfig);
- return (ret);
-}
-
-/*
- * dump_json_table_cg --
- * Dump the column groups or indices for a table.
- */
-static int
-dump_json_table_cg(WT_SESSION *session, WT_CURSOR *cursor,
- const char *name, const char *entry, const char *header)
-{
- static const char * const indent = " ";
- WT_DECL_RET;
- int exact;
- bool once;
- const char *key, *skip, *value;
- char *jsonconfig;
-
- once = false;
- if (printf(" \"%s\" : [", header) < 0)
- return (util_err(session, EIO, NULL));
-
- /*
- * For table dumps, we're done.
- */
- if (cursor == NULL) {
- if (printf("]") < 0)
- return (util_err(session, EIO, NULL));
- else
- return (0);
- }
-
- /*
- * Search the file looking for column group and index key/value pairs:
- * for each one, look up the related source information and append it
- * to the base record.
- */
- cursor->set_key(cursor, entry);
- if ((ret = cursor->search_near(cursor, &exact)) != 0) {
- if (ret == WT_NOTFOUND)
- return (0);
- return (util_cerr(cursor, "search_near", ret));
- }
- if (exact >= 0)
- goto match;
- while ((ret = cursor->next(cursor)) == 0) {
-match: if ((ret = cursor->get_key(cursor, &key)) != 0)
- return (util_cerr(cursor, "get_key", ret));
-
- /* Check if we've finished the list of entries. */
- if (!WT_PREFIX_MATCH(key, entry))
- break;
-
- /* Check for a table name match. */
- skip = key + strlen(entry);
- if (strncmp(
- skip, name, strlen(name)) != 0 || skip[strlen(name)] != ':')
- continue;
-
- /* Get the value. */
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- return (util_cerr(cursor, "get_value", ret));
-
- if ((ret = dup_json_string(value, &jsonconfig)) != 0)
- return (util_cerr(cursor, "config dup", ret));
- ret = printf("%s\n"
- "%s{\n"
- "%s \"uri\" : \"%s\",\n"
- "%s \"config\" : \"%s\"\n"
- "%s}",
- once ? "," : "",
- indent, indent, key, indent, jsonconfig, indent);
- free(jsonconfig);
- if (ret < 0)
- return (util_err(session, EIO, NULL));
-
- once = true;
- }
- if (printf("%s]", once ? "\n " : "") < 0)
- return (util_err(session, EIO, NULL));
- if (ret == 0 || ret == WT_NOTFOUND)
- return (0);
- return (util_cerr(cursor, "next", ret));
-}
-
-/*
- * dump_json_table_config --
- * Dump the config for the uri.
- */
-static int
-dump_json_table_config(WT_SESSION *session, const char *uri)
-{
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- size_t len;
- int tret;
- const char *name, *value;
- char *p;
-
- p = NULL;
-
- /* Get the table name. */
- if ((name = strchr(uri, ':')) == NULL) {
- fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
- return (1);
- }
- ++name;
-
- /* Open a metadata cursor. */
- if ((ret = session->open_cursor(
- session, "metadata:create", NULL, NULL, &cursor)) != 0) {
- fprintf(stderr, "%s: %s: session.open_cursor: %s\n",
- progname, "metadata:create",
- session->strerror(session, ret));
- return (1);
- }
-
- /*
- * Search for the object itself, just to make sure it exists, we don't
- * want to output a header if the user entered the wrong name. This is
- * where we find out a table doesn't exist, use a simple error message.
- *
- * Workaround for WiredTiger "simple" table handling. Simple tables
- * have column-group entries, but they aren't listed in the metadata's
- * table entry. Figure out if it's a simple table and in that case,
- * retrieve the column-group entry and use the value from its "source"
- * file.
- */
- if (WT_PREFIX_MATCH(uri, "table:")) {
- len = strlen("colgroup:") + strlen(name) + 1;
- if ((p = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
- (void)snprintf(p, len, "colgroup:%s", name);
- cursor->set_key(cursor, p);
- if ((ret = cursor->search(cursor)) == 0) {
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- return (util_cerr(cursor, "get_value", ret));
- if ((ret = __wt_config_getones(
- (WT_SESSION_IMPL *)session,
- value, "source", &cval)) != 0)
- return (util_err(
- session, ret, "%s: source entry", p));
- free(p);
- len = cval.len + 10;
- if ((p = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
- (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
- cursor->set_key(cursor, p);
- } else
- cursor->set_key(cursor, uri);
- } else
- cursor->set_key(cursor, uri);
-
- if ((ret = cursor->search(cursor)) == 0) {
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- ret = util_cerr(cursor, "get_value", ret);
- else if (dump_json_table_begin(
- session, cursor, uri, value) != 0)
- ret = 1;
- } else if (ret == WT_NOTFOUND)
- ret = util_err(session, 0, "%s: No such object exists", uri);
- else
- ret = util_err(session, ret, "%s", uri);
-
- if ((tret = cursor->close(cursor)) != 0) {
- tret = util_cerr(cursor, "close", tret);
- if (ret == 0)
- ret = tret;
- }
-
- free(p);
- return (ret);
-}
-
-/*
* dump_json_table_end --
* Output the JSON syntax that ends a table.
*/
@@ -452,7 +229,8 @@ dump_json_table_end(WT_SESSION *session)
* Dump the config for a table.
*/
static int
-dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
+dump_table_config(
+ WT_SESSION *session, WT_CURSOR *cursor, const char *uri, bool json)
{
WT_CONFIG_ITEM cval;
WT_CURSOR *srch;
@@ -479,11 +257,11 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
*/
cursor->set_key(cursor, uri);
if ((ret = cursor->search(cursor)) != 0)
- return (util_cerr(cursor, "search", ret));
+ WT_ERR(util_cerr(cursor, "search", ret));
if ((ret = cursor->get_value(cursor, &v)) != 0)
- return (util_cerr(cursor, "get_value", ret));
+ WT_ERR(util_cerr(cursor, "get_value", ret));
if ((*--cfg = strdup(v)) == NULL)
- return (util_err(session, errno, NULL));
+ WT_ERR(util_err(session, errno, NULL));
/*
* Workaround for WiredTiger "simple" table handling. Simple tables
@@ -497,37 +275,36 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
if (WT_PREFIX_MATCH(uri, "table:")) {
len = strlen("colgroup:") + strlen(name) + 1;
if ((p = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
+ WT_ERR(util_err(session, errno, NULL));
(void)snprintf(p, len, "colgroup:%s", name);
cursor->set_key(cursor, p);
if ((ret = cursor->search(cursor)) == 0) {
if ((ret = cursor->get_value(cursor, &v)) != 0)
- return (util_cerr(cursor, "get_value", ret));
+ WT_ERR(util_cerr(cursor, "get_value", ret));
if ((*--cfg = strdup(v)) == NULL)
- return (util_err(session, errno, NULL));
+ WT_ERR(util_err(session, errno, NULL));
if ((ret =__wt_config_getones(
(WT_SESSION_IMPL *)session,
*cfg, "source", &cval)) != 0)
- return (util_err(
+ WT_ERR(util_err(
session, ret, "%s: source entry", p));
free(p);
len = cval.len + 10;
if ((p = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
+ WT_ERR(util_err(session, errno, NULL));
(void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
cursor->set_key(cursor, p);
if ((ret = cursor->search(cursor)) != 0)
- return (util_cerr(cursor, "search", ret));
+ WT_ERR(util_cerr(cursor, "search", ret));
if ((ret = cursor->get_value(cursor, &v)) != 0)
- return (util_cerr(cursor, "get_value", ret));
+ WT_ERR(util_cerr(cursor, "get_value", ret));
if ((*--cfg = strdup(v)) == NULL)
- return (util_err(session, errno, NULL));
+ WT_ERR(util_err(session, errno, NULL));
} else
complex_table = true;
}
- if (print_config(session, uri, cfg) != 0)
- return (1);
+ WT_ERR(print_config(session, uri, cfg, json, true));
if (complex_table) {
/*
@@ -537,21 +314,28 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
*/
if ((ret = session->open_cursor(
session, "metadata:", NULL, NULL, &srch)) != 0)
- return (util_cerr(cursor, "open_cursor", ret));
+ WT_ERR(util_cerr(cursor, "open_cursor", ret));
if ((ret = dump_table_config_complex(
- session, cursor, srch, name, "colgroup:")) == 0)
+ session, cursor, srch, name, "colgroup:", json)) == 0)
ret = dump_table_config_complex(
- session, cursor, srch, name, "index:");
+ session, cursor, srch, name, "index:", json);
if ((tret = srch->close(srch)) != 0) {
tret = util_cerr(cursor, "close", tret);
if (ret == 0)
ret = tret;
}
- }
+ } else if (json && printf(
+ " \"colgroups\" : [],\n"
+ " \"indices\" : []\n") < 0)
+ /*
+ * A failed printf is an output error, not a cursor operation:
+ * report it the way the other printf failure paths here do.
+ */
+ WT_ERR(util_err(session, EIO, NULL));
- free(p);
+err: free(p);
free(_cfg[0]);
free(_cfg[1]);
free(_cfg[2]);
@@ -563,17 +343,31 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri)
* Dump the column groups or indices for a table.
*/
static int
-dump_table_config_complex(WT_SESSION *session,
- WT_CURSOR *cursor, WT_CURSOR *srch, const char *name, const char *entry)
+dump_table_config_complex(WT_SESSION *session, WT_CURSOR *cursor,
+ WT_CURSOR *srch, const char *name, const char *entry, bool json)
{
WT_CONFIG_ITEM cval;
WT_DECL_RET;
- const char *key;
+ bool multiple;
+ const char *groupname, *key, *sep;
size_t len;
int exact;
const char *v;
char *p, *cfg[3] = {NULL, NULL, NULL};
+ multiple = false;
+ sep = "";
+
+ if (json) {
+ if (strcmp(entry, "colgroup:") == 0) {
+ groupname = "colgroups";
+ sep = ",";
+ } else {
+ groupname = "indices";
+ }
+ if (printf(" \"%s\" : [", groupname) < 0)
+ return (util_err(session, EIO, NULL));
+ }
/*
* Search the file looking for column group and index key/value pairs:
* for each one, look up the related source information and append it
@@ -594,7 +388,7 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
/* Check if we've finished the list of entries. */
if (!WT_PREFIX_MATCH(key, entry))
- return (0);
+ break;
/*
* Check for a table name match. This test will match "simple"
@@ -635,14 +429,19 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
if ((cfg[0] = strdup(v)) == NULL)
return (util_err(session, errno, NULL));
+ if (json && printf("%s\n", multiple ? "," : "") < 0)
+ return (util_err(session, EIO, NULL));
/*
* The dumped configuration string is the original key plus the
* source's configuration, where the values of the original key
* override any source configurations of the same name.
*/
- if (print_config(session, key, cfg) != 0)
+ if (print_config(session, key, cfg, json, false) != 0)
return (util_err(session, EIO, NULL));
+ multiple = true;
}
+ if (json && printf("\n ]%s\n", sep) < 0)
+ return (util_err(session, EIO, NULL));
free(cfg[0]);
free(cfg[1]);
@@ -656,18 +455,24 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
* Output the dump file header prefix.
*/
static int
-dump_prefix(WT_SESSION *session, bool hex)
+dump_prefix(WT_SESSION *session, bool hex, bool json)
{
int vmajor, vminor, vpatch;
(void)wiredtiger_version(&vmajor, &vminor, &vpatch);
- if (printf(
+ if (!json && (printf(
"WiredTiger Dump (WiredTiger Version %d.%d.%d)\n",
vmajor, vminor, vpatch) < 0 ||
printf("Format=%s\n", hex ? "hex" : "print") < 0 ||
- printf("Header\n") < 0)
+ printf("Header\n") < 0))
+ return (util_err(session, EIO, NULL));
+ else if (json && printf(
+ " \"%s\" : \"%d (%d.%d.%d)\",\n",
+ DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION,
+ vmajor, vminor, vpatch) < 0)
return (util_err(session, EIO, NULL));
+
return (0);
}
@@ -718,10 +523,18 @@ dump_record(WT_CURSOR *cursor, bool reverse, bool json)
* Output the dump file header suffix.
*/
static int
-dump_suffix(WT_SESSION *session)
+dump_suffix(WT_SESSION *session, bool json)
{
- if (printf("Data\n") < 0)
- return (util_err(session, EIO, NULL));
+ if (json) {
+ if (printf(
+ " },\n"
+ " {\n"
+ " \"data\" : [") < 0)
+ return (util_err(session, EIO, NULL));
+ } else {
+ if (printf("Data\n") < 0)
+ return (util_err(session, EIO, NULL));
+ }
return (0);
}
@@ -739,14 +552,15 @@ dup_json_string(const char *str, char **result)
nchars = 0;
for (p = str; *p; p++, nchars++)
- nchars += __wt_json_unpack_char(*p, NULL, 0, false);
+ nchars += __wt_json_unpack_char((u_char)*p, NULL, 0, false);
q = malloc(nchars + 1);
if (q == NULL)
return (1);
*result = q;
left = nchars;
for (p = str; *p; p++, nchars++) {
- nchars = __wt_json_unpack_char(*p, (u_char *)q, left, false);
+ nchars = __wt_json_unpack_char((u_char)*p, (u_char *)q, left,
+ false);
left -= nchars;
q += nchars;
}
@@ -759,21 +573,40 @@ dup_json_string(const char *str, char **result)
* Output a key/value URI pair by combining v1 and v2.
*/
static int
-print_config(WT_SESSION *session, const char *key, char *cfg[])
+print_config(
+ WT_SESSION *session, const char *key, char *cfg[], bool json, bool toplevel)
{
WT_DECL_RET;
- char *value_ret;
+ char *jsonconfig, *value_ret;
/*
* We have all of the object configuration, but don't have the default
* session.create configuration. Have the underlying library add in the
* defaults and collapse it all into one load configuration string.
*/
+ jsonconfig = NULL;
if ((ret = __wt_schema_create_final(
(WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0)
return (util_err(session, ret, NULL));
- ret = printf("%s\n%s\n", key, value_ret);
+ if (json && (ret = dup_json_string(value_ret, &jsonconfig)) != 0) {
+ free(value_ret);
+ return (util_err(session, ret, NULL));
+ }
+ if (json) {
+ if (toplevel)
+ ret = printf(
+ " \"%s\" : [\n {\n "
+ "\"config\" : \"%s\",\n", key, jsonconfig);
+ else
+ ret = printf(
+ " {\n"
+ " \"uri\" : \"%s\",\n"
+ " \"config\" : \"%s\"\n"
+ " }", key, jsonconfig);
+ } else
+ ret = printf("%s\n%s\n", key, value_ret);
free(value_ret);
+ free(jsonconfig);
if (ret < 0)
return (util_err(session, EIO, NULL));
return (0);
diff --git a/src/utilities/util_dump.h b/src/utilities/util_dump.h
new file mode 100644
index 00000000000..e3fd8e6a501
--- /dev/null
+++ b/src/utilities/util_dump.h
@@ -0,0 +1,11 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version"
+#define DUMP_JSON_CURRENT_VERSION 1
+#define DUMP_JSON_SUPPORTED_VERSION 1
diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c
index 696dc68630a..ac18df80851 100644
--- a/src/utilities/util_load.c
+++ b/src/utilities/util_load.c
@@ -211,6 +211,8 @@ config_list_free(CONFIG_LIST *clp)
free(*entry);
free(clp->list);
clp->list = NULL;
+ clp->entry = 0;
+ clp->max_entry = 0;
}
/*
@@ -366,6 +368,7 @@ config_update(WT_SESSION *session, char **list)
if (WT_PREFIX_MATCH(*listp, "colgroup:") ||
WT_PREFIX_MATCH(*listp, "file:") ||
WT_PREFIX_MATCH(*listp, "index:") ||
+ WT_PREFIX_MATCH(*listp, "lsm:") ||
WT_PREFIX_MATCH(*listp, "table:"))
if (config_rename(session, listp, cmdname))
return (1);
diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c
index 3a1f847a95f..020a4ed9ba9 100644
--- a/src/utilities/util_load_json.c
+++ b/src/utilities/util_load_json.c
@@ -7,6 +7,7 @@
*/
#include "util.h"
+#include "util_dump.h"
#include "util_load.h"
/*
@@ -186,9 +187,8 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp)
}
*resultp = result;
resultcpy = result;
- if ((ret = __wt_json_strncpy(&resultcpy, (size_t)resultlen, src,
- srclen))
- != 0) {
+ if ((ret = __wt_json_strncpy(
+ session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) {
ret = util_err(session, ret, NULL);
goto err;
}
@@ -248,7 +248,7 @@ json_data(WT_SESSION *session,
keyformat = cursor->key_format;
isrec = strcmp(keyformat, "r") == 0;
for (nkeys = 0; *keyformat; keyformat++)
- if (!isdigit(*keyformat))
+ if (!__wt_isdigit((u_char)*keyformat))
nkeys++;
recno = 0;
@@ -344,13 +344,16 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
{
CONFIG_LIST cl;
WT_DECL_RET;
- int toktype;
static const char *json_markers[] = {
"\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL };
char *config, *tableuri;
+ int curversion, toktype;
+ bool hasversion;
memset(&cl, 0, sizeof(cl));
tableuri = NULL;
+ hasversion = false;
+
JSON_EXPECT(session, ins, '{');
while (json_peek(session, ins) == 's') {
JSON_EXPECT(session, ins, 's');
@@ -358,6 +361,33 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
snprintf(tableuri, ins->toklen, "%.*s",
(int)(ins->toklen - 2), ins->tokstart + 1);
JSON_EXPECT(session, ins, ':');
+ if (!hasversion) {
+ /*
+ * The first key in the dump must be the version marker,
+ * and its value must be a supported version string.
+ */
+ if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) {
+ ret = util_err(session, ENOTSUP,
+ "missing \"%s\"", DUMP_JSON_VERSION_MARKER);
+ goto err;
+ }
+ hasversion = true;
+ JSON_EXPECT(session, ins, 's');
+ /*
+ * As in the tableuri copy above, skip the token's first
+ * character and print toklen - 2 characters, so the
+ * closing quote isn't echoed inside the quoted message.
+ */
+ if ((curversion = atoi(ins->tokstart + 1)) <= 0 ||
+ curversion > DUMP_JSON_SUPPORTED_VERSION) {
+ ret = util_err(session, ENOTSUP,
+ "unsupported JSON dump version \"%.*s\"",
+ (int)(ins->toklen - 2), ins->tokstart + 1);
+ goto err;
+ }
+ JSON_EXPECT(session, ins, ',');
+ continue;
+ }
/*
* Allow any ordering of 'config', 'colgroups',
@@ -406,6 +427,9 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
flags)) != 0)
goto err;
config_list_free(&cl);
+ /* The list was just freed above; only the raw buffer is left. */
+ free(ins->kvraw);
+ ins->kvraw = NULL;
break;
}
else
@@ -447,7 +471,7 @@ json_peek(WT_SESSION *session, JSON_INPUT_STATE *ins)
if (!ins->peeking) {
while (!ins->ateof) {
- while (isspace(*ins->p))
+ while (__wt_isspace((u_char)*ins->p))
ins->p++;
if (*ins->p)
break;
@@ -523,15 +547,14 @@ json_skip(WT_SESSION *session, JSON_INPUT_STATE *ins, const char **matches)
const char *hit;
const char **match;
- if (ins->kvraw != NULL)
- return (1);
-
+ WT_ASSERT((WT_SESSION_IMPL *)session, ins->kvraw == NULL);
hit = NULL;
while (!ins->ateof) {
for (match = matches; *match != NULL; match++)
if ((hit = strstr(ins->p, *match)) != NULL)
goto out;
- if (util_read_line(session, &ins->line, true, &ins->ateof)) {
+ if (util_read_line(session, &ins->line, true, &ins->ateof)
+ != 0) {
ins->toktype = -1;
return (1);
}
diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c
index e18d8d7d1f5..2054b94e3ce 100644
--- a/src/utilities/util_main.c
+++ b/src/utilities/util_main.c
@@ -36,7 +36,6 @@ main(int argc, char *argv[])
conn = NULL;
p = NULL;
- secretkey = NULL;
/* Get the program name. */
if ((progname = strrchr(argv[0], '/')) == NULL)
diff --git a/src/utilities/util_misc.c b/src/utilities/util_misc.c
index f45f6b339f2..3c4e8d2dfa1 100644
--- a/src/utilities/util_misc.c
+++ b/src/utilities/util_misc.c
@@ -108,7 +108,7 @@ util_str2recno(WT_SESSION *session, const char *p, uint64_t *recnop)
* forth -- none of them are OK with us. Check the string starts with
* digit, that turns off the special processing.
*/
- if (!isdigit(p[0]))
+ if (!__wt_isdigit((u_char)p[0]))
goto format;
errno = 0;
diff --git a/src/utilities/util_verify.c b/src/utilities/util_verify.c
index 2df4fa65f43..82bdd780cd3 100644
--- a/src/utilities/util_verify.c
+++ b/src/utilities/util_verify.c
@@ -16,10 +16,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
WT_DECL_RET;
size_t size;
int ch;
- bool dump_address, dump_blocks, dump_pages, dump_shape;
+ bool dump_address, dump_blocks, dump_layout, dump_pages;
char *config, *dump_offsets, *name;
- dump_address = dump_blocks = dump_pages = dump_shape = false;
+ dump_address = dump_blocks = dump_layout = dump_pages = false;
config = dump_offsets = name = NULL;
while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF)
switch (ch) {
@@ -28,6 +28,8 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
dump_address = true;
else if (strcmp(__wt_optarg, "dump_blocks") == 0)
dump_blocks = true;
+ else if (strcmp(__wt_optarg, "dump_layout") == 0)
+ dump_layout = true;
else if (
WT_PREFIX_MATCH(__wt_optarg, "dump_offsets=")) {
if (dump_offsets != NULL) {
@@ -40,8 +42,6 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
__wt_optarg + strlen("dump_offsets=");
} else if (strcmp(__wt_optarg, "dump_pages") == 0)
dump_pages = true;
- else if (strcmp(__wt_optarg, "dump_shape") == 0)
- dump_shape = true;
else
return (usage());
break;
@@ -60,12 +60,12 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
/* Build the configuration string as necessary. */
if (dump_address ||
- dump_blocks || dump_offsets != NULL || dump_pages || dump_shape) {
+ dump_blocks || dump_layout || dump_offsets != NULL || dump_pages) {
size =
strlen("dump_address,") +
strlen("dump_blocks,") +
+ strlen("dump_layout,") +
strlen("dump_pages,") +
- strlen("dump_shape,") +
strlen("dump_offsets[],") +
(dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20;
if ((config = malloc(size)) == NULL) {
@@ -76,11 +76,11 @@ util_verify(WT_SESSION *session, int argc, char *argv[])
"%s%s%s%s%s%s%s",
dump_address ? "dump_address," : "",
dump_blocks ? "dump_blocks," : "",
+ dump_layout ? "dump_layout," : "",
dump_offsets != NULL ? "dump_offsets=[" : "",
dump_offsets != NULL ? dump_offsets : "",
dump_offsets != NULL ? "]," : "",
- dump_pages ? "dump_pages," : "",
- dump_shape ? "dump_shape," : "");
+ dump_pages ? "dump_pages," : "");
}
if ((ret = session->verify(session, name, config)) != 0) {
fprintf(stderr, "%s: verify(%s): %s\n",
@@ -109,7 +109,7 @@ usage(void)
"usage: %s %s "
"verify %s\n",
progname, usage_prefix,
- "[-d dump_address | dump_blocks | "
- "dump_offsets=#,# | dump_pages | dump_shape] uri");
+ "[-d dump_address | dump_blocks | dump_layout | "
+ "dump_offsets=#,# | dump_pages] uri");
return (1);
}
diff --git a/test/bloom/Makefile.am b/test/bloom/Makefile.am
index 86d87c70071..81a21f59882 100644
--- a/test/bloom/Makefile.am
+++ b/test/bloom/Makefile.am
@@ -1,9 +1,12 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES = test_bloom.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
@@ -11,4 +14,4 @@ TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
- rm -rf WiredTiger* *.core __*
+ rm -rf WiredTiger* *.core
diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c
index f95bc7faaf9..9a7584f951f 100644
--- a/test/bloom/test_bloom.c
+++ b/test/bloom/test_bloom.c
@@ -26,7 +26,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "test_util.i"
+#include "test_util.h"
static struct {
char *progname; /* Program name */
@@ -50,7 +50,8 @@ void cleanup(void);
void populate_entries(void);
void run(void);
void setup(void);
-void usage(void);
+void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern char *__wt_optarg;
extern int __wt_optind;
@@ -189,9 +190,7 @@ run(void)
* ensure the value doesn't overlap with existing values.
*/
item.size = g.c_key_max + 10;
- item.data = calloc(item.size, 1);
- if (item.data == NULL)
- testutil_die(ENOMEM, "value buffer malloc");
+ item.data = dcalloc(item.size, 1);
memset((void *)item.data, 'a', item.size);
for (i = 0, fp = 0; i < g.c_ops; i++) {
((uint8_t *)item.data)[i % item.size] =
@@ -232,14 +231,10 @@ populate_entries(void)
srand(g.c_srand);
- entries = calloc(g.c_ops, sizeof(uint8_t *));
- if (entries == NULL)
- testutil_die(ENOMEM, "key buffer malloc");
+ entries = dcalloc(g.c_ops, sizeof(uint8_t *));
for (i = 0; i < g.c_ops; i++) {
- entries[i] = calloc(g.c_key_max, sizeof(uint8_t));
- if (entries[i] == NULL)
- testutil_die(ENOMEM, "key buffer malloc 2");
+ entries[i] = dcalloc(g.c_key_max, sizeof(uint8_t));
for (j = 0; j < g.c_key_max; j++)
entries[i][j] = 'a' + ((uint8_t)rand() % 26);
}
diff --git a/test/checkpoint/Makefile.am b/test/checkpoint/Makefile.am
index cf879d046bf..2b5ba800c9c 100644
--- a/test/checkpoint/Makefile.am
+++ b/test/checkpoint/Makefile.am
@@ -1,9 +1,12 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
-t_LDADD = $(top_builddir)/libwiredtiger.la
t_SOURCES = checkpointer.c workers.c test_checkpoint.c
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
TESTS = smoke.sh
diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c
index c5524b3c63e..6293d36f916 100644
--- a/test/checkpoint/test_checkpoint.c
+++ b/test/checkpoint/test_checkpoint.c
@@ -32,7 +32,8 @@ GLOBAL g;
static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int);
+static void onint(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static int cleanup(void);
static int usage(void);
static int wt_connect(const char *);
@@ -61,8 +62,7 @@ main(int argc, char *argv[])
working_dir = NULL;
ttype = MIX;
g.checkpoint_name = "WiredTigerCheckpoint";
- if ((g.home = malloc(512)) == NULL)
- testutil_die(ENOMEM, "Unable to allocate memory");
+ g.home = dmalloc(512);
g.nkeys = 10000;
g.nops = 100000;
g.ntables = 3;
diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h
index 09edaeb84bc..0d0d02447d5 100644
--- a/test/checkpoint/test_checkpoint.h
+++ b/test/checkpoint/test_checkpoint.h
@@ -26,19 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/types.h>
-#include <sys/time.h>
+#include "test_util.h"
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "test_util.i"
#define URI_BASE "table:__wt" /* File name */
diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am
new file mode 100644
index 00000000000..f842bc1316f
--- /dev/null
+++ b/test/csuite/Makefile.am
@@ -0,0 +1,27 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/test/utility
+LDADD = $(top_builddir)/test/utility/libtest_util.la \
+ $(top_builddir)/libwiredtiger.la
+AM_LDFLAGS = -static
+
+test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c
+noinst_PROGRAMS = test_wt1965_col_efficiency
+
+test_wt2246_col_append_SOURCES = wt2246_col_append/main.c
+noinst_PROGRAMS += test_wt2246_col_append
+
+test_wt2535_insert_race_SOURCES = wt2535_insert_race/main.c
+noinst_PROGRAMS += test_wt2535_insert_race
+
+test_wt2447_join_main_table_SOURCES = wt2447_join_main_table/main.c
+noinst_PROGRAMS += test_wt2447_join_main_table
+
+test_wt2592_join_schema_SOURCES = wt2592_join_schema/main.c
+noinst_PROGRAMS += test_wt2592_join_schema
+
+# Run this during a "make check" smoke test.
+TESTS = $(noinst_PROGRAMS)
+LOG_COMPILER = $(TEST_WRAPPER)
+
+clean-local:
+ rm -rf WT_TEST.* *.core
diff --git a/test/csuite/wt1965_col_efficiency/main.c b/test/csuite/wt1965_col_efficiency/main.c
new file mode 100644
index 00000000000..2882ce9cdf5
--- /dev/null
+++ b/test/csuite/wt1965_col_efficiency/main.c
@@ -0,0 +1,186 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-1965
+ * Test case description: The reported issue was that column store tables
+ * exhibit high CPU usage when populated with sparse record IDs.
+ * Failure mode: It isn't simple to make this test case failure explicit since
+ * it is demonstrating an inefficiency rather than a correctness bug.
+ */
+
+void (*custom_die)(void) = NULL;
+
+/* If changing field count also need to change set_value and get_value calls */
+#define NR_FIELDS 8
+#define NR_OBJECTS 100
+#define NR_THREADS 4
+
+static uint64_t g_ts = 0;
+
+/*
+ * Each thread inserts a set of keys into the record store database. The keys
+ * are generated in such a way that there are large gaps in the key range.
+ */
+static void *
+thread_func(void *arg)
+{
+	TEST_OPTS *opts;
+	WT_CURSOR *data_cursor, *index_cursor;
+	WT_SESSION *session;
+	uint64_t base, first_obj, obj, pass, start_ts;
+	uint64_t *fields;
+
+	opts = (TEST_OPTS *)arg;
+
+	/* Claim a unique slot; it doubles as this thread's first object ID. */
+	first_obj = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
+
+	/* The shared timestamp is sampled once and reused for every insert. */
+	start_ts = g_ts;
+
+	/* NR_FIELDS zeroed counters for each object this thread may touch. */
+	fields = dcalloc(
+	    (NR_OBJECTS/NR_THREADS + 1) * NR_FIELDS, sizeof(*fields));
+
+	testutil_check(opts->conn->open_session(
+	    opts->conn, NULL, NULL, &session));
+
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, NULL, &data_cursor));
+	testutil_check(session->open_cursor(
+	    session, "table:index", NULL, NULL, &index_cursor));
+
+	for (pass = 1; pass < 10; ++pass) {
+		/* "base" is the offset of the current object's counters. */
+		base = 0;
+		for (obj = first_obj; obj < NR_OBJECTS; obj += NR_THREADS) {
+			testutil_check(
+			    session->begin_transaction(session, "sync=false"));
+
+			/*
+			 * Object ID in the high bits, pass number in the low
+			 * bits: this is what makes the record numbers sparse.
+			 */
+			data_cursor->set_key(data_cursor, (obj << 40) | pass);
+			data_cursor->set_value(data_cursor, start_ts,
+			    fields[base + 0], fields[base + 1],
+			    fields[base + 2], fields[base + 3],
+			    fields[base + 4], fields[base + 5],
+			    fields[base + 6], fields[base + 7]);
+			testutil_check(data_cursor->insert(data_cursor));
+
+			index_cursor->set_key(
+			    index_cursor, (obj << 40) | start_ts);
+			index_cursor->set_value(index_cursor, pass);
+			testutil_check(index_cursor->insert(index_cursor));
+
+			testutil_check(
+			    session->commit_transaction(session, NULL));
+
+			/* Change two of the object's fields for the next pass. */
+			++fields[base + ((obj + pass) % NR_FIELDS)];
+			++fields[base + ((obj + pass + 1) % NR_FIELDS)];
+
+			++g_ts;
+
+			/* 5K updates/sec */
+			(void)usleep(1000000ULL * NR_THREADS / 5000);
+
+			base += NR_FIELDS;
+		}
+	}
+
+	testutil_check(session->close(session, NULL));
+	free(fields);
+	return (NULL);
+}
+
+/*
+ * main --
+ *	Create a column-store table and a companion "index" table, run
+ * NR_THREADS inserter threads against them, then scan the column store
+ * (printing every row when run verbosely) and check the scan ends with
+ * WT_NOTFOUND.
+ */
+int
+main(int argc, char *argv[])
+{
+	TEST_OPTS *opts, _opts;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	pthread_t thr[NR_THREADS];
+	size_t t;
+	uint64_t f[NR_FIELDS], r, ts;
+	int i, ret;
+	char table_format[256];
+
+	opts = &_opts;
+	memset(opts, 0, sizeof(*opts));
+	testutil_check(testutil_parse_opts(argc, argv, opts));
+	testutil_make_work_dir(opts->home);
+
+	testutil_check(wiredtiger_open(opts->home, NULL,
+	    "create,cache_size=1G,checkpoint=(wait=30),"
+	    "eviction_trigger=80,eviction_target=64,eviction_dirty_target=65,"
+	    "log=(enabled,file_max=10M),"
+	    "transaction_sync=(enabled=true,method=none)", &opts->conn));
+	testutil_check(opts->conn->open_session(
+	    opts->conn, NULL, NULL, &session));
+
+	/*
+	 * Each row holds a timestamp followed by NR_FIELDS counters, and both
+	 * set_value and get_value are called with all NR_FIELDS + 1 values.
+	 * The value format therefore needs a leading 'Q' for the timestamp in
+	 * addition to one 'Q' per field; without it the last field is never
+	 * stored and f[NR_FIELDS - 1] is printed uninitialized below.
+	 */
+	snprintf(table_format, sizeof(table_format),
+	    "key_format=r,value_format=Q");
+	for (i = 0; i < NR_FIELDS; i++)
+		strcat(table_format, "Q");
+
+	/* recno -> timestamp + NR_FIELDS * Q */
+	testutil_check(session->create(
+	    session, opts->uri, table_format));
+	/* timestamp -> recno */
+	testutil_check(session->create(session,
+	    "table:index", "key_format=Q,value_format=Q"));
+
+	testutil_check(session->close(session, NULL));
+
+	for (t = 0; t < NR_THREADS; ++t)
+		testutil_check(pthread_create(
+		    &thr[t], NULL, thread_func, (void *)opts));
+
+	for (t = 0; t < NR_THREADS; ++t)
+		(void)pthread_join(thr[t], NULL);
+
+	testutil_check(opts->conn->open_session(
+	    opts->conn, NULL, NULL, &session));
+
+	/* recno -> timestamp + NR_FIELDS * Q */
+	testutil_check(session->create(session, opts->uri, table_format));
+
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, NULL, &cursor));
+
+	/* Walk every row; the contents are only reported in verbose mode. */
+	while ((ret = cursor->next(cursor)) == 0) {
+		testutil_check(cursor->get_key(cursor, &r));
+		testutil_check(cursor->get_value(cursor, &ts,
+		    &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7]));
+
+		if (!opts->verbose)
+			continue;
+
+		/* Split the record number back into (object ID, pass). */
+		printf("(%" PRIu64 ",%llu)\t\t%" PRIu64,
+		    (r >> 40), r & ((1ULL << 40) - 1), ts);
+
+		for (i = 0; i < NR_FIELDS; i++)
+			printf("\t%" PRIu64, f[i]);
+		printf("\n");
+	}
+	testutil_assert(ret == WT_NOTFOUND);
+
+	testutil_cleanup(opts);
+
+	return (0);
+}
diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c
new file mode 100644
index 00000000000..798970cbb6d
--- /dev/null
+++ b/test/csuite/wt2246_col_append/main.c
@@ -0,0 +1,158 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-2246
+ * Test case description: The column-store search routine used to search the
+ * target leaf page even when the cursor is configured with append and we're
+ * allocating a record number. That was inefficient, this test case
+ * demonstrates the inefficiency.
+ * Failure mode: It isn't simple to make this test case failure explicit since
+ * it is demonstrating an inefficiency rather than a correctness bug.
+ */
+
+/* Don't move into a shared function until there is a cross-platform solution. */
+#include <signal.h>
+
+#define MILLION 1000000
+
+void (*custom_die)(void) = NULL;
+
+/* Needs to be global for signal handling. */
+static TEST_OPTS *opts, _opts;
+
+/*
+ * page_init --
+ *	Append records through an "append" cursor until the record number
+ * returned for the most recent insert reaches n.
+ */
+static void
+page_init(uint64_t n)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *append_cursor;
+	WT_SESSION *session;
+	uint64_t inserted, value_id;
+	char value[64];
+	bool fixed;
+
+	conn = opts->conn;
+	fixed = opts->table_type == TABLE_FIX;
+
+	testutil_check(conn->open_session(conn, NULL, NULL, &session));
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, "append", &append_cursor));
+
+	inserted = 0;
+	value_id = 0;
+	value[0] = '\2';
+	do {
+		++inserted;
+		if (fixed)
+			append_cursor->set_value(append_cursor, value[0]);
+		else {
+			/* The string value changes on every third insert. */
+			if (inserted % 3 == 0)
+				++value_id;
+			snprintf(value, sizeof(value),
+			    "%" PRIu64 " VALUE ------", value_id);
+			append_cursor->set_value(append_cursor, value);
+		}
+		testutil_check(append_cursor->insert(append_cursor));
+
+		/* The cursor reports the record number it allocated. */
+		testutil_check(append_cursor->get_key(
+		    append_cursor, &opts->max_inserted_id));
+	} while (opts->max_inserted_id < n);
+}
+
+/*
+ * onsig --
+ *	Signal handler: clear the shared "running" flag. The signal number is
+ * not used.
+ *
+ * TODO: Platform specific?
+ */
+static void
+onsig(int signo)
+{
+	opts->running = false;
+	WT_UNUSED(signo);
+}
+
+#define N_APPEND_THREADS 6
+#define N_RECORDS (20 * WT_MILLION)
+
+/*
+ * main --
+ *	Create a column-store table, load an initial 5000 records, re-open the
+ * database, then run the append threads and report the processor time they
+ * used.
+ */
+int
+main(int argc, char *argv[])
+{
+	WT_SESSION *session;
+	clock_t ce, cs;
+	pthread_t idlist[100];
+	uint64_t i;
+	char buf[100];
+
+	opts = &_opts;
+	memset(opts, 0, sizeof(*opts));
+	opts->table_type = TABLE_ROW;
+	opts->n_append_threads = N_APPEND_THREADS;
+	opts->nrecords = N_RECORDS;
+	testutil_check(testutil_parse_opts(argc, argv, opts));
+	testutil_make_work_dir(opts->home);
+
+	/*
+	 * The thread IDs live in a fixed-size array; refuse a thread count
+	 * that would write past its end.
+	 */
+	testutil_assert(
+	    opts->n_append_threads <= sizeof(idlist) / sizeof(idlist[0]));
+
+	snprintf(buf, sizeof(buf),
+	    "create,"
+	    "cache_size=%s,"
+	    "eviction=(threads_max=5),"
+	    "statistics=(fast)",
+	    opts->table_type == TABLE_FIX ? "500MB" : "2GB");
+	testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn));
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
+	snprintf(buf, sizeof(buf),
+	    "key_format=r,value_format=%s,"
+	    "allocation_size=4K,leaf_page_max=64K",
+	    opts->table_type == TABLE_FIX ? "8t" : "S");
+	testutil_check(session->create(session, opts->uri, buf));
+	testutil_check(session->close(session, NULL));
+
+	page_init(5000);
+
+	/* Force to disk and re-open. */
+	testutil_check(opts->conn->close(opts->conn, NULL));
+	testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn));
+
+	(void)signal(SIGINT, onsig);
+
+	cs = clock();
+	for (i = 0; i < opts->n_append_threads; ++i) {
+		printf("append: %" PRIu64 "\n", i);
+		testutil_check(pthread_create(
+		    &idlist[i], NULL, thread_append, (void *)opts));
+	}
+
+	for (i = 0; i < opts->n_append_threads; ++i)
+		testutil_check(pthread_join(idlist[i], NULL));
+
+	ce = clock();
+	printf("%" PRIu64 "M records: %.2lf processor seconds\n",
+	    opts->max_inserted_id / MILLION,
+	    (ce - cs) / (double)CLOCKS_PER_SEC);
+
+	testutil_cleanup(opts);
+
+	return (0);
+}
diff --git a/test/csuite/wt2447_join_main_table/main.c b/test/csuite/wt2447_join_main_table/main.c
new file mode 100644
index 00000000000..a6f19cb0858
--- /dev/null
+++ b/test/csuite/wt2447_join_main_table/main.c
@@ -0,0 +1,189 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-2447
+ *
+ * Test case description: This test case is adapted from the submitted test
+ * program in the JIRA ticket. We create a database of 10,000 entries, with
+ * every key i having pair of values (i, i). Create indices on both values,
+ * and establish a join: table.v1 >= 5000 AND table.v2 < 5001. There's a
+ * Bloom filter on v2. We expect that although we iterate from 5000 to
+ * 10000, we'll only have accesses to the main table for key 5000, as
+ * 5001-10000 will generally not be in the Bloom filter. For key 5000,
+ * we technically have two accesses to the main table - one occurs when we
+ * see key 5000 is in the Bloom filter, and we need to do a full test, we
+ * make an access to the projection table:tablename(v2), that's just to get
+ * the value of v2, which we'll check by comparison to the cursor at 5001.
+ * That counts as a main table access, and when we see it is satisfied and
+ * return the complete set of values, we'll access the main table with the
+ * full projection (that's the second main table access).
+ *
+ * Failure mode: Before fixes of WT-2447, we saw lots of accesses to the main
+ * table.
+ */
+
+void (*custom_die)(void) = NULL;
+
+#define N_RECORDS 10000
+
+/*
+ * get_stat_total --
+ *	Print every statistic of a join cursor and store in *pval the sum of
+ * the values whose description contains descmatch. Asserts that at least
+ * one description matched.
+ */
+static void
+get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch,
+    uint64_t *pval)
+{
+	WT_CURSOR *stats;
+	uint64_t value;
+	int ret;
+	bool found;
+	char *desc, *pvalue;
+
+	*pval = 0;
+	found = false;
+	testutil_check(session->open_cursor(
+	    session, "statistics:join", jcursor, NULL, &stats));
+
+	for (;;) {
+		if ((ret = stats->next(stats)) != 0)
+			break;
+
+		testutil_assert(
+		    stats->get_value(stats, &desc, &pvalue, &value) == 0);
+
+		printf(
+		    "statistics: %s: %s: %" PRIu64 "\n", desc, pvalue, value);
+
+		if (strstr(desc, descmatch) == NULL)
+			continue;
+		found = true;
+		*pval += value;
+	}
+
+	/* The only acceptable way to leave the loop is end-of-data. */
+	testutil_assert(ret == WT_NOTFOUND);
+	testutil_check(stats->close(stats));
+	testutil_assert(found);
+}
+
+/*
+ * main --
+ * Load N_RECORDS rows with two indexed columns, join the two indices
+ * (v1 >= half AND v2 < half + 1, the latter through a Bloom filter), check
+ * that exactly one row is returned and that the join statistics report
+ * exactly 2 accesses to the main table.
+ */
+int
+main(int argc, char *argv[])
+{
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor1, *cursor2, *jcursor;
+ WT_ITEM d;
+ WT_SESSION *session;
+ uint64_t maincount;
+ int half, i, j;
+ const char *tablename;
+ char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256];
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ /* Derive the index and join URIs from the name after the URI's ':'. */
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename);
+ snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename);
+ snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri);
+
+ testutil_check(wiredtiger_open(opts->home, NULL,
+ "statistics=(all),create", &opts->conn));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Main table (k -> v1, v2, d) with one index on each of v1 and v2. */
+ testutil_check(session->create(session, opts->uri,
+ "key_format=i,value_format=iiu,columns=(k,v1,v2,d)"));
+ testutil_check(session->create(session, index1uri, "columns=(v1)"));
+ testutil_check(session->create(session, index2uri, "columns=(v2)"));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
+ &cursor1));
+
+ /* Every row carries the same 4100-byte payload, filled with 7s. */
+ d.size = 4100;
+ d.data = dmalloc(d.size);
+ memset((char *)d.data, 7, d.size);
+
+ /* Row i is (k=i, v1=i, v2=i, d=payload). */
+ for (i = 0; i < N_RECORDS; ++i)
+ {
+ cursor1->set_key(cursor1, i);
+ cursor1->set_value(cursor1, i, i, &d);
+ testutil_check(cursor1->insert(cursor1));
+ }
+
+ free((void*)d.data);
+
+ /*
+ * Close and re-open the connection; the session and cursor opened above
+ * are not used again, a fresh session is opened on the new connection.
+ */
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ testutil_check(wiredtiger_open(opts->home, NULL,
+ "statistics=(all),create,cache_size=1GB", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL,
+ &session));
+
+ testutil_check(session->open_cursor(session, index1uri, NULL, NULL,
+ &cursor1));
+ testutil_check(session->open_cursor(session, index2uri, NULL, NULL,
+ &cursor2));
+
+ /* Position the v1 cursor at half: the "compare=ge" bound of the join. */
+ half = N_RECORDS / 2;
+ cursor1->set_key(cursor1, half);
+ testutil_check(cursor1->search(cursor1));
+
+ /* Position the v2 cursor at half + 1: the "compare=lt" bound. */
+ cursor2->set_key(cursor2, half + 1);
+ testutil_check(cursor2->search(cursor2));
+
+ sprintf(bloom_cfg, "compare=lt,strategy=bloom,count=%d", half);
+
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL,
+ &jcursor));
+ testutil_check(session->join(session, jcursor, cursor1, "compare=ge"));
+ testutil_check(session->join(session, jcursor, cursor2, bloom_cfg));
+
+ /* Expect one value returned */
+ testutil_assert(jcursor->next(jcursor) == 0);
+ i = 0;
+ testutil_assert(jcursor->get_key(jcursor, &i) == 0);
+ testutil_assert(i == (int)half);
+ /* Clear the outputs first so stale values cannot satisfy the checks. */
+ i = j = 0;
+ memset(&d, 0, sizeof(d));
+ testutil_assert(jcursor->get_value(jcursor, &i, &j, &d) == 0);
+ testutil_assert(i == (int)half);
+ testutil_assert(j == (int)half);
+ testutil_assert(d.size == 4100);
+ for (i = 0; i < 4100; i++)
+ testutil_assert(((char *)d.data)[i] == 7);
+
+ testutil_assert(jcursor->next(jcursor) == WT_NOTFOUND);
+
+ /*
+ * Make sure there have been 2 accesses to the main table,
+ * explained in the discussion above.
+ */
+ get_stat_total(session, jcursor, "accesses to the main table",
+ &maincount);
+ testutil_assert(maincount == 2);
+
+ testutil_cleanup(opts);
+
+ return (0);
+}
diff --git a/test/csuite/wt2535_insert_race/main.c b/test/csuite/wt2535_insert_race/main.c
new file mode 100644
index 00000000000..5eaca3279b6
--- /dev/null
+++ b/test/csuite/wt2535_insert_race/main.c
@@ -0,0 +1,159 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-2535
+ * Test case description: This is a test case that looks for lost updates to
+ * a single record. That is, multiple threads each do the same number of
+ * read-modify-write operations on a single record. At the end, verify that
+ * the data contains the expected value.
+ * Failure mode: Check that the data is correct at the end of the run.
+ */
+
+void (*custom_die)(void) = NULL;
+
+void *thread_insert_race(void *);
+
+/*
+ * main --
+ *	Create a table holding a single record (key 1, value 0), let
+ * opts->nthreads threads each increment it opts->nrecords times, then check
+ * the final value is nthreads * nrecords. Returns EXIT_FAILURE if updates
+ * were lost.
+ */
+int
+main(int argc, char *argv[])
+{
+	TEST_OPTS *opts, _opts;
+	WT_CURSOR *c;
+	WT_SESSION *session;
+	clock_t ce, cs;
+	pthread_t id[100];
+	uint64_t current_value;
+	int i;
+
+	opts = &_opts;
+	memset(opts, 0, sizeof(*opts));
+	opts->nthreads = 10;
+	opts->nrecords = 1000;
+	opts->table_type = TABLE_ROW;
+	testutil_check(testutil_parse_opts(argc, argv, opts));
+	testutil_make_work_dir(opts->home);
+
+	/*
+	 * The thread IDs live in a fixed-size array; refuse a thread count
+	 * that would write past its end.
+	 */
+	testutil_assert(opts->nthreads <= sizeof(id) / sizeof(id[0]));
+
+	testutil_check(wiredtiger_open(opts->home, NULL,
+	    "create,"
+	    "cache_size=2G,"
+	    "eviction=(threads_max=5),"
+	    "statistics=(fast)", &opts->conn));
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
+	testutil_check(session->create(session, opts->uri,
+	    "key_format=Q,value_format=Q,"
+	    "leaf_page_max=32k,"));
+
+	/*
+	 * Create the single record. The key and value formats are 'Q', so the
+	 * variadic arguments must really be 64-bit: a plain int literal is
+	 * the wrong type to pass through "...".
+	 */
+	testutil_check(
+	    session->open_cursor(session, opts->uri, NULL, NULL, &c));
+	c->set_key(c, (uint64_t)1);
+	c->set_value(c, (uint64_t)0);
+	testutil_check(c->insert(c));
+	testutil_check(c->close(c));
+
+	cs = clock();
+	for (i = 0; i < (int)opts->nthreads; ++i) {
+		testutil_check(pthread_create(
+		    &id[i], NULL, thread_insert_race, (void *)opts));
+	}
+	while (--i >= 0)
+		testutil_check(pthread_join(id[i], NULL));
+
+	/* Read the record back and compare against the expected total. */
+	testutil_check(
+	    session->open_cursor(session, opts->uri, NULL, NULL, &c));
+	c->set_key(c, (uint64_t)1);
+	testutil_check(c->search(c));
+	testutil_check(c->get_value(c, &current_value));
+	if (current_value != opts->nthreads * opts->nrecords) {
+		fprintf(stderr,
+		    "ERROR: didn't get expected number of changes\n");
+		fprintf(stderr, "got: %" PRIu64 ", expected: %" PRIu64 "\n",
+		    current_value, opts->nthreads * opts->nrecords);
+		return (EXIT_FAILURE);
+	}
+	testutil_check(session->close(session, NULL));
+	ce = clock();
+	printf("%" PRIu64 ": %.2lf\n",
+	    opts->nrecords, (ce - cs) / (double)CLOCKS_PER_SEC);
+
+	testutil_cleanup(opts);
+	return (EXIT_SUCCESS);
+}
+
+/*
+ * thread_insert_race --
+ *	Update a single record in a "racy" fashion: every thread repeatedly
+ * reads the record with key 1 and writes back the value plus one inside a
+ * snapshot-isolation transaction. An update that fails with WT_ROLLBACK is
+ * rolled back and retried, so each thread applies exactly opts->nrecords
+ * increments.
+ */
+void *
+thread_insert_race(void *arg)
+{
+	TEST_OPTS *opts;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	uint64_t i, value;
+	int ret;
+
+	opts = (TEST_OPTS *)arg;
+	conn = opts->conn;
+
+	testutil_check(conn->open_session(conn, NULL, NULL, &session));
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, NULL, &cursor));
+
+	printf("Running insert thread\n");
+
+	/* "i" counts committed increments; it only advances on success. */
+	for (i = 0; i < opts->nrecords;) {
+		testutil_check(
+		    session->begin_transaction(session, "isolation=snapshot"));
+
+		/*
+		 * The key format is 'Q': pass a real 64-bit value through the
+		 * variadic call rather than an int literal.
+		 */
+		cursor->set_key(cursor, (uint64_t)1);
+		testutil_check(cursor->search(cursor));
+		testutil_check(cursor->get_value(cursor, &value));
+		cursor->set_key(cursor, (uint64_t)1);
+		cursor->set_value(cursor, value + 1);
+
+		ret = cursor->update(cursor);
+		if (ret == WT_ROLLBACK) {
+			/* Lost the race: undo and retry the same increment. */
+			testutil_check(
+			    session->rollback_transaction(session, NULL));
+			continue;
+		}
+		if (ret != 0)
+			printf("Error in update: %d\n", ret);
+
+		testutil_check(session->commit_transaction(session, NULL));
+		if (i % 10000 == 0) {
+			printf("insert: %" PRIu64 "\r", i);
+			fflush(stdout);
+		}
+		++i;
+	}
+	if (i > 10000)
+		printf("\n");
+
+	opts->running = false;
+
+	/* Release this thread's session (and the cursor opened on it). */
+	testutil_check(session->close(session, NULL));
+
+	return (NULL);
+}
diff --git a/test/csuite/wt2592_join_schema/main.c b/test/csuite/wt2592_join_schema/main.c
new file mode 100644
index 00000000000..4ffc9194646
--- /dev/null
+++ b/test/csuite/wt2592_join_schema/main.c
@@ -0,0 +1,222 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-2592
+ * Test case description: This is an adaptation of the join parts of
+ * ex_schema.c, but written as a test. Though we have join tests in the
+ * Python test suite, the Python API uses raw mode for cursors, so errors
+ * that are specific to non-raw mode are undetected in Python.
+ * Failure mode: The failure seen in WT-2592 was that no items were returned
+ * by a join.
+ */
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <wiredtiger.h>
+
+/*
+ * The C struct for the data we are storing in a WiredTiger table.
+ * The fields line up with the table's "5sHQ" value format used in main.
+ */
+typedef struct {
+ char country[5];
+ uint16_t year;
+ uint64_t population;
+} POP_RECORD;
+
+/*
+ * Rows loaded into the table: three years for each of four countries. The
+ * final entry (year 0) is a terminator; the load loop stops when it sees it.
+ * NOTE(review): the UK/1900 population is far larger than the UK/1950 one
+ * and looks like a typo; the test only counts joined rows, so it does not
+ * affect the result -- confirm before relying on the values.
+ */
+static POP_RECORD pop_data[] = {
+ { "AU", 1900, 4000000 },
+ { "AU", 1950, 8267337 },
+ { "AU", 2000, 19053186 },
+ { "CAN", 1900, 5500000 },
+ { "CAN", 1950, 14011422 },
+ { "CAN", 2000, 31099561 },
+ { "UK", 1900, 369000000 },
+ { "UK", 1950, 50127000 },
+ { "UK", 2000, 59522468 },
+ { "USA", 1900, 76212168 },
+ { "USA", 1950, 150697361 },
+ { "USA", 2000, 301279593 },
+ { "", 0, 0 }
+};
+
+void (*custom_die)(void) = NULL;
+
+/*
+ * list_join_results --
+ *	Walk a join cursor to the end, printing every row, and return the
+ * number of rows seen. Asserts that the walk ended with WT_NOTFOUND.
+ */
+static int
+list_join_results(WT_CURSOR *join_cursor)
+{
+	const char *country;
+	uint64_t population, recno;
+	uint16_t year;
+	int count, ret;
+
+	count = 0;
+	while ((ret = join_cursor->next(join_cursor)) == 0) {
+		testutil_check(join_cursor->get_key(join_cursor, &recno));
+		testutil_check(join_cursor->get_value(join_cursor, &country,
+		    &year, &population));
+		printf("ID %" PRIu64, recno);
+		printf(
+		    ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
+		    country, year, population);
+		count++;
+	}
+	testutil_assert(ret == WT_NOTFOUND);
+	return (count);
+}
+
+/*
+ * main --
+ *	Load the population table, then run two joins through non-raw
+ * cursors and check the number of rows each returns:
+ *	country == "AU" AND year > 1900				(2 rows)
+ *	(country == "AU" OR country == "UK") AND year > 1900	(4 rows)
+ */
+int
+main(int argc, char *argv[])
+{
+	POP_RECORD *p;
+	TEST_OPTS *opts, _opts;
+	WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor,
+	    *subjoin_cursor, *year_cursor;
+	WT_SESSION *session;
+	const char *tablename;
+	char countryuri[256], joinuri[256], yearuri[256];
+	uint64_t recno;
+
+	opts = &_opts;
+	memset(opts, 0, sizeof(*opts));
+	testutil_check(testutil_parse_opts(argc, argv, opts));
+	testutil_make_work_dir(opts->home);
+
+	/* Derive the index and join URIs from the name after the URI's ':'. */
+	tablename = strchr(opts->uri, ':');
+	testutil_assert(tablename != NULL);
+	tablename++;
+	snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename);
+	snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename);
+	snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri);
+
+	testutil_check(wiredtiger_open(opts->home, NULL,
+	    "create,cache_size=200M", &opts->conn));
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
+	testutil_check(session->create(session, opts->uri,
+	    "key_format=r,"
+	    "value_format=5sHQ,"
+	    "columns=(id,country,year,population)"));
+
+	/* Create an index with a simple key. */
+	testutil_check(session->create(session,
+	    countryuri, "columns=(country)"));
+
+	/* Create an immutable index. */
+	testutil_check(session->create(session,
+	    yearuri, "columns=(year),immutable"));
+
+	/*
+	 * Insert the records into the table. The key format is 'r', a 64-bit
+	 * record number, so the counter passed through the variadic set_key
+	 * call is a uint64_t rather than an int.
+	 */
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, "append", &cursor));
+	recno = 1;
+	for (p = pop_data; p->year != 0; p++) {
+		cursor->set_key(cursor, recno);
+		cursor->set_value(cursor, p->country, p->year, p->population);
+		testutil_check(cursor->insert(cursor));
+		recno++;
+	}
+	testutil_check(cursor->close(cursor));
+
+	/* Open cursors needed by the join. */
+	testutil_check(session->open_cursor(session,
+	    joinuri, NULL, NULL, &join_cursor));
+	testutil_check(session->open_cursor(session,
+	    countryuri, NULL, NULL, &country_cursor));
+	testutil_check(session->open_cursor(session,
+	    yearuri, NULL, NULL, &year_cursor));
+
+	/* select values WHERE country == "AU" AND year > 1900 */
+	country_cursor->set_key(country_cursor, "AU\0\0\0");
+	testutil_check(country_cursor->search(country_cursor));
+	testutil_check(session->join(session, join_cursor, country_cursor,
+	    "compare=eq,count=10"));
+	year_cursor->set_key(year_cursor, (uint16_t)1900);
+	testutil_check(year_cursor->search(year_cursor));
+	testutil_check(session->join(session, join_cursor, year_cursor,
+	    "compare=gt,count=10,strategy=bloom"));
+
+	/* List the values that are joined */
+	testutil_assert(list_join_results(join_cursor) == 2);
+
+	testutil_check(join_cursor->close(join_cursor));
+	testutil_check(year_cursor->close(year_cursor));
+	testutil_check(country_cursor->close(country_cursor));
+
+	/* Open cursors needed by the join. */
+	testutil_check(session->open_cursor(session,
+	    joinuri, NULL, NULL, &join_cursor));
+	testutil_check(session->open_cursor(session,
+	    joinuri, NULL, NULL, &subjoin_cursor));
+	testutil_check(session->open_cursor(session,
+	    countryuri, NULL, NULL, &country_cursor));
+	testutil_check(session->open_cursor(session,
+	    countryuri, NULL, NULL, &country_cursor2));
+	testutil_check(session->open_cursor(session,
+	    yearuri, NULL, NULL, &year_cursor));
+
+	/*
+	 * select values WHERE (country == "AU" OR country == "UK")
+	 *     AND year > 1900
+	 *
+	 * First, set up the join representing the country clause.
+	 */
+	country_cursor->set_key(country_cursor, "AU\0\0\0");
+	testutil_check(country_cursor->search(country_cursor));
+	testutil_check(session->join(session, subjoin_cursor, country_cursor,
+	    "operation=or,compare=eq,count=10"));
+	country_cursor2->set_key(country_cursor2, "UK\0\0\0");
+	testutil_check(country_cursor2->search(country_cursor2));
+	testutil_check(session->join(session, subjoin_cursor, country_cursor2,
+	    "operation=or,compare=eq,count=10"));
+
+	/* Join that to the top join, and add the year clause */
+	testutil_check(session->join(session, join_cursor, subjoin_cursor,
+	    NULL));
+	year_cursor->set_key(year_cursor, (uint16_t)1900);
+	testutil_check(year_cursor->search(year_cursor));
+	testutil_check(session->join(session, join_cursor, year_cursor,
+	    "compare=gt,count=10,strategy=bloom"));
+
+	/* List the values that are joined */
+	testutil_assert(list_join_results(join_cursor) == 4);
+
+	testutil_check(join_cursor->close(join_cursor));
+	testutil_check(subjoin_cursor->close(subjoin_cursor));
+	testutil_check(country_cursor->close(country_cursor));
+	testutil_check(country_cursor2->close(country_cursor2));
+	testutil_check(year_cursor->close(year_cursor));
+	testutil_check(session->close(session, NULL));
+
+	testutil_cleanup(opts);
+	return (EXIT_SUCCESS);
+}
diff --git a/test/cursor_order/Makefile.am b/test/cursor_order/Makefile.am
index c0c0ed639bf..c98cf1fa047 100644
--- a/test/cursor_order/Makefile.am
+++ b/test/cursor_order/Makefile.am
@@ -1,13 +1,15 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = cursor_order
-cursor_order_LDADD = $(top_builddir)/libwiredtiger.la
-
cursor_order_SOURCES = cursor_order_file.c cursor_order_ops.c cursor_order.c
+
+cursor_order_LDADD = $(top_builddir)/test/utility/libtest_util.la
+cursor_order_LDADD +=$(top_builddir)/libwiredtiger.la
cursor_order_LDFLAGS = -static
TESTS = $(noinst_PROGRAMS)
clean-local:
- rm -rf WiredTiger* wt.* *.core __stats
+ rm -rf WT_TEST *.core
diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c
index d8cfc0c1421..aa351e6fea8 100644
--- a/test/cursor_order/cursor_order.c
+++ b/test/cursor_order/cursor_order.c
@@ -34,7 +34,8 @@ static FILE *logfp; /* Log file */
static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int);
+static void onint(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
static int usage(void);
static void wt_connect(SHARED_CONFIG *, char *);
diff --git a/test/cursor_order/cursor_order.h b/test/cursor_order/cursor_order.h
index dd49fce124b..98a7d03c6f3 100644
--- a/test/cursor_order/cursor_order.h
+++ b/test/cursor_order/cursor_order.h
@@ -28,7 +28,7 @@
#include <signal.h>
-#include "test_util.i"
+#include "test_util.h"
#define FNAME "file:cursor_order.%03d" /* File name */
diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c
index d44505ab2f3..a2185dd123f 100644
--- a/test/cursor_order/cursor_order_ops.c
+++ b/test/cursor_order/cursor_order_ops.c
@@ -59,22 +59,16 @@ ops_start(SHARED_CONFIG *cfg)
total_nops = 0;
/* Create per-thread structures. */
- if ((run_info = calloc(
- (size_t)(cfg->reverse_scanners + cfg->append_inserters),
- sizeof(*run_info))) == NULL)
- testutil_die(errno, "calloc");
-
- if ((tids = calloc(
- (size_t)(cfg->reverse_scanners + cfg->append_inserters),
- sizeof(*tids))) == NULL)
- testutil_die(errno, "calloc");
+ run_info = dcalloc((size_t)
+ (cfg->reverse_scanners + cfg->append_inserters), sizeof(*run_info));
+ tids = dcalloc((size_t)
+ (cfg->reverse_scanners + cfg->append_inserters), sizeof(*tids));
/* Create the files and load the initial records. */
for (i = 0; i < cfg->append_inserters; ++i) {
run_info[i].cfg = cfg;
if (i == 0 || cfg->multiple_files) {
- if ((run_info[i].name = malloc(64)) == NULL)
- testutil_die(errno, "malloc");
+ run_info[i].name = dmalloc(64);
snprintf(run_info[i].name, 64, FNAME, (int)i);
/* Vary by orders of magnitude */
@@ -96,8 +90,7 @@ ops_start(SHARED_CONFIG *cfg)
offset = i + cfg->append_inserters;
run_info[offset].cfg = cfg;
if (cfg->multiple_files) {
- if ((run_info[offset].name = malloc(64)) == NULL)
- testutil_die(errno, "malloc");
+ run_info[offset].name = dmalloc(64);
/* Have reverse scans read from tables with writes. */
name_index = i % cfg->append_inserters;
snprintf(
diff --git a/test/fops/Makefile.am b/test/fops/Makefile.am
index a4fa7175f1b..f8a76de82bc 100644
--- a/test/fops/Makefile.am
+++ b/test/fops/Makefile.am
@@ -1,10 +1,13 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
-t_LDADD = $(top_builddir)/libwiredtiger.la
t_SOURCES = thread.h file.c fops.c t.c
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
diff --git a/test/fops/fops.c b/test/fops/fops.c
index 3333ff16858..3c4de161423 100644
--- a/test/fops/fops.c
+++ b/test/fops/fops.c
@@ -59,10 +59,8 @@ fop_start(u_int nthreads)
tids = NULL; /* Silence GCC 4.1 warning. */
/* Create statistics and thread structures. */
- if ((run_stats = calloc(
- (size_t)(nthreads), sizeof(*run_stats))) == NULL ||
- (tids = calloc((size_t)(nthreads), sizeof(*tids))) == NULL)
- testutil_die(errno, "calloc");
+ run_stats = dcalloc((size_t)(nthreads), sizeof(*run_stats));
+ tids = dcalloc((size_t)(nthreads), sizeof(*tids));
(void)gettimeofday(&start, NULL);
diff --git a/test/fops/t.c b/test/fops/t.c
index 24994404c7c..bf0588d5a53 100644
--- a/test/fops/t.c
+++ b/test/fops/t.c
@@ -41,7 +41,8 @@ static char home[512];
static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int);
+static void onint(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
static int usage(void);
static void wt_startup(char *);
diff --git a/test/fops/thread.h b/test/fops/thread.h
index f9707c14590..89b7984a166 100644
--- a/test/fops/thread.h
+++ b/test/fops/thread.h
@@ -26,25 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/types.h>
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
+#include "test_util.h"
-#include <errno.h>
-#include <inttypes.h>
-#ifndef _WIN32
-#include <pthread.h>
-#endif
#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "test_util.i"
extern WT_CONNECTION *conn; /* WiredTiger connection */
diff --git a/test/format/Makefile.am b/test/format/Makefile.am
index 8a2e2b49e4b..5d946e5b63d 100644
--- a/test/format/Makefile.am
+++ b/test/format/Makefile.am
@@ -1,21 +1,24 @@
-AM_CPPFLAGS = -I$(top_builddir) \
- -I$(top_srcdir)/src/include -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
if HAVE_BERKELEY_DB
-AM_CPPFLAGS += -DHAVE_BERKELEY_DB \
- -DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" -I$(BERKELEY_DB_PATH)/include
+AM_CPPFLAGS +=-DHAVE_BERKELEY_DB
+AM_CPPFLAGS +=-DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\"
+AM_CPPFLAGS +=-I$(BERKELEY_DB_PATH)/include
endif
noinst_PROGRAMS = t
noinst_SCRIPTS = s_dumpcmp
t_SOURCES =\
- config.h format.h backup.c bulk.c compact.c config.c lrt.c ops.c \
- rebalance.c salvage.c t.c util.c wts.c
+ backup.c bulk.c compact.c config.c lrt.c ops.c rebalance.c \
+ salvage.c t.c util.c wts.c
if HAVE_BERKELEY_DB
t_SOURCES += bdb.c
endif
-t_LDADD = $(top_builddir)/libwiredtiger.la
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
if HAVE_BERKELEY_DB
t_LDADD += -L$(BERKELEY_DB_PATH)/lib -ldb
endif
diff --git a/test/format/backup.c b/test/format/backup.c
index 2b1463bd0e3..69fdf771de9 100644
--- a/test/format/backup.c
+++ b/test/format/backup.c
@@ -38,7 +38,7 @@ check_copy(void)
WT_CONNECTION *conn;
WT_SESSION *session;
- wts_open(g.home_backup, 0, &conn);
+ wts_open(g.home_backup, false, &conn);
testutil_checkfmt(
conn->open_session(conn, NULL, NULL, &session),
@@ -53,27 +53,30 @@ check_copy(void)
/*
* copy_file --
- * Copy a single file into the backup directory.
+ * Copy a single file into the backup directories.
*/
static void
-copy_file(const char *name)
+copy_file(WT_SESSION *session, const char *name)
{
size_t len;
- char *cmd;
-
- len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20;
- cmd = dmalloc(len);
- (void)snprintf(cmd, len,
- "cp %s/%s %s/%s", g.home, name, g.home_backup, name);
- testutil_checkfmt(system(cmd), "backup copy: %s", cmd);
- free(cmd);
-
- len = strlen(g.home) + strlen(g.home_backup2) + strlen(name) * 2 + 20;
- cmd = dmalloc(len);
- (void)snprintf(cmd, len,
- "cp %s/%s %s/%s", g.home, name, g.home_backup2, name);
- testutil_checkfmt(system(cmd), "backup copy: %s", cmd);
- free(cmd);
+ char *first, *second;
+
+ len = strlen("BACKUP") + strlen(name) + 10;
+ first = dmalloc(len);
+ (void)snprintf(first, len, "BACKUP/%s", name);
+ testutil_check(__wt_copy_and_sync(session, name, first));
+
+ /*
+ * Save another copy of the original file to make debugging recovery
+ * errors easier.
+ */
+ len = strlen("BACKUP_COPY") + strlen(name) + 10;
+ second = dmalloc(len);
+ (void)snprintf(second, len, "BACKUP_COPY/%s", name);
+ testutil_check(__wt_copy_and_sync(session, first, second));
+
+ free(first);
+ free(second);
}
/*
@@ -85,10 +88,11 @@ backup(void *arg)
{
WT_CONNECTION *conn;
WT_CURSOR *backup_cursor;
+ WT_DECL_RET;
WT_SESSION *session;
- u_int period;
- int ret;
- const char *key;
+ u_int incremental, period;
+ bool full;
+ const char *config, *key;
(void)(arg);
@@ -102,48 +106,86 @@ backup(void *arg)
testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
- * Perform a backup at somewhere under 10 seconds (so we get at
- * least one done), and then at 45 second intervals.
+ * Perform a full backup at somewhere under 10 seconds (that way there's
+ * at least one), then at larger intervals; optionally do incremental
+ * backups between full backups.
*/
- for (period = mmrand(NULL, 1, 10);; period = 45) {
+ incremental = 0;
+ for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) {
/* Sleep for short periods so we don't make the run wait. */
while (period > 0 && !g.workers_finished) {
--period;
sleep(1);
}
- if (g.workers_finished)
- break;
- /* Lock out named checkpoints */
+ /*
+ * We can't drop named checkpoints while there's a backup in
+ * progress, serialize backups with named checkpoints. Wait
+ * for the checkpoint to complete, otherwise backups might be
+ * starved out.
+ */
testutil_check(pthread_rwlock_wrlock(&g.backup_lock));
+ if (g.workers_finished) {
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+ break;
+ }
- /* Re-create the backup directory. */
- testutil_checkfmt(
- system(g.home_backup_init),
- "%s", "backup directory creation failed");
+ if (incremental) {
+ config = "target=(\"log:\")";
+ full = false;
+ } else {
+ /* Re-create the backup directory. */
+ testutil_checkfmt(
+ system(g.home_backup_init),
+ "%s", "backup directory creation failed");
+
+ config = NULL;
+ full = true;
+ }
/*
- * open_cursor can return EBUSY if a metadata operation is
- * currently happening - retry in that case.
+ * open_cursor can return EBUSY if concurrent with a metadata
+ * operation, retry in that case.
*/
- while ((ret = session->open_cursor(session,
- "backup:", NULL, NULL, &backup_cursor)) == EBUSY)
- sleep(1);
+ while ((ret = session->open_cursor(
+ session, "backup:", NULL, config, &backup_cursor)) == EBUSY)
+ __wt_yield();
if (ret != 0)
testutil_die(ret, "session.open_cursor: backup");
while ((ret = backup_cursor->next(backup_cursor)) == 0) {
testutil_check(
backup_cursor->get_key(backup_cursor, &key));
- copy_file(key);
+ copy_file(session, key);
}
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "backup-cursor");
+
+ /* After an incremental backup, truncate the log files. */
+ if (incremental)
+ testutil_check(session->truncate(
+ session, "log:", backup_cursor, NULL, NULL));
testutil_check(backup_cursor->close(backup_cursor));
testutil_check(pthread_rwlock_unlock(&g.backup_lock));
- check_copy();
+ /*
+ * If automatic log archival isn't configured, optionally do
+ * incremental backups after each full backup. If we're not
+ * doing any more incrementals, verify the backup (we can't
+ * verify intermediate states: once we perform recovery on the
+ * backup database, we can't do any more incremental backups).
+ */
+ if (full)
+ incremental =
+ g.c_logging_archive ? 1 : mmrand(NULL, 1, 5);
+ if (--incremental == 0)
+ check_copy();
}
+ if (incremental != 0)
+ check_copy();
+
testutil_check(session->close(session, NULL));
return (NULL);
diff --git a/test/format/bdb.c b/test/format/bdb.c
index 823fc8ff888..e56281f2c3e 100644
--- a/test/format/bdb.c
+++ b/test/format/bdb.c
@@ -30,7 +30,7 @@
#include "format.h"
static DBT key, value;
-static uint8_t *keybuf;
+static WT_ITEM keyitem;
static int
bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2
@@ -78,7 +78,7 @@ bdb_open(void)
assert(db->cursor(db, NULL, &dbc, 0) == 0);
g.dbc = dbc;
- key_gen_setup(&keybuf);
+ key_gen_setup(&keyitem);
}
void
@@ -95,8 +95,7 @@ bdb_close(void)
assert(db->close(db, 0) == 0);
assert(dbenv->close(dbenv, 0) == 0);
- free(keybuf);
- keybuf = NULL;
+ free(keyitem.mem);
}
void
@@ -107,9 +106,9 @@ bdb_insert(
DBC *dbc;
key.data = (void *)key_data;
- key.size = (uint32_t)key_size;
+ key.size = (u_int32_t)key_size;
value.data = (void *)value_data;
- value.size = (uint32_t)value_size;
+ value.size = (u_int32_t)value_size;
dbc = g.dbc;
@@ -144,12 +143,11 @@ void
bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp)
{
DBC *dbc = g.dbc;
- size_t size;
int ret;
- key_gen(keybuf, &size, keyno);
- key.data = keybuf;
- key.size = (uint32_t)size;
+ key_gen(&keyitem, keyno);
+ key.data = (void *)keyitem.data;
+ key.size = (u_int32_t)keyitem.size;
*notfoundp = 0;
if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) {
@@ -165,25 +163,20 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp)
void
bdb_update(const void *arg_key, size_t arg_key_size,
- const void *arg_value, size_t arg_value_size, int *notfoundp)
+ const void *arg_value, size_t arg_value_size)
{
DBC *dbc = g.dbc;
int ret;
key.data = (void *)arg_key;
- key.size = (uint32_t)arg_key_size;
+ key.size = (u_int32_t)arg_key_size;
value.data = (void *)arg_value;
- value.size = (uint32_t)arg_value_size;
+ value.size = (u_int32_t)arg_value_size;
- *notfoundp = 0;
- if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) {
- if (ret != DB_NOTFOUND) {
- testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- }
- *notfoundp = 1;
- }
+ if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0)
+ testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
+ (int)key.size, (char *)key.data,
+ (int)value.size, (char *)value.data);
}
void
@@ -193,12 +186,12 @@ bdb_remove(uint64_t keyno, int *notfoundp)
size_t size;
int ret;
- key_gen(keybuf, &size, keyno);
- key.data = keybuf;
- key.size = (uint32_t)size;
+ key_gen(&keyitem, keyno);
+ key.data = (void *)keyitem.data;
+ key.size = (u_int32_t)keyitem.size;
bdb_read(keyno, &value.data, &size, notfoundp);
- value.size = (uint32_t)size;
+ value.size = (u_int32_t)size;
if (*notfoundp)
return;
diff --git a/test/format/bulk.c b/test/format/bulk.c
index 64b005d294f..dab23bed404 100644
--- a/test/format/bulk.c
+++ b/test/format/bulk.c
@@ -33,13 +33,12 @@ wts_load(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
+ WT_DECL_RET;
WT_ITEM key, value;
WT_SESSION *session;
- uint8_t *keybuf, *valbuf;
bool is_bulk;
conn = g.wts_conn;
- keybuf = valbuf = NULL;
testutil_check(conn->open_session(conn, NULL, NULL, &session));
@@ -63,8 +62,8 @@ wts_load(void)
is_bulk ? "bulk,append" : NULL, &cursor));
/* Set up the key/value buffers. */
- key_gen_setup(&keybuf);
- val_gen_setup(NULL, &valbuf);
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
for (;;) {
if (++g.key_cnt > g.c_rows) {
@@ -73,13 +72,11 @@ wts_load(void)
}
/* Report on progress every 100 inserts. */
- if (g.key_cnt % 100 == 0)
+ if (g.key_cnt % 1000 == 0)
track("bulk load", g.key_cnt, NULL);
- key_gen(keybuf, &key.size, (uint64_t)g.key_cnt);
- key.data = keybuf;
- val_gen(NULL, valbuf, &value.size, (uint64_t)g.key_cnt);
- value.data = valbuf;
+ key_gen(&key, g.key_cnt);
+ val_gen(NULL, &value, g.key_cnt);
switch (g.type) {
case FIX:
@@ -88,7 +85,7 @@ wts_load(void)
cursor->set_value(cursor, *(uint8_t *)value.data);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {0x%02" PRIx8 "}",
+ "%-10s %" PRIu64 " {0x%02" PRIx8 "}",
"bulk V",
g.key_cnt, ((uint8_t *)value.data)[0]);
break;
@@ -98,7 +95,7 @@ wts_load(void)
cursor->set_value(cursor, &value);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk V",
+ "%-10s %" PRIu64 " {%.*s}", "bulk V",
g.key_cnt,
(int)value.size, (char *)value.data);
break;
@@ -106,18 +103,40 @@ wts_load(void)
cursor->set_key(cursor, &key);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk K",
+ "%-10s %" PRIu64 " {%.*s}", "bulk K",
g.key_cnt, (int)key.size, (char *)key.data);
cursor->set_value(cursor, &value);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk V",
+ "%-10s %" PRIu64 " {%.*s}", "bulk V",
g.key_cnt,
(int)value.size, (char *)value.data);
break;
}
- testutil_check(cursor->insert(cursor));
+ /*
+ * We don't want to size the cache to ensure the initial data
+ * set can load in the in-memory case, guaranteeing the load
+ * succeeds probably means future updates are also guaranteed
+ * to succeed, which isn't what we want. If we run out of space
+ * in the initial load, reset the row counter and continue.
+ *
+ * Decrease inserts, they can't be successful if we're at the
+ * cache limit, and increase the delete percentage to get some
+ * extra space once the run starts.
+ */
+ if ((ret = cursor->insert(cursor)) != 0) {
+ if (ret != WT_CACHE_FULL)
+ testutil_die(ret, "cursor.insert");
+ g.rows = --g.key_cnt;
+ g.c_rows = (uint32_t)g.key_cnt;
+
+ if (g.c_insert_pct > 5)
+ g.c_insert_pct = 5;
+ if (g.c_delete_pct < 20)
+ g.c_delete_pct += 20;
+ break;
+ }
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
@@ -133,6 +152,6 @@ wts_load(void)
testutil_check(session->close(session, NULL));
- free(keybuf);
- free(valbuf);
+ free(key.mem);
+ free(value.mem);
}
diff --git a/test/format/compact.c b/test/format/compact.c
index a75ee4f2adf..240e5553697 100644
--- a/test/format/compact.c
+++ b/test/format/compact.c
@@ -36,9 +36,9 @@ void *
compact(void *arg)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
u_int period;
- int ret;
(void)(arg);
diff --git a/test/format/config.c b/test/format/config.c
index 042316d8344..1b09916bd88 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -35,6 +35,7 @@ static void config_encryption(void);
static const char *config_file_type(u_int);
static CONFIG *config_find(const char *, size_t);
static void config_in_memory(void);
+static void config_in_memory_check(void);
static int config_is_perm(const char *);
static void config_isolation(void);
static void config_lrt(void);
@@ -43,6 +44,7 @@ static void config_map_compression(const char *, u_int *);
static void config_map_encryption(const char *, u_int *);
static void config_map_file_type(const char *, u_int *);
static void config_map_isolation(const char *, u_int *);
+static void config_reset(void);
/*
* config_setup --
@@ -54,14 +56,10 @@ config_setup(void)
CONFIG *cp;
/* Clear any temporary values. */
- config_clear();
+ config_reset();
- /*
- * Periodically, run in-memory; don't do it on the first run, all our
- * smoke tests would hit it.
- */
- if (!config_is_perm("in_memory") && g.run_cnt % 20 == 19)
- g.c_in_memory = 1;
+ /* Periodically run in-memory. */
+ config_in_memory();
/*
* Choose a data source type and a file type: they're interrelated (LSM
@@ -145,7 +143,7 @@ config_setup(void)
/* Some data-sources don't support user-specified collations. */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
- g.c_reverse = 0;
+ config_single("reverse=off", 0);
/*
* Periodically, run single-threaded so we can compare the results to
@@ -159,7 +157,6 @@ config_setup(void)
config_compression("compression");
config_compression("logging_compression");
config_encryption();
- config_in_memory();
config_isolation();
config_lrt();
@@ -169,7 +166,7 @@ config_setup(void)
* Don't do it on the first run, all our smoke tests would hit it.
*/
if (!g.replay && g.run_cnt % 10 == 9 && !config_is_perm("delete_pct"))
- g.c_delete_pct = 0;
+ config_single("delete_pct=0", 0);
/*
* If this is an LSM run, set the cache size and crank up the insert
@@ -187,9 +184,12 @@ config_setup(void)
if (!config_is_perm("cache") && g.c_cache < g.c_threads)
g.c_cache = g.c_threads;
+ /* Give in-memory configuration a final review. */
+ config_in_memory_check();
+
/* Make the default maximum-run length 20 minutes. */
if (!config_is_perm("timer"))
- g.c_timer = 20;
+ config_single("timer=20", 0);
/*
* Key/value minimum/maximum are related, correct unless specified by
@@ -329,43 +329,89 @@ config_encryption(void)
/*
* config_in_memory --
- * In-memory configuration.
+ * Periodically set up an in-memory configuration.
*/
static void
config_in_memory(void)
{
+ /*
+ * Configure in-memory before configuring anything else, in-memory has
+ * many related requirements. Don't configure in-memory if there are any
+ * incompatible configurations, so we don't have to configure in-memory
+ * every time we configure something like LSM, that's too painful.
+ */
+ if (config_is_perm("backups"))
+ return;
+ if (config_is_perm("checkpoints"))
+ return;
+ if (config_is_perm("compression"))
+ return;
+ if (config_is_perm("data_source") && DATASOURCE("lsm"))
+ return;
+ if (config_is_perm("logging"))
+ return;
+ if (config_is_perm("rebalance"))
+ return;
+ if (config_is_perm("salvage"))
+ return;
+ if (config_is_perm("verify"))
+ return;
+
+ if (!config_is_perm("in_memory") && mmrand(NULL, 1, 20) == 1)
+ g.c_in_memory = 1;
+}
+
+/*
+ * config_in_memory_check --
+ * In-memory configuration review.
+ */
+static void
+config_in_memory_check(void)
+{
+ uint32_t cache;
+
if (g.c_in_memory == 0)
return;
/* Turn off a lot of stuff. */
if (!config_is_perm("backups"))
- g.c_backups = 0;
+ config_single("backups=off", 0);
if (!config_is_perm("checkpoints"))
- g.c_checkpoints = 0;
- if (!config_is_perm("compression")) {
- g.c_compression = dstrdup("none");
- g.c_compression_flag = COMPRESS_NONE;
- }
+ config_single("checkpoints=off", 0);
+ if (!config_is_perm("compression"))
+ config_single("compression=none", 0);
if (!config_is_perm("logging"))
- g.c_logging = 0;
+ config_single("logging=off", 0);
if (!config_is_perm("rebalance"))
- g.c_rebalance = 0;
+ config_single("rebalance=off", 0);
if (!config_is_perm("salvage"))
- g.c_salvage = 0;
+ config_single("salvage=off", 0);
if (!config_is_perm("verify"))
- g.c_verify = 0;
+ config_single("verify=off", 0);
/*
- * Ensure there is 250MB of cache per thread; keep keys/values small,
- * overflow items aren't an issue for in-memory configurations and it
- * keeps us from overflowing the cache.
+ * Keep keys/values small, overflow items aren't an issue for in-memory
+ * configurations and it keeps us from overflowing the cache.
*/
- if (!config_is_perm("cache"))
- g.c_cache = g.c_threads * 250;
if (!config_is_perm("key_max"))
- g.c_value_max = 64;
+ config_single("key_max=32", 0);
if (!config_is_perm("value_max"))
- g.c_value_max = 128;
+ config_single("value_max=80", 0);
+
+ /*
+ * Size the cache relative to the initial data set, use 2x the base
+ * size as a minimum.
+ */
+ if (!config_is_perm("cache")) {
+ cache = g.c_value_max;
+ if (g.type == ROW)
+ cache += g.c_key_max;
+ cache *= g.c_rows;
+ cache *= 2;
+ cache /= WT_MEGABYTE;
+ if (g.c_cache < cache)
+ g.c_cache = cache;
+ }
}
/*
@@ -413,11 +459,11 @@ config_lrt(void)
* stores.
*/
if (g.type == FIX) {
- if (g.c_long_running_txn && config_is_perm("long_running_txn"))
+ if (config_is_perm("long_running_txn"))
testutil_die(EINVAL,
"long_running_txn not supported with fixed-length "
"column store");
- g.c_long_running_txn = 0;
+ config_single("long_running_txn=off", 0);
}
}
@@ -503,18 +549,36 @@ config_file(const char *name)
/*
* config_clear --
- * Clear per-run values.
+ * Clear all configuration values.
*/
void
config_clear(void)
{
CONFIG *cp;
- /* Clear configuration data. */
+ /* Clear all allocated configuration data. */
+ for (cp = c; cp->name != NULL; ++cp)
+ if (cp->vstr != NULL) {
+ free((void *)*cp->vstr);
+ *cp->vstr = NULL;
+ }
+ free(g.uri);
+ g.uri = NULL;
+}
+
+/*
+ * config_reset --
+ * Clear per-run configuration values.
+ */
+static void
+config_reset(void)
+{
+ CONFIG *cp;
+
+ /* Clear temporary allocated configuration data. */
for (cp = c; cp->name != NULL; ++cp) {
F_CLR(cp, C_TEMP);
- if (!F_ISSET(cp, C_PERM) &&
- F_ISSET(cp, C_STRING) && cp->vstr != NULL) {
+ if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) {
free((void *)*cp->vstr);
*cp->vstr = NULL;
}
@@ -531,7 +595,7 @@ void
config_single(const char *s, int perm)
{
CONFIG *cp;
- uint32_t v;
+ long v;
char *p;
const char *ep;
@@ -557,43 +621,59 @@ config_single(const char *s, int perm)
exit(EXIT_FAILURE);
}
+ /*
+ * Free the previous setting if a configuration has been
+ * passed in twice.
+ */
+ if (*cp->vstr != NULL) {
+ free(*cp->vstr);
+ *cp->vstr = NULL;
+ }
+
if (strncmp(s, "checksum", strlen("checksum")) == 0) {
config_map_checksum(ep, &g.c_checksum_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(
s, "compression", strlen("compression")) == 0) {
config_map_compression(ep, &g.c_compression_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(
s, "encryption", strlen("encryption")) == 0) {
config_map_encryption(ep, &g.c_encryption_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
config_map_isolation(ep, &g.c_isolation_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "file_type", strlen("file_type")) == 0) {
config_map_file_type(ep, &g.type);
- *cp->vstr = strdup(config_file_type(g.type));
+ *cp->vstr = dstrdup(config_file_type(g.type));
} else if (strncmp(s, "logging_compression",
strlen("logging_compression")) == 0) {
config_map_compression(ep,
&g.c_logging_compression_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else {
free((void *)*cp->vstr);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
}
- if (*cp->vstr == NULL)
- testutil_die(errno, "malloc");
return;
}
- v = (uint32_t)strtoul(ep, &p, 10);
- if (*p != '\0') {
- fprintf(stderr, "%s: %s: illegal numeric value\n",
- g.progname, s);
- exit(EXIT_FAILURE);
+ v = -1;
+ if (F_ISSET(cp, C_BOOL)) {
+ if (strncmp(ep, "off", strlen("off")) == 0)
+ v = 0;
+ else if (strncmp(ep, "on", strlen("on")) == 0)
+ v = 1;
+ }
+ if (v == -1) {
+ v = strtol(ep, &p, 10);
+ if (*p != '\0') {
+ fprintf(stderr, "%s: %s: illegal numeric value\n",
+ g.progname, s);
+ exit(EXIT_FAILURE);
+ }
}
if (F_ISSET(cp, C_BOOL)) {
if (v != 0 && v != 1) {
@@ -607,7 +687,7 @@ config_single(const char *s, int perm)
g.progname, s, cp->min, cp->maxset);
exit(EXIT_FAILURE);
}
- *cp->v = v;
+ *cp->v = (uint32_t)v;
}
/*
diff --git a/test/format/config.h b/test/format/config.h
index a17614bc044..16fffb6fafe 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -294,6 +294,10 @@ static CONFIG c[] = {
"maximum time to run in minutes (default 20 minutes)",
C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL },
+ { "transaction-frequency",
+ "percent operations done inside an explicit transaction",
+ 0x0, 1, 100, 100, &g.c_txn_freq, NULL },
+
{ "value_max",
"maximum size of values",
0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL },
diff --git a/test/format/format.h b/test/format/format.h
index a129c5395fd..ad5f408ac30 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -26,42 +26,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/stat.h>
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
-#include <sys/types.h>
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <limits.h>
-#ifndef _WIN32
-#include <pthread.h>
-#endif
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-#include <time.h>
-
-#include "test_util.i"
+#include "test_util.h"
#ifdef BDB
+#include <assert.h>
#include <db.h>
#endif
-#if defined(__GNUC__)
-#define WT_GCC_ATTRIBUTE(x) __attribute__(x)
-#else
-#define WT_GCC_ATTRIBUTE(x)
-#endif
-
#define EXTPATH "../../ext/" /* Extensions path */
#define LZ4_PATH \
@@ -109,7 +80,6 @@ typedef struct {
char *home; /* Home directory */
char *home_backup; /* Hot-backup directory */
- char *home_backup2; /* Saved Hot-backup directory */
char *home_backup_init; /* Initialize backup command */
char *home_bdb; /* BDB directory */
char *home_config; /* Run CONFIG file path */
@@ -145,7 +115,8 @@ typedef struct {
int replay; /* Replaying a run. */
int workers_finished; /* Operations completed */
- pthread_rwlock_t backup_lock; /* Hot backup running */
+ pthread_rwlock_t backup_lock; /* Backup running */
+ pthread_rwlock_t checkpoint_lock; /* Checkpoint running */
WT_RAND_STATE rnd; /* Global RNG state */
@@ -224,6 +195,7 @@ typedef struct {
uint32_t c_statistics_server;
uint32_t c_threads;
uint32_t c_timer;
+ uint32_t c_txn_freq;
uint32_t c_value_max;
uint32_t c_value_min;
uint32_t c_verify;
@@ -288,7 +260,7 @@ typedef struct {
#define TINFO_COMPLETE 2 /* Finished */
#define TINFO_JOINED 3 /* Resolved */
volatile int state; /* state */
-} TINFO WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+} TINFO WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT);
#ifdef HAVE_BERKELEY_DB
void bdb_close(void);
@@ -297,7 +269,7 @@ void bdb_np(int, void *, size_t *, void *, size_t *, int *);
void bdb_open(void);
void bdb_read(uint64_t, void *, size_t *, int *);
void bdb_remove(uint64_t, int *);
-void bdb_update(const void *, size_t, const void *, size_t, int *);
+void bdb_update(const void *, size_t, const void *, size_t);
#endif
void *backup(void *);
@@ -308,25 +280,23 @@ void config_file(const char *);
void config_print(int);
void config_setup(void);
void config_single(const char *, int);
-void *dmalloc(size_t);
-char *dstrdup(const char *);
void fclose_and_clear(FILE **);
-void key_gen(uint8_t *, size_t *, uint64_t);
-void key_gen_insert(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t);
-void key_gen_setup(uint8_t **);
+void key_gen(WT_ITEM *, uint64_t);
+void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void key_gen_setup(WT_ITEM *);
void key_len_setup(void);
void *lrt(void *);
void path_setup(const char *);
-int read_row(WT_CURSOR *, WT_ITEM *, uint64_t, int);
+int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
uint32_t rng(WT_RAND_STATE *);
void track(const char *, uint64_t, TINFO *);
-void val_gen(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t);
-void val_gen_setup(WT_RAND_STATE *, uint8_t **);
+void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void val_gen_setup(WT_RAND_STATE *, WT_ITEM *);
void wts_close(void);
-void wts_create(void);
void wts_dump(const char *, int);
+void wts_init(void);
void wts_load(void);
-void wts_open(const char *, int, WT_CONNECTION **);
+void wts_open(const char *, bool, WT_CONNECTION **);
void wts_ops(int);
void wts_read_scan(void);
void wts_rebalance(void);
diff --git a/test/format/lrt.c b/test/format/lrt.c
index 451d2f4fa3c..937525522fa 100644
--- a/test/format/lrt.c
+++ b/test/format/lrt.c
@@ -43,17 +43,15 @@ lrt(void *arg)
uint64_t keyno, saved_keyno;
u_int period;
int pinned, ret;
- uint8_t bitfield, *keybuf;
+ uint8_t bitfield;
void *buf;
(void)(arg); /* Unused parameter */
saved_keyno = 0; /* [-Werror=maybe-uninitialized] */
- key_gen_setup(&keybuf);
- memset(&key, 0, sizeof(key));
- key.data = keybuf;
- memset(&value, 0, sizeof(value));
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
buf = NULL;
buf_len = buf_size = 0;
@@ -67,8 +65,8 @@ lrt(void *arg)
for (pinned = 0;;) {
if (pinned) {
/* Re-read the record at the end of the table. */
- while ((ret = read_row(cursor,
- &key, saved_keyno, 1)) == WT_ROLLBACK)
+ while ((ret = read_row(
+ cursor, &key, &value, saved_keyno)) == WT_ROLLBACK)
;
if (ret != 0)
testutil_die(ret,
@@ -112,7 +110,7 @@ lrt(void *arg)
(u_int)(g.key_cnt - g.key_cnt / 10),
(u_int)g.key_cnt);
while ((ret = read_row(cursor,
- &key, saved_keyno, 1)) == WT_ROLLBACK)
+ &key, &value, saved_keyno)) == WT_ROLLBACK)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
@@ -129,9 +127,8 @@ lrt(void *arg)
if (ret != 0)
testutil_die(ret,
"cursor.get_value: %" PRIu64, saved_keyno);
- if (buf_len < value.size &&
- (buf = realloc(buf, buf_len = value.size)) == NULL)
- testutil_die(errno, "malloc");
+ if (buf_len < value.size)
+ buf = drealloc(buf, buf_len = value.size);
memcpy(buf, value.data, buf_size = value.size);
/*
@@ -142,7 +139,7 @@ lrt(void *arg)
do {
keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5);
while ((ret = read_row(cursor,
- &key, keyno, 1)) == WT_ROLLBACK)
+ &key, &value, keyno)) == WT_ROLLBACK)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
@@ -165,7 +162,8 @@ lrt(void *arg)
testutil_check(session->close(session, NULL));
- free(keybuf);
+ free(key.mem);
+ free(value.mem);
free(buf);
return (NULL);
diff --git a/test/format/ops.c b/test/format/ops.c
index 5d66f4d5391..c97d82809a1 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -28,14 +28,14 @@
#include "format.h"
-static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
-static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
-static int col_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
-static int nextprev(WT_CURSOR *, int, int *);
+static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
+static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t);
+static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int nextprev(WT_CURSOR *, int);
static void *ops(void *);
-static int row_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
-static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
-static int row_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t);
+static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
static void table_append_init(void);
#ifdef HAVE_BERKELEY_DB
@@ -103,8 +103,7 @@ wts_ops(int lastrun)
}
/* Create thread structure; start the worker threads. */
- if ((tinfo = calloc((size_t)g.c_threads, sizeof(*tinfo))) == NULL)
- testutil_die(errno, "calloc");
+ tinfo = dcalloc((size_t)g.c_threads, sizeof(*tinfo));
for (i = 0; i < g.c_threads; ++i) {
tinfo[i].id = (int)i + 1;
tinfo[i].state = TINFO_RUNNING;
@@ -184,6 +183,7 @@ wts_ops(int lastrun)
(void)pthread_join(compact_tid, NULL);
if (!SINGLETHREADED && g.c_long_running_txn)
(void)pthread_join(lrt_tid, NULL);
+ g.workers_finished = 0;
if (g.logging != 0) {
(void)g.wt_api->msg_printf(g.wt_api, session,
@@ -193,57 +193,229 @@ wts_ops(int lastrun)
}
/*
- * ops_session_config --
- * Return the current session configuration.
+ * isolation_config --
+ * Return an isolation configuration.
*/
-static const char *
-ops_session_config(WT_RAND_STATE *rnd)
+static inline const char *
+isolation_config(WT_RAND_STATE *rnd, bool *iso_snapshotp)
{
u_int v;
- /*
- * The only current session configuration is the isolation level.
- */
if ((v = g.c_isolation_flag) == ISOLATION_RANDOM)
v = mmrand(rnd, 2, 4);
switch (v) {
case ISOLATION_READ_UNCOMMITTED:
+ *iso_snapshotp = false;
return ("isolation=read-uncommitted");
case ISOLATION_READ_COMMITTED:
+ *iso_snapshotp = false;
return ("isolation=read-committed");
case ISOLATION_SNAPSHOT:
default:
+ *iso_snapshotp = true;
return ("isolation=snapshot");
}
}
+typedef struct {
+ uint64_t keyno; /* Row number */
+
+ void *kdata; /* If an insert, the generated key */
+ size_t ksize;
+ size_t kmemsize;
+
+ void *vdata; /* If not a delete, the value */
+ size_t vsize;
+ size_t vmemsize;
+
+ bool deleted; /* Delete operation */
+ bool insert; /* Insert operation */
+} SNAP_OPS;
+
+/*
+ * snap_track --
+ * Add a single snapshot isolation returned value to the list.
+ */
+static void
+snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value)
+{
+ snap->keyno = keyno;
+ if (key == NULL)
+ snap->insert = false;
+ else {
+ snap->insert = true;
+
+ if (snap->kmemsize < key->size) {
+ snap->kdata = drealloc(snap->kdata, key->size);
+ snap->kmemsize = key->size;
+ }
+ memcpy(snap->kdata, key->data, snap->ksize = key->size);
+ }
+ if (value == NULL)
+ snap->deleted = true;
+ else {
+ snap->deleted = false;
+ if (snap->vmemsize < value->size) {
+ snap->vdata = drealloc(snap->vdata, value->size);
+ snap->vmemsize = value->size;
+ }
+ memcpy(snap->vdata, value->data, snap->vsize = value->size);
+ }
+}
+
+/*
+ * snap_check --
+ * Check snapshot isolation operations are repeatable.
+ */
+static int
+snap_check(WT_CURSOR *cursor,
+ SNAP_OPS *start, SNAP_OPS *stop, WT_ITEM *key, WT_ITEM *value)
+{
+ WT_DECL_RET;
+ SNAP_OPS *p;
+ uint8_t bitfield;
+
+ for (; start < stop; ++start) {
+ /* Check for subsequent changes to this record. */
+ for (p = start + 1; p < stop && p->keyno != start->keyno; ++p)
+ ;
+ if (p != stop)
+ continue;
+
+ /*
+ * Retrieve the key/value pair by key. Row-store inserts have a
+ * unique generated key we saved, else generate the key from the
+ * key number.
+ */
+ if (start->insert == 0) {
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ cursor->set_key(cursor, start->keyno);
+ break;
+ case ROW:
+ key_gen(key, start->keyno);
+ cursor->set_key(cursor, key);
+ break;
+ }
+ } else {
+ key->data = start->kdata;
+ key->size = start->ksize;
+ cursor->set_key(cursor, key);
+ }
+ if ((ret = cursor->search(cursor)) == 0) {
+ if (g.type == FIX) {
+ testutil_check(
+ cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
+ } else
+ testutil_check(
+ cursor->get_value(cursor, value));
+ } else
+ if (ret != WT_NOTFOUND)
+ return (ret);
+
+ /* Check for simple matches. */
+ if (ret == 0 && !start->deleted &&
+ value->size == start->vsize &&
+ memcmp(value->data, start->vdata, value->size) == 0)
+ continue;
+ if (ret == WT_NOTFOUND && start->deleted)
+ continue;
+
+ /*
+ * In fixed length stores, zero values at the end of the key
+ * space are returned as not-found, and not-found row reads
+ * are saved as zero values. Map back-and-forth for simplicity.
+ */
+ if (g.type == FIX) {
+ if (ret == WT_NOTFOUND &&
+ start->vsize == 1 && *(uint8_t *)start->vdata == 0)
+ continue;
+ if (start->deleted &&
+ value->size == 1 && *(uint8_t *)value->data == 0)
+ continue;
+ }
+
+ /* Things went pear-shaped. */
+ switch (g.type) {
+ case FIX:
+ testutil_die(ret,
+ "snap_check: %" PRIu64 " search: "
+ "expected {0x%02x}, found {0x%02x}",
+ start->keyno,
+ start->deleted ? 0 : *(uint8_t *)start->vdata,
+ ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
+ /* NOTREACHED */
+ case ROW:
+ testutil_die(ret,
+ "snap_check: %.*s search: "
+ "expected {%.*s}, found {%.*s}",
+ (int)key->size, key->data,
+ start->deleted ?
+ (int)strlen("deleted") : (int)start->vsize,
+ start->deleted ? "deleted" : start->vdata,
+ ret == WT_NOTFOUND ?
+ (int)strlen("deleted") : (int)value->size,
+ ret == WT_NOTFOUND ? "deleted" : value->data);
+ /* NOTREACHED */
+ case VAR:
+ testutil_die(ret,
+ "snap_check: %" PRIu64 " search: "
+ "expected {%.*s}, found {%.*s}",
+ start->keyno,
+ start->deleted ?
+ (int)strlen("deleted") : (int)start->vsize,
+ start->deleted ? "deleted" : start->vdata,
+ ret == WT_NOTFOUND ?
+ (int)strlen("deleted") : (int)value->size,
+ ret == WT_NOTFOUND ? "deleted" : value->data);
+ /* NOTREACHED */
+ }
+ }
+ return (0);
+}
+
+/*
+ * ops --
+ * Per-thread operations.
+ */
static void *
ops(void *arg)
{
+ SNAP_OPS *snap, snap_list[64];
TINFO *tinfo;
WT_CONNECTION *conn;
WT_CURSOR *cursor, *cursor_insert;
+ WT_DECL_RET;
+ WT_ITEM *key, _key, *value, _value;
WT_SESSION *session;
- WT_ITEM key, value;
uint64_t keyno, ckpt_op, reset_op, session_op;
- uint32_t op;
- uint8_t *keybuf, *valbuf;
- u_int np;
- int ckpt_available, dir, insert, intxn, notfound, readonly;
+ uint32_t op, rnd;
+ u_int i;
+ int dir;
char *ckpt_config, ckpt_name[64];
+ bool ckpt_available, intxn, iso_snapshot, positioned, readonly;
tinfo = arg;
conn = g.wts_conn;
- keybuf = valbuf = NULL;
- readonly = 0; /* -Wconditional-uninitialized */
+ readonly = false; /* -Wconditional-uninitialized */
+
+ /* Initialize tracking of snapshot isolation transaction returns. */
+ snap = NULL;
+ iso_snapshot = false;
+ memset(snap_list, 0, sizeof(snap_list));
/* Initialize the per-thread random number generator. */
__wt_random_init(&tinfo->rnd);
/* Set up the default key and value buffers. */
- key_gen_setup(&keybuf);
- val_gen_setup(&tinfo->rnd, &valbuf);
+ key = &_key;
+ key_gen_setup(key);
+ value = &_value;
+ val_gen_setup(&tinfo->rnd, value);
/* Set the first operation where we'll create sessions and cursors. */
session_op = 0;
@@ -252,12 +424,12 @@ ops(void *arg)
/* Set the first operation where we'll perform checkpoint operations. */
ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0;
- ckpt_available = 0;
+ ckpt_available = false;
/* Set the first operation where we'll reset the session. */
reset_op = mmrand(&tinfo->rnd, 100, 10000);
- for (intxn = 0; !tinfo->quit; ++tinfo->ops) {
+ for (intxn = false; !tinfo->quit; ++tinfo->ops) {
/*
* We can't checkpoint or swap sessions/cursors while in a
* transaction, resolve any running transaction.
@@ -267,7 +439,7 @@ ops(void *arg)
testutil_check(
session->commit_transaction(session, NULL));
++tinfo->commit;
- intxn = 0;
+ intxn = false;
}
/* Open up a new session and cursors. */
@@ -276,8 +448,8 @@ ops(void *arg)
if (session != NULL)
testutil_check(session->close(session, NULL));
- testutil_check(conn->open_session(conn, NULL,
- ops_session_config(&tinfo->rnd), &session));
+ testutil_check(
+ conn->open_session(conn, NULL, NULL, &session));
/*
* 10% of the time, perform some read-only operations
@@ -299,7 +471,7 @@ ops(void *arg)
session_op += 250;
/* Checkpoints are read-only. */
- readonly = 1;
+ readonly = true;
} else {
/*
* Open two cursors: one for overwriting and one
@@ -325,21 +497,32 @@ ops(void *arg)
session_op += mmrand(&tinfo->rnd, 100, 5000);
/* Updates supported. */
- readonly = 0;
+ readonly = false;
}
}
/* Checkpoint the database. */
if (tinfo->ops == ckpt_op && g.c_checkpoints) {
/*
- * LSM and data-sources don't support named checkpoints,
+ * Checkpoints are single-threaded inside WiredTiger,
+ * skip our checkpoint if another thread is already
+ * doing one.
+ */
+ ret = pthread_rwlock_trywrlock(&g.checkpoint_lock);
+ if (ret == EBUSY)
+ goto skip_checkpoint;
+ testutil_check(ret);
+
+ /*
+ * LSM and data-sources don't support named checkpoints
* and we can't drop a named checkpoint while there's a
- * cursor open on it, otherwise 20% of the time name the
- * checkpoint.
+ * backup in progress, otherwise name the checkpoint 5%
+ * of the time.
*/
- if (DATASOURCE("helium") || DATASOURCE("kvsbdb") ||
- DATASOURCE("lsm") ||
- readonly || mmrand(&tinfo->rnd, 1, 5) == 1)
+ if (mmrand(&tinfo->rnd, 1, 20) != 1 ||
+ DATASOURCE("helium") ||
+ DATASOURCE("kvsbdb") || DATASOURCE("lsm") ||
+ pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY)
ckpt_config = NULL;
else {
(void)snprintf(ckpt_name, sizeof(ckpt_name),
@@ -347,18 +530,22 @@ ops(void *arg)
ckpt_config = ckpt_name;
}
- /* Named checkpoints lock out backups */
- if (ckpt_config != NULL)
- testutil_check(
- pthread_rwlock_wrlock(&g.backup_lock));
-
- testutil_checkfmt(
- session->checkpoint(session, ckpt_config),
- "%s", ckpt_config == NULL ? "" : ckpt_config);
+ ret = session->checkpoint(session, ckpt_config);
+ /*
+ * We may be trying to create a named checkpoint while
+ * we hold a cursor open to the previous checkpoint.
+ * Tolerate EBUSY.
+ */
+ if (ret != 0 && ret != EBUSY)
+ testutil_die(ret, "%s",
+ ckpt_config == NULL ? "" : ckpt_config);
+ ret = 0;
if (ckpt_config != NULL)
testutil_check(
pthread_rwlock_unlock(&g.backup_lock));
+ testutil_check(
+ pthread_rwlock_unlock(&g.checkpoint_lock));
/* Rephrase the checkpoint name for cursor open. */
if (ckpt_config == NULL)
@@ -367,9 +554,9 @@ ops(void *arg)
else
(void)snprintf(ckpt_name, sizeof(ckpt_name),
"checkpoint=thread-%d", tinfo->id);
- ckpt_available = 1;
+ ckpt_available = true;
- /* Pick the next checkpoint operation. */
+skip_checkpoint: /* Pick the next checkpoint operation. */
ckpt_op += mmrand(&tinfo->rnd, 5000, 20000);
}
@@ -386,21 +573,24 @@ ops(void *arg)
}
/*
- * If we're not single-threaded and we're not in a transaction,
- * start a transaction 20% of the time.
+ * If we're not single-threaded and not in a transaction, choose
+ * an isolation level and start a transaction some percentage of
+ * the time.
*/
if (!SINGLETHREADED &&
- !intxn && mmrand(&tinfo->rnd, 1, 10) >= 8) {
+ !intxn && mmrand(&tinfo->rnd, 1, 100) >= g.c_txn_freq) {
+ testutil_check(
+ session->reconfigure(session,
+ isolation_config(&tinfo->rnd, &iso_snapshot)));
testutil_check(
session->begin_transaction(session, NULL));
- intxn = 1;
- }
- insert = notfound = 0;
+ snap = iso_snapshot ? snap_list : NULL;
+ intxn = true;
+ }
keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
- key.data = keybuf;
- value.data = valbuf;
+ positioned = false;
/*
* Perform some number of operations: the percentage of deletes,
@@ -414,27 +604,30 @@ ops(void *arg)
++tinfo->remove;
switch (g.type) {
case ROW:
- /*
- * If deleting a non-existent record, the cursor
- * won't be positioned, and so can't do a next.
- */
- if (row_remove(cursor, &key, keyno, &notfound))
- goto deadlock;
+ ret = row_remove(cursor, key, keyno);
break;
case FIX:
case VAR:
- if (col_remove(cursor, &key, keyno, &notfound))
- goto deadlock;
+ ret = col_remove(cursor, key, keyno);
break;
}
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, NULL);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
+ }
} else if (op < g.c_delete_pct + g.c_insert_pct) {
++tinfo->insert;
switch (g.type) {
case ROW:
- if (row_insert(
- tinfo, cursor, &key, &value, keyno))
- goto deadlock;
- insert = 1;
+ key_gen_insert(&tinfo->rnd, key, keyno);
+ val_gen(&tinfo->rnd, value, keyno);
+ ret = row_insert(cursor, key, value, keyno);
break;
case FIX:
case VAR:
@@ -447,37 +640,60 @@ ops(void *arg)
goto skip_insert;
/* Insert, then reset the insert cursor. */
- if (col_insert(tinfo,
- cursor_insert, &key, &value, &keyno))
- goto deadlock;
+ val_gen(&tinfo->rnd, value, g.rows + 1);
+ ret = col_insert(
+ cursor_insert, key, value, &keyno);
testutil_check(
cursor_insert->reset(cursor_insert));
-
- insert = 1;
break;
}
+ positioned = false;
+ if (ret == 0) {
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno,
+ g.type == ROW ? key : NULL, value);
+ } else
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
} else if (
op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) {
++tinfo->update;
switch (g.type) {
case ROW:
- if (row_update(
- tinfo, cursor, &key, &value, keyno))
- goto deadlock;
+ key_gen(key, keyno);
+ val_gen(&tinfo->rnd, value, keyno);
+ ret = row_update(cursor, key, value, keyno);
break;
case FIX:
case VAR:
-skip_insert: if (col_update(tinfo,
- cursor, &key, &value, keyno))
- goto deadlock;
+skip_insert: val_gen(&tinfo->rnd, value, keyno);
+ ret = col_update(cursor, key, value, keyno);
break;
}
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, value);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
+ }
} else {
++tinfo->search;
- if (read_row(cursor, &key, keyno, 0))
- if (intxn)
+ ret = read_row(cursor, key, value, keyno);
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, value);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
goto deadlock;
- continue;
+ }
}
/*
@@ -485,55 +701,64 @@ skip_insert: if (col_update(tinfo,
* insert, do a small number of next/prev cursor operations in
* a random direction.
*/
- if (!insert) {
+ if (positioned) {
dir = (int)mmrand(&tinfo->rnd, 0, 1);
- for (np = 0; np < mmrand(&tinfo->rnd, 1, 100); ++np) {
- if (notfound)
- break;
- if (nextprev(cursor, dir, &notfound))
+ for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) {
+ if ((ret = nextprev(cursor, dir)) == 0)
+ continue;
+ if (ret == WT_ROLLBACK && intxn)
goto deadlock;
+ break;
}
}
- /* Read to confirm the operation. */
- ++tinfo->search;
- if (read_row(cursor, &key, keyno, 0))
- goto deadlock;
-
/* Reset the cursor: there is no reason to keep pages pinned. */
testutil_check(cursor->reset(cursor));
/*
- * If we're in the transaction, commit 40% of the time and
+ * If we're in a transaction, commit 40% of the time and
* rollback 10% of the time.
*/
- if (intxn)
- switch (mmrand(&tinfo->rnd, 1, 10)) {
- case 1: case 2: case 3: case 4: /* 40% */
- testutil_check(session->commit_transaction(
- session, NULL));
- ++tinfo->commit;
- intxn = 0;
- break;
- case 5: /* 10% */
- if (0) {
-deadlock: ++tinfo->deadlock;
- }
- testutil_check(session->rollback_transaction(
- session, NULL));
- ++tinfo->rollback;
- intxn = 0;
- break;
- default:
- break;
+ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5)
+ continue;
+
+ /*
+ * Ending the transaction. If in snapshot isolation, repeat the
+ * operations and confirm they're unchanged.
+ */
+ if (snap != NULL && (ret = snap_check(
+ cursor, snap_list, snap, key, value)) == WT_ROLLBACK)
+ goto deadlock;
+
+ switch (rnd) {
+ case 1: case 2: case 3: case 4: /* 40% */
+ testutil_check(
+ session->commit_transaction(session, NULL));
+ ++tinfo->commit;
+ break;
+ case 5: /* 10% */
+ if (0) {
+deadlock: ++tinfo->deadlock;
}
+ testutil_check(
+ session->rollback_transaction(session, NULL));
+ ++tinfo->rollback;
+ break;
+ }
+
+ intxn = false;
+ snap = NULL;
}
if (session != NULL)
testutil_check(session->close(session, NULL));
- free(keybuf);
- free(valbuf);
+ for (i = 0; i < WT_ELEMENTS(snap_list); ++i) {
+ free(snap_list[i].kdata);
+ free(snap_list[i].vdata);
+ }
+ free(key->mem);
+ free(value->mem);
tinfo->state = TINFO_COMPLETE;
return (NULL);
@@ -548,40 +773,47 @@ wts_read_scan(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
- WT_ITEM key;
+ WT_DECL_RET;
+ WT_ITEM key, value;
WT_SESSION *session;
- uint64_t cnt, last_cnt;
- uint8_t *keybuf;
+ uint64_t keyno, last_keyno;
conn = g.wts_conn;
- /* Set up the default key buffer. */
- key_gen_setup(&keybuf);
+ /* Set up the default key/value buffers. */
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
/* Open a session and cursor pair. */
- testutil_check(conn->open_session(
- conn, NULL, ops_session_config(NULL), &session));
- testutil_check(session->open_cursor(
- session, g.uri, NULL, NULL, &cursor));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(
+ session->open_cursor(session, g.uri, NULL, NULL, &cursor));
/* Check a random subset of the records using the key. */
- for (last_cnt = cnt = 0; cnt < g.key_cnt;) {
- cnt += mmrand(NULL, 1, 17);
- if (cnt > g.rows)
- cnt = g.rows;
- if (cnt - last_cnt > 1000) {
- track("read row scan", cnt, NULL);
- last_cnt = cnt;
+ for (last_keyno = keyno = 0; keyno < g.key_cnt;) {
+ keyno += mmrand(NULL, 1, 17);
+ if (keyno > g.rows)
+ keyno = g.rows;
+ if (keyno - last_keyno > 1000) {
+ track("read row scan", keyno, NULL);
+ last_keyno = keyno;
}
- key.data = keybuf;
- testutil_checkfmt(
- read_row(cursor, &key, cnt, 0), "%s", "read_scan");
+ switch (ret = read_row(cursor, &key, &value, keyno)) {
+ case 0:
+ case WT_NOTFOUND:
+ case WT_ROLLBACK:
+ break;
+ default:
+ testutil_die(
+ ret, "wts_read_scan: read row %" PRIu64, keyno);
+ }
}
testutil_check(session->close(session, NULL));
- free(keybuf);
+ free(key.mem);
+ free(value.mem);
}
/*
@@ -589,10 +821,9 @@ wts_read_scan(void)
* Read and verify a single element in a row- or column-store file.
*/
int
-read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
+read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
static int sn = 0;
- WT_ITEM value;
WT_SESSION *session;
int exact, ret;
uint8_t bitfield;
@@ -611,7 +842,7 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
cursor->set_key(cursor, keyno);
break;
case ROW:
- key_gen((uint8_t *)key->data, &key->size, keyno);
+ key_gen(key, keyno);
cursor->set_key(cursor, key);
break;
}
@@ -628,37 +859,33 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
switch (ret) {
case 0:
if (g.type == FIX) {
- ret = cursor->get_value(cursor, &bitfield);
- value.data = &bitfield;
- value.size = 1;
+ testutil_check(cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
} else
- ret = cursor->get_value(cursor, &value);
+ testutil_check(cursor->get_value(cursor, value));
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
case WT_NOTFOUND:
- if (notfound_err)
- return (WT_NOTFOUND);
+ /*
+ * In fixed length stores, zero values at the end of the key
+ * space are returned as not found. Treat this the same as
+ * a zero value in the key space, to match BDB's behavior.
+ */
+ if (g.type == FIX) {
+ *(uint8_t *)(value->data) = 0;
+ value->size = 1;
+ ret = 0;
+ }
break;
+ case WT_ROLLBACK:
+ return (WT_ROLLBACK);
default:
testutil_die(ret, "read_row: read row %" PRIu64, keyno);
}
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
-
- /*
- * In fixed length stores, zero values at the end of the key space are
- * returned as not found. Treat this the same as a zero value in the
- * key space, to match BDB's behavior.
- */
- if (ret == WT_NOTFOUND && g.type == FIX) {
- bitfield = 0;
- value.data = &bitfield;
- value.size = 1;
- ret = 0;
- }
+ return (ret);
/* Retrieve the BDB value. */
{
@@ -669,20 +896,20 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
/* Check for not-found status. */
if (notfound_chk("read_row", ret, notfound, keyno))
- return (0);
+ return (ret);
/* Compare the two. */
- if (value.size != bdb_value.size ||
- memcmp(value.data, bdb_value.data, value.size) != 0) {
+ if (value->size != bdb_value.size ||
+ memcmp(value->data, bdb_value.data, value->size) != 0) {
fprintf(stderr,
"read_row: value mismatch %" PRIu64 ":\n", keyno);
print_item("bdb", &bdb_value);
- print_item(" wt", &value);
+ print_item(" wt", value);
testutil_die(0, NULL);
}
}
#endif
- return (0);
+ return (ret);
}
/*
@@ -690,21 +917,19 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int notfound_err)
* Read and verify the next/prev element in a row- or column-store file.
*/
static int
-nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
+nextprev(WT_CURSOR *cursor, int next)
{
+ WT_DECL_RET;
WT_ITEM key, value;
uint64_t keyno;
- int ret;
uint8_t bitfield;
const char *which;
+ keyno = 0;
which = next ? "next" : "prev";
- keyno = 0;
- ret = next ? cursor->next(cursor) : cursor->prev(cursor);
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- if (ret == 0)
+ switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) {
+ case 0:
switch (g.type) {
case FIX:
if ((ret = cursor->get_key(cursor, &keyno)) == 0 &&
@@ -722,13 +947,20 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
ret = cursor->get_value(cursor, &value);
break;
}
- if (ret != 0 && ret != WT_NOTFOUND)
+ if (ret != 0)
+ testutil_die(ret, "nextprev: get_key/get_value");
+ break;
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
+ return (WT_ROLLBACK);
+ default:
testutil_die(ret, "%s", which);
- *notfoundp = (ret == WT_NOTFOUND);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
{
WT_ITEM bdb_key, bdb_value;
@@ -743,7 +975,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
&bdb_value.data, &bdb_value.size, &notfound);
if (notfound_chk(
next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno))
- return (0);
+ return (ret);
/* Compare the two. */
if (g.type == ROW) {
@@ -794,7 +1026,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
}
}
#endif
- return (0);
+ return (ret);
}
/*
@@ -802,43 +1034,38 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
* Update a row in a row-store file.
*/
static int
-row_update(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
session = cursor->session;
- key_gen((uint8_t *)key->data, &key->size, keyno);
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s{%.*s}\n%-10s{%.*s}",
- "putK", (int)key->size, (char *)key->data,
- "putV", (int)value->size, (char *)value->data);
+ "%-10s{%.*s}, {%.*s}",
+ "put",
+ (int)key->size, key->data, (int)value->size, value->data);
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
- ret = cursor->update(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->update(cursor)) {
+ case 0:
+ break;
+ case WT_CACHE_FULL:
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
+ default:
testutil_die(ret,
"row_update: update row %" PRIu64 " by key", keyno);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- {
- int notfound;
-
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("row_update", ret, notfound, keyno);
- }
+ bdb_update(key->data, key->size, value->data, value->size);
#endif
return (0);
}
@@ -848,16 +1075,13 @@ row_update(TINFO *tinfo,
* Update a row in a column-store file.
*/
static int
-col_update(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
session = cursor->session;
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS) {
if (g.type == FIX)
@@ -877,23 +1101,22 @@ col_update(TINFO *tinfo,
cursor->set_value(cursor, *(uint8_t *)value->data);
else
cursor->set_value(cursor, value);
- ret = cursor->update(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->update(cursor)) {
+ case 0:
+ break;
+ case WT_CACHE_FULL:
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
+ default:
testutil_die(ret, "col_update: %" PRIu64, keyno);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- {
- int notfound;
-
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("col_update", ret, notfound, keyno);
- }
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
#else
(void)key; /* [-Wunused-variable] */
#endif
@@ -912,8 +1135,7 @@ table_append_init(void)
g.append_cnt = 0;
free(g.append);
- if ((g.append = calloc(g.append_max, sizeof(uint64_t))) == NULL)
- testutil_die(errno, "calloc");
+ g.append = dcalloc(g.append_max, sizeof(uint64_t));
}
/*
@@ -1005,43 +1227,38 @@ table_append(uint64_t keyno)
* Insert a row in a row-store file.
*/
static int
-row_insert(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
session = cursor->session;
- key_gen_insert(&tinfo->rnd, (uint8_t *)key->data, &key->size, keyno);
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s{%.*s}\n%-10s{%.*s}",
- "insertK", (int)key->size, (char *)key->data,
- "insertV", (int)value->size, (char *)value->data);
+ "%-10s{%.*s}, {%.*s}",
+ "insert",
+ (int)key->size, key->data, (int)value->size, value->data);
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
- ret = cursor->insert(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->insert(cursor)) {
+ case 0:
+ break;
+ case WT_CACHE_FULL:
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
+ default:
testutil_die(ret,
"row_insert: insert row %" PRIu64 " by key", keyno);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- {
- int notfound;
-
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("row_insert", ret, notfound, keyno);
- }
+ bdb_update(key->data, key->size, value->data, value->size);
#endif
return (0);
}
@@ -1051,24 +1268,25 @@ row_insert(TINFO *tinfo,
* Insert an element in a column-store file.
*/
static int
-col_insert(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
+col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
{
+ WT_DECL_RET;
WT_SESSION *session;
uint64_t keyno;
- int ret;
session = cursor->session;
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, g.rows + 1);
-
if (g.type == FIX)
cursor->set_value(cursor, *(uint8_t *)value->data);
else
cursor->set_value(cursor, value);
- if ((ret = cursor->insert(cursor)) != 0) {
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
+ switch (ret = cursor->insert(cursor)) {
+ case 0:
+ break;
+ case WT_CACHE_FULL:
+ case WT_ROLLBACK:
+ return (WT_ROLLBACK);
+ default:
testutil_die(ret, "cursor.insert");
}
testutil_check(cursor->get_key(cursor, &keyno));
@@ -1093,12 +1311,8 @@ col_insert(TINFO *tinfo,
if (!SINGLETHREADED)
return (0);
- {
- int notfound;
-
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- }
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
#else
(void)key; /* [-Wunused-variable] */
#endif
@@ -1110,14 +1324,14 @@ col_insert(TINFO *tinfo,
* Remove an row from a row-store file.
*/
static int
-row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
+row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
session = cursor->session;
- key_gen((uint8_t *)key->data, &key->size, keyno);
+ key_gen(key, keyno);
/* Log the operation */
if (g.logging == LOG_OPS)
@@ -1128,16 +1342,20 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret) {
+ case 0:
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
+ default:
testutil_die(ret,
"row_remove: remove %" PRIu64 " by key", keyno);
- *notfoundp = (ret == WT_NOTFOUND);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
{
int notfound;
@@ -1148,7 +1366,7 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
#else
(void)key; /* [-Wunused-variable] */
#endif
- return (0);
+ return (ret);
}
/*
@@ -1156,10 +1374,10 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
* Remove a row from a column-store file.
*/
static int
-col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
+col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
session = cursor->session;
@@ -1172,35 +1390,38 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret) {
+ case 0:
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
+ default:
testutil_die(ret,
"col_remove: remove %" PRIu64 " by key", keyno);
- *notfoundp = (ret == WT_NOTFOUND);
+ }
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
-
- {
- int notfound;
+ return (ret);
/*
* Deleting a fixed-length item is the same as setting the bits to 0;
* do the same thing for the BDB store.
*/
if (g.type == FIX) {
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, "\0", 1, &notfound);
- } else
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, "\0", 1);
+ } else {
+ int notfound;
+
bdb_remove(keyno, &notfound);
- (void)notfound_chk("col_remove", ret, notfound, keyno);
+ (void)notfound_chk("col_remove", ret, notfound, keyno);
}
#else
(void)key; /* [-Wunused-variable] */
#endif
- return (0);
+ return (ret);
}
#ifdef HAVE_BERKELEY_DB
@@ -1244,7 +1465,7 @@ print_item(const char *tag, WT_ITEM *item)
static const char hex[] = "0123456789abcdef";
const uint8_t *data;
size_t size;
- int ch;
+ u_char ch;
data = item->data;
size = item->size;
@@ -1255,8 +1476,8 @@ print_item(const char *tag, WT_ITEM *item)
else
for (; size > 0; --size, ++data) {
ch = data[0];
- if (isprint(ch))
- fprintf(stderr, "%c", ch);
+ if (__wt_isprint(ch))
+ fprintf(stderr, "%c", (int)ch);
else
fprintf(stderr, "%x%x",
hex[(data[0] & 0xf0) >> 4],
diff --git a/test/format/salvage.c b/test/format/salvage.c
index 526e1563390..8274c556364 100644
--- a/test/format/salvage.c
+++ b/test/format/salvage.c
@@ -36,8 +36,8 @@ static void
salvage(void)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
conn = g.wts_conn;
track("salvage", 0ULL, NULL);
@@ -141,7 +141,7 @@ found: if (fstat(fd, &sb) == -1)
void
wts_salvage(void)
{
- int ret;
+ WT_DECL_RET;
/* Some data-sources don't support salvage. */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
@@ -158,7 +158,7 @@ wts_salvage(void)
testutil_die(ret, "salvage copy step failed");
/* Salvage, then verify. */
- wts_open(g.home, 1, &g.wts_conn);
+ wts_open(g.home, true, &g.wts_conn);
salvage();
wts_verify("post-salvage verify");
wts_close();
@@ -174,7 +174,7 @@ wts_salvage(void)
/* Corrupt the file randomly, salvage, then verify. */
if (corrupt()) {
- wts_open(g.home, 1, &g.wts_conn);
+ wts_open(g.home, true, &g.wts_conn);
salvage();
wts_verify("post-corrupt-salvage verify");
wts_close();
diff --git a/test/format/t.c b/test/format/t.c
index 28c22e23cb8..2eb2b078804 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -32,7 +32,8 @@ GLOBAL g;
static void format_die(void);
static void startup(void);
-static void usage(void);
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_optind;
extern char *__wt_optarg;
@@ -181,6 +182,7 @@ main(int argc, char *argv[])
*/
testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL));
testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid());
@@ -198,8 +200,8 @@ main(int argc, char *argv[])
if (SINGLETHREADED)
bdb_open(); /* Initial file config */
#endif
- wts_open(g.home, 1, &g.wts_conn);
- wts_create();
+ wts_open(g.home, true, &g.wts_conn);
+ wts_init();
wts_load(); /* Load initial records */
wts_verify("post-bulk verify"); /* Verify */
@@ -275,6 +277,8 @@ main(int argc, char *argv[])
testutil_check(pthread_rwlock_destroy(&g.append_lock));
testutil_check(pthread_rwlock_destroy(&g.backup_lock));
+ testutil_check(pthread_rwlock_destroy(&g.checkpoint_lock));
+ testutil_check(pthread_rwlock_destroy(&g.death_lock));
config_clear();
@@ -288,7 +292,7 @@ main(int argc, char *argv[])
static void
startup(void)
{
- int ret;
+ WT_DECL_RET;
/* Flush/close any logging information. */
fclose_and_clear(&g.logfp);
diff --git a/test/format/util.c b/test/format/util.c
index 2e4c869366c..cebe2153b3e 100644
--- a/test/format/util.c
+++ b/test/format/util.c
@@ -32,56 +32,11 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-/*
- * dmalloc --
- * Call malloc, dying on failure.
- */
-void *
-dmalloc(size_t len)
-{
- void *p;
-
- if ((p = malloc(len)) == NULL)
- testutil_die(errno, "malloc");
- return (p);
-}
-
-/*
- * dstrdup --
- * Call strdup, dying on failure.
- */
-char *
-dstrdup(const char *str)
-{
- char *p;
-
- if ((p = strdup(str)) == NULL)
- testutil_die(errno, "strdup");
- return (p);
-}
-
-static inline uint32_t
-kv_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max)
-{
- /*
- * Focus on relatively small key/value items, admitting the possibility
- * of larger items. Pick a size close to the minimum most of the time,
- * only create a larger item 1 in 20 times, and a really big item 1 in
- * 1000 times. (Configuration can force large key/value minimum sizes,
- * where every key/value item is an overflow.)
- */
- if (keyno % 1000 == 0 && max < KILOBYTE(80)) {
- min = KILOBYTE(80);
- max = KILOBYTE(100);
- } else if (keyno % 20 != 0 && max > min + 20)
- max = min + 20;
- return (mmrand(rnd, min, max));
-}
-
void
key_len_setup(void)
{
size_t i;
+ uint32_t max;
/*
* The key is a variable length item with a leading 10-digit value.
@@ -91,72 +46,113 @@ key_len_setup(void)
* the pre-loaded lengths.
*
* Fill in the random key lengths.
+ *
+ * Focus on relatively small items, admitting the possibility of larger
+ * items. Pick a size close to the minimum most of the time, only create
+ * a larger item 1 in 20 times.
*/
- for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i)
- g.key_rand_len[i] =
- kv_len(NULL, (uint64_t)i, g.c_key_min, g.c_key_max);
+ for (i = 0;
+ i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) {
+ max = g.c_key_max;
+ if (i % 20 != 0 && max > g.c_key_min + 20)
+ max = g.c_key_min + 20;
+ g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max);
+ }
}
void
-key_gen_setup(uint8_t **keyp)
+key_gen_setup(WT_ITEM *key)
{
- uint8_t *key;
size_t i, len;
-
- *keyp = NULL;
+ char *p;
len = MAX(KILOBYTE(100), g.c_key_max);
- key = dmalloc(len);
+ p = dmalloc(len);
for (i = 0; i < len; ++i)
- key[i] = (uint8_t)("abcdefghijklmnopqrstuvwxyz"[i % 26]);
- *keyp = key;
+ p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
+
+ key->mem = p;
+ key->memsize = len;
+ key->data = key->mem;
+ key->size = 0;
}
static void
-key_gen_common(uint8_t *key, size_t *sizep, uint64_t keyno, int suffix)
+key_gen_common(WT_ITEM *key, uint64_t keyno, int suffix)
{
int len;
+ char *p;
+
+ p = key->mem;
/*
* The key always starts with a 10-digit string (the specified cnt)
* followed by two digits, a random number between 1 and 15 if it's
* an insert, otherwise 00.
*/
- len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix);
+ len = sprintf(p, "%010" PRIu64 ".%02d", keyno, suffix);
/*
- * In a column-store, the key is only used for BDB, and so it doesn't
- * need a random length.
+ * In a column-store, the key is only used for Berkeley DB inserts,
+ * and so it doesn't need a random length.
*/
if (g.type == ROW) {
- key[len] = '/';
- len = (int)g.key_rand_len[keyno %
- (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))];
+ p[len] = '/';
+
+ /*
+ * Because we're doing table lookup for key sizes, we weren't
+ * able to set really big keys sizes in the table, the table
+ * isn't big enough to keep our hash from selecting too many
+ * big keys and blowing out the cache. Handle that here, use a
+ * really big key 1 in 2500 times.
+ */
+ len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ?
+ KILOBYTE(80) :
+ (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)];
}
- *sizep = (size_t)len;
+
+ key->data = key->mem;
+ key->size = (size_t)len;
}
void
-key_gen(uint8_t *key, size_t *sizep, uint64_t keyno)
+key_gen(WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, sizep, keyno, 0);
+ key_gen_common(key, keyno, 0);
}
void
-key_gen_insert(WT_RAND_STATE *rnd, uint8_t *key, size_t *sizep, uint64_t keyno)
+key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, sizep, keyno, (int)mmrand(rnd, 1, 15));
+ key_gen_common(key, keyno, (int)mmrand(rnd, 1, 15));
}
static uint32_t val_dup_data_len; /* Length of duplicate data items */
+static inline uint32_t
+value_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max)
+{
+ /*
+ * Focus on relatively small items, admitting the possibility of larger
+ * items. Pick a size close to the minimum most of the time, only create
+ * a larger item 1 in 20 times, and a really big item 1 in somewhere
+ * around 2500 items.
+ */
+ if (keyno % 2500 == 0 && max < KILOBYTE(80)) {
+ min = KILOBYTE(80);
+ max = KILOBYTE(100);
+ } else if (keyno % 20 != 0 && max > min + 20)
+ max = min + 20;
+ return (mmrand(rnd, min, max));
+}
+
void
-val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp)
+val_gen_setup(WT_RAND_STATE *rnd, WT_ITEM *value)
{
- uint8_t *val;
size_t i, len;
+ char *p;
- *valp = NULL;
+ memset(value, 0, sizeof(WT_ITEM));
/*
* Set initial buffer contents to recognizable text.
@@ -166,35 +162,43 @@ val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp)
* data for column-store run-length encoded files.
*/
len = MAX(KILOBYTE(100), g.c_value_max) + 20;
- val = dmalloc(len);
+ p = dmalloc(len);
for (i = 0; i < len; ++i)
- val[i] = (uint8_t)("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]);
+ p[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
- *valp = val;
+ value->mem = p;
+ value->memsize = len;
+ value->data = value->mem;
+ value->size = 0;
- val_dup_data_len = kv_len(rnd,
+ val_dup_data_len = value_len(rnd,
(uint64_t)mmrand(rnd, 1, 20), g.c_value_min, g.c_value_max);
}
void
-val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno)
+val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno)
{
+ char *p;
+
+ p = value->mem;
+ value->data = value->mem;
+
/*
* Fixed-length records: take the low N bits from the last digit of
* the record number.
*/
if (g.type == FIX) {
switch (g.c_bitcnt) {
- case 8: val[0] = (uint8_t)mmrand(rnd, 1, 0xff); break;
- case 7: val[0] = (uint8_t)mmrand(rnd, 1, 0x7f); break;
- case 6: val[0] = (uint8_t)mmrand(rnd, 1, 0x3f); break;
- case 5: val[0] = (uint8_t)mmrand(rnd, 1, 0x1f); break;
- case 4: val[0] = (uint8_t)mmrand(rnd, 1, 0x0f); break;
- case 3: val[0] = (uint8_t)mmrand(rnd, 1, 0x07); break;
- case 2: val[0] = (uint8_t)mmrand(rnd, 1, 0x03); break;
- case 1: val[0] = 1; break;
+ case 8: p[0] = (char)mmrand(rnd, 1, 0xff); break;
+ case 7: p[0] = (char)mmrand(rnd, 1, 0x7f); break;
+ case 6: p[0] = (char)mmrand(rnd, 1, 0x3f); break;
+ case 5: p[0] = (char)mmrand(rnd, 1, 0x1f); break;
+ case 4: p[0] = (char)mmrand(rnd, 1, 0x0f); break;
+ case 3: p[0] = (char)mmrand(rnd, 1, 0x07); break;
+ case 2: p[0] = (char)mmrand(rnd, 1, 0x03); break;
+ case 1: p[0] = 1; break;
}
- *sizep = 1;
+ value->size = 1;
return;
}
@@ -203,29 +207,24 @@ val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno)
* test that by inserting a zero-length data item every so often.
*/
if (keyno % 63 == 0) {
- val[0] = '\0';
- *sizep = 0;
+ p[0] = '\0';
+ value->size = 0;
return;
}
/*
- * Start the data with a 10-digit number.
- *
- * For row and non-repeated variable-length column-stores, change the
- * leading number to ensure every data item is unique. For repeated
- * variable-length column-stores (that is, to test run-length encoding),
- * use the same data value all the time.
+ * Data items have unique leading numbers by default and random lengths;
+ * variable-length column-stores use a duplicate data value to test RLE.
*/
- if ((g.type == ROW || g.type == VAR) &&
- g.c_repeat_data_pct != 0 &&
- mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
- (void)strcpy((char *)val, "DUPLICATEV");
- val[10] = '/';
- *sizep = val_dup_data_len;
+ if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
+ (void)strcpy(p, "DUPLICATEV");
+ p[10] = '/';
+ value->size = val_dup_data_len;
} else {
- (void)sprintf((char *)val, "%010" PRIu64, keyno);
- val[10] = '/';
- *sizep = kv_len(rnd, keyno, g.c_value_min, g.c_value_max);
+ (void)sprintf(p, "%010" PRIu64, keyno);
+ p[10] = '/';
+ value->size =
+ value_len(rnd, keyno, g.c_value_min, g.c_value_max);
}
}
@@ -305,15 +304,6 @@ path_setup(const char *home)
g.home_stats = dmalloc(len);
snprintf(g.home_stats, len, "%s/%s", g.home, "stats");
- /* Backup directory. */
- len = strlen(g.home) + strlen("BACKUP") + 2;
- g.home_backup = dmalloc(len);
- snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP");
-
- len = strlen(g.home) + strlen("BACKUP2") + 2;
- g.home_backup2 = dmalloc(len);
- snprintf(g.home_backup2, len, "%s/%s", g.home, "BACKUP2");
-
/* BDB directory. */
len = strlen(g.home) + strlen("bdb") + 2;
g.home_bdb = dmalloc(len);
@@ -341,18 +331,27 @@ path_setup(const char *home)
g.home_init = dmalloc(len);
snprintf(g.home_init, len, CMD, g.home, g.home, g.home);
- /* Backup directory initialize command, remove and re-create it. */
+ /* Primary backup directory. */
+ len = strlen(g.home) + strlen("BACKUP") + 2;
+ g.home_backup = dmalloc(len);
+ snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP");
+
+ /*
+ * Backup directory initialize command, remove and re-create the primary
+ * backup directory, plus a copy we maintain for recovery testing.
+ */
#undef CMD
#ifdef _WIN32
-#define CMD "del /s /q >:nul && mkdir %s %s"
+#define CMD "del %s/%s %s/%s /s /q >:nul && mkdir %s/%s %s/%s"
#else
-#define CMD "rm -rf %s %s && mkdir %s %s"
+#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s"
#endif
- len = strlen(g.home_backup) * 2 +
- strlen(g.home_backup2) * 2 + strlen(CMD) + 1;
+ len = strlen(g.home) * 4 +
+ strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1;
g.home_backup_init = dmalloc(len);
- snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup2,
- g.home_backup, g.home_backup2);
+ snprintf(g.home_backup_init, len, CMD,
+ g.home, "BACKUP", g.home, "BACKUP_COPY",
+ g.home, "BACKUP", g.home, "BACKUP_COPY");
/*
* Salvage command, save the interesting files so we can replay the
diff --git a/test/format/wts.c b/test/format/wts.c
index 81e484296e2..69195abc3d4 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -87,10 +87,10 @@ handle_message(WT_EVENT_HANDLER *handler,
/* Write and flush the message so we're up-to-date on error. */
if (g.logfp == NULL) {
- out = printf("%p:%s\n", session, message);
+ out = printf("%p:%s\n", (void *)session, message);
(void)fflush(stdout);
} else {
- out = fprintf(g.logfp, "%p:%s\n", session, message);
+ out = fprintf(g.logfp, "%p:%s\n", (void *)session, message);
(void)fflush(g.logfp);
}
return (out < 0 ? EIO : 0);
@@ -126,10 +126,10 @@ static WT_EVENT_HANDLER event_handler = {
* Open a connection to a WiredTiger database.
*/
void
-wts_open(const char *home, int set_api, WT_CONNECTION **connp)
+wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
{
WT_CONNECTION *conn;
- int ret;
+ WT_DECL_RET;
char *config, *end, *p, helium_config[1024];
*connp = NULL;
@@ -138,10 +138,11 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
end = config + sizeof(g.wiredtiger_open_config);
p += snprintf(p, REMAIN(p, end),
- "create,checkpoint_sync=false,cache_size=%" PRIu32 "MB",
- g.c_cache);
-
- p += snprintf(p, REMAIN(p, end), ",error_prefix=\"%s\"", g.progname);
+ "create=true,"
+ "cache_size=%" PRIu32 "MB,"
+ "checkpoint_sync=false,"
+ "error_prefix=\"%s\"",
+ g.c_cache, g.progname);
/* In-memory configuration. */
if (g.c_in_memory != 0)
@@ -273,8 +274,13 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
void
wts_reopen(void)
{
+ WT_CONNECTION *conn;
+
testutil_checkfmt(wiredtiger_open(g.home, &event_handler,
- g.wiredtiger_open_config, &g.wts_conn), "%s", g.home);
+ g.wiredtiger_open_config, &conn), "%s", g.home);
+
+ g.wt_api = conn->get_extension_api(conn);
+ g.wts_conn = conn;
}
/*
@@ -282,7 +288,7 @@ wts_reopen(void)
* Create the underlying store.
*/
void
-wts_create(void)
+wts_init(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
@@ -497,8 +503,8 @@ void
wts_verify(const char *tag)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
if (g.c_verify == 0)
return;
@@ -531,12 +537,12 @@ wts_stats(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
+ WT_DECL_RET;
WT_SESSION *session;
FILE *fp;
char *stat_name;
const char *pval, *desc;
uint64_t v;
- int ret;
/* Ignore statistics if they're not configured. */
if (g.c_statistics == 0)
diff --git a/test/huge/Makefile.am b/test/huge/Makefile.am
index bc76bdc0f3c..894bff5eace 100644
--- a/test/huge/Makefile.am
+++ b/test/huge/Makefile.am
@@ -1,13 +1,16 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES = huge.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
TESTS = smoke.sh
clean-local:
- rm -rf WiredTiger* *.core __*
+ rm -rf WT_TEST *.core
diff --git a/test/huge/huge.c b/test/huge/huge.c
index ad19035ff99..3aa61a9048e 100644
--- a/test/huge/huge.c
+++ b/test/huge/huge.c
@@ -26,15 +26,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "test_util.i"
+#include "test_util.h"
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
@@ -73,13 +65,13 @@ static size_t lengths[] = {
0
};
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
fprintf(stderr, "usage: %s [-s]\n", progname);
- fprintf(stderr, "%s",
- "\t-s small run, only test up to 1GB\n");
-
+ fprintf(stderr, "%s", "\t-s small run, only test up to 1GB\n");
exit(EXIT_FAILURE);
}
@@ -205,8 +197,7 @@ main(int argc, char *argv[])
/* Allocate a buffer to use. */
len = small ? ((size_t)SMALL_MAX) : ((size_t)4 * GIGABYTE);
- if ((big = malloc(len)) == NULL)
- testutil_die(errno, "");
+ big = dmalloc(len);
memset(big, 'a', len);
/* Make sure the configurations all work. */
diff --git a/test/manydbs/Makefile.am b/test/manydbs/Makefile.am
index 53559b25243..2bc47ad7f2e 100644
--- a/test/manydbs/Makefile.am
+++ b/test/manydbs/Makefile.am
@@ -1,13 +1,16 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES = manydbs.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
TESTS = smoke.sh
clean-local:
- rm -rf WiredTiger* *.core __*
+ rm -rf WT_TEST *.core
diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c
index 1d3412a7b06..e485e73067f 100644
--- a/test/manydbs/manydbs.c
+++ b/test/manydbs/manydbs.c
@@ -26,22 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/wait.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include <wiredtiger.h>
-
-#include "test_util.i"
+#include "test_util.h"
#define HOME_SIZE 512
-#define HOME_BASE "WT_HOME"
+#define HOME_BASE "WT_TEST"
static char home[HOME_SIZE]; /* Base home directory */
static char hometmp[HOME_SIZE]; /* Each conn home directory */
static const char *progname; /* Program name */
@@ -67,6 +55,8 @@ static const char * const uri = "table:main";
#define MAX_KV 100
#define MAX_VAL 128
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
@@ -80,10 +70,10 @@ extern char *__wt_optarg;
void (*custom_die)(void) = NULL;
-WT_CONNECTION **connections = NULL;
-WT_CURSOR **cursors = NULL;
-WT_RAND_STATE rnd;
-WT_SESSION **sessions = NULL;
+static WT_CONNECTION **connections = NULL;
+static WT_CURSOR **cursors = NULL;
+static WT_RAND_STATE rnd;
+static WT_SESSION **sessions = NULL;
static int
get_stat(WT_SESSION *stat_session, int stat_field, uint64_t *valuep)
@@ -172,17 +162,10 @@ main(int argc, char *argv[])
* Allocate arrays for connection handles, sessions, statistics
* cursors and, if needed, data cursors.
*/
- if ((connections = calloc(
- (size_t)dbs, sizeof(WT_CONNECTION *))) == NULL)
- testutil_die(ENOMEM, "connection array malloc");
- if ((sessions = calloc(
- (size_t)dbs, sizeof(WT_SESSION *))) == NULL)
- testutil_die(ENOMEM, "session array malloc");
- if ((cond_reset_orig = calloc((size_t)dbs, sizeof(uint64_t))) == NULL)
- testutil_die(ENOMEM, "orig stat malloc");
- if (!idle && ((cursors = calloc(
- (size_t)dbs, sizeof(WT_CURSOR *))) == NULL))
- testutil_die(ENOMEM, "cursor array malloc");
+ connections = dcalloc((size_t)dbs, sizeof(WT_CONNECTION *));
+ sessions = dcalloc((size_t)dbs, sizeof(WT_SESSION *));
+ cond_reset_orig = dcalloc((size_t)dbs, sizeof(uint64_t));
+ cursors = idle ? NULL : dcalloc((size_t)dbs, sizeof(WT_CURSOR *));
memset(cmd, 0, sizeof(cmd));
/*
* Set up all the directory names.
@@ -257,8 +240,7 @@ main(int argc, char *argv[])
free(connections);
free(sessions);
free(cond_reset_orig);
- if (!idle)
- free(cursors);
+ free(cursors);
return (EXIT_SUCCESS);
}
diff --git a/test/mciproject.yml b/test/mciproject.yml
index 9abdf23ec3b..3df1ce5805e 100644
--- a/test/mciproject.yml
+++ b/test/mciproject.yml
@@ -8,6 +8,14 @@ functions:
command: git.get_project
params:
directory: wiredtiger
+ "fetch artifacts" : &fetch_artifacts
+ - command: s3.get
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+ bucket: build_external
+ extract_to: wiredtiger
pre:
- command: shell.exec
@@ -21,7 +29,9 @@ post:
rm -rf "wiredtiger"
tasks:
- - name: compile-posix
+## Base compile task on posix flavours
+ - name: compile
+ depends_on: []
commands:
- func: "fetch source"
- command: git.apply_patch
@@ -33,36 +43,49 @@ tasks:
script: |
set -o errexit
set -o verbose
-
- ./build_posix/reconf
- ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib
- ${make_command|make} ${smp_command|}
- ${make_command|make} check
-
- ${test_env_vars|} python ./test/suite/run.py -v 2
- - name: compile-windows
- commands:
- - func: "fetch source"
- - command: git.apply_patch
+ if [ "Windows_NT" = "$OS" ]; then
+ scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe --enable-diagnostic --enable-verbose ${smp_command|}
+ else
+ ./build_posix/reconf
+ ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose
+ ${make_command|make} ${smp_command|} 2>&1
+ ${make_command|make} check 2>&1
+ fi
+ - command: archive.targz_pack
params:
- directory: wiredtiger
+ target: "wiredtiger.tgz"
+ source_dir: "wiredtiger"
+ include:
+ - "./**"
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: Artifacts
+ remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+
+ - name: unit-test
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
- command: shell.exec
params:
working_dir: "wiredtiger"
script: |
set -o errexit
set -o verbose
-
- scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe ${smp_command|}
-
- ${test_env_vars|} python ./test/suite/run.py -v 2
+ ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1
- name: compile-windows-alt
+ depends_on:
+ - name: compile
commands:
- - func: "fetch source"
- - command: git.apply_patch
- params:
- directory: wiredtiger
+ - func: "fetch artifacts"
- command: shell.exec
params:
working_dir: "wiredtiger"
@@ -72,22 +95,22 @@ tasks:
scons.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100" wiredtiger.dll libwiredtiger.lib
- - name: fops-windows
+ - name: fops
+ depends_on:
+ - name: compile
commands:
- - func: "fetch source"
- - command: git.apply_patch
- params:
- directory: wiredtiger
+ - func: "fetch artifacts"
- command: shell.exec
params:
working_dir: "wiredtiger"
script: |
set -o errexit
set -o verbose
-
- scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe ${smp_command|}
-
- cmd.exe /c t_fops.exe
+ if [ "Windows_NT" = "$OS" ]; then
+ cmd.exe /c t_fops.exe
+ else
+ ./test/fops/t
+ fi
buildvariants:
- name: ubuntu1404
@@ -95,11 +118,14 @@ buildvariants:
run_on:
- ubuntu1404-test
expansions:
- test_env_vars: LD_LIBRARY_PATH=.libs
- smp_command: -j$(grep -c ^processor /proc/cpuinfo)
+ # It's ugly, but we need the absolute path here, not the relative
+ test_env_vars: LD_LIBRARY_PATH=`pwd`/.libs
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
configure_env_vars: CC=/opt/mongodbtoolchain/bin/gcc CXX=/opt/mongodbtoolchain/bin/g++
tasks:
- - name: compile-posix
+ - name: compile
+ - name: unit-test
+ - name: fops
- name: solaris
display_name: Solaris
@@ -107,31 +133,34 @@ buildvariants:
- solaris
expansions:
make_command: PATH=/opt/mongodbtoolchain/bin:$PATH gmake
- test_env_vars: LD_LIBRARY_PATH=.libs
- smp_command: -j$(kstat cpu | sort -u | grep -c "^module")
+ test_env_vars: LD_LIBRARY_PATH=`pwd`/.libs
+ smp_command: -j $(kstat cpu | sort -u | grep -c "^module")
configure_env_vars: PATH=/opt/mongodbtoolchain/bin:$PATH CFLAGS="-m64"
tasks:
- - name: compile-posix
+ - name: compile
+ - name: unit-test
+ - name: fops
- name: windows-64
display_name: Windows 64-bit
run_on:
- windows-64-vs2013-test
- expansions:
- smp_command: -j$(grep -c ^processor /proc/cpuinfo)
tasks:
- - name: compile-windows
+ - name: compile
- name: compile-windows-alt
- - name: fops-windows
+ - name: unit-test
+ - name: fops
- name: osx-1010
display_name: OS X 10.10
run_on:
- osx-1010
expansions:
- smp_command: -j$(sysctl -n hw.logicalcpu)
+ smp_command: -j $(sysctl -n hw.logicalcpu)
configure_env_vars: PATH=/opt/local/bin:$PATH
make_command: PATH=/opt/local/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make
- test_env_vars: DYLD_LIBRARY_PATH=.libs
+ test_env_vars: DYLD_LIBRARY_PATH=`pwd`/.libs
tasks:
- - name: compile-posix
+ - name: compile
+ - name: unit-test
+ - name: fops
diff --git a/test/packing/Makefile.am b/test/packing/Makefile.am
index a9e7e16e5c2..c9128100cc3 100644
--- a/test/packing/Makefile.am
+++ b/test/packing/Makefile.am
@@ -1,7 +1,11 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = intpack-test intpack-test2 intpack-test3 packing-test
-LDADD = $(top_builddir)/libwiredtiger.la
+
+LDADD = $(top_builddir)/test/utility/libtest_util.la
+LDADD +=$(top_builddir)/libwiredtiger.la
LDFLAGS = -static
TESTS = smoke.sh
diff --git a/test/packing/intpack-test.c b/test/packing/intpack-test.c
index 08cc3807725..76851b38e35 100644
--- a/test/packing/intpack-test.c
+++ b/test/packing/intpack-test.c
@@ -26,9 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "wt_internal.h" /* For __wt_XXX */
+#include "test_util.h"
-#include <assert.h>
+void (*custom_die)(void) = NULL;
int
main(void)
@@ -47,9 +47,10 @@ main(void)
#if 1
p = buf;
- assert(__wt_vpack_uint(&p, sizeof(buf), r) == 0);
+ testutil_check(__wt_vpack_uint(&p, sizeof(buf), r));
cp = buf;
- assert(__wt_vunpack_uint(&cp, sizeof(buf), &r2) == 0);
+ testutil_check(
+ __wt_vunpack_uint(&cp, sizeof(buf), &r2));
#else
/*
* Note: use memmove for comparison because GCC does
diff --git a/test/packing/intpack-test2.c b/test/packing/intpack-test2.c
index 7555d2724e7..a7d31329069 100644
--- a/test/packing/intpack-test2.c
+++ b/test/packing/intpack-test2.c
@@ -26,9 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "wt_internal.h" /* For __wt_XXX */
+#include "test_util.h"
-#include <assert.h>
+void (*custom_die)(void) = NULL;
int
main(void)
@@ -38,14 +38,15 @@ main(void)
for (i = 1; i < 1LL << 60; i <<= 1) {
end = buf;
- assert(__wt_vpack_uint(&end, sizeof(buf), (uint64_t)i) == 0);
+ testutil_check(
+ __wt_vpack_uint(&end, sizeof(buf), (uint64_t)i));
printf("%" PRId64 " ", i);
for (p = buf; p < end; p++)
printf("%02x", *p);
printf("\n");
end = buf;
- assert(__wt_vpack_int(&end, sizeof(buf), -i) == 0);
+ testutil_check(__wt_vpack_int(&end, sizeof(buf), -i));
printf("%" PRId64 " ", -i);
for (p = buf; p < end; p++)
printf("%02x", *p);
diff --git a/test/packing/intpack-test3.c b/test/packing/intpack-test3.c
index 2ebc01f9e2e..aac0178578f 100644
--- a/test/packing/intpack-test3.c
+++ b/test/packing/intpack-test3.c
@@ -26,9 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "wt_internal.h" /* For __wt_XXX */
+#include "test_util.h"
-#include <assert.h>
+void (*custom_die)(void) = NULL;
void test_value(int64_t);
void test_spread(int64_t, int64_t, int64_t);
@@ -42,12 +42,14 @@ test_value(int64_t val)
uint64_t uinput, uoutput;
size_t used_len;
+ soutput = 0; /* -Werror=maybe-uninitialized */
sinput = val;
+ soutput = 0; /* Make GCC happy. */
p = buf;
- assert(__wt_vpack_int(&p, sizeof(buf), sinput) == 0);
+ testutil_check(__wt_vpack_int(&p, sizeof(buf), sinput));
used_len = (size_t)(p - buf);
cp = buf;
- assert(__wt_vunpack_int(&cp, used_len, &soutput) == 0);
+ testutil_check(__wt_vunpack_int(&cp, used_len, &soutput));
/* Ensure we got the correct value back */
if (sinput != soutput) {
fprintf(stderr, "mismatch %" PRIu64 ", %" PRIu64 "\n",
@@ -69,10 +71,9 @@ test_value(int64_t val)
uinput = (uint64_t)val;
p = buf;
- assert(__wt_vpack_uint(&p, sizeof(buf), uinput) == 0);
+ testutil_check(__wt_vpack_uint(&p, sizeof(buf), uinput));
cp = buf;
- assert(__wt_vunpack_uint(
- &cp, sizeof(buf), &uoutput) == 0);
+ testutil_check(__wt_vunpack_uint(&cp, sizeof(buf), &uoutput));
/* Ensure we got the correct value back */
if (sinput != soutput) {
fprintf(stderr, "mismatch %" PRIu64 ", %" PRIu64 "\n",
diff --git a/test/packing/packing-test.c b/test/packing/packing-test.c
index 9b7105d7d4a..f251c17eb67 100644
--- a/test/packing/packing-test.c
+++ b/test/packing/packing-test.c
@@ -26,9 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "wt_internal.h" /* For __wt_XXX */
+#include "test_util.h"
-#include <assert.h>
+void (*custom_die)(void) = NULL;
static void
check(const char *fmt, ...)
@@ -40,13 +40,15 @@ check(const char *fmt, ...)
len = 0; /* -Werror=maybe-uninitialized */
va_start(ap, fmt);
- assert(__wt_struct_sizev(NULL, &len, fmt, ap) == 0);
+ testutil_check(__wt_struct_sizev(NULL, &len, fmt, ap));
va_end(ap);
- assert(len > 0 && len < sizeof(buf));
+ if (len < 1 || len >= sizeof(buf))
+ testutil_die(EINVAL,
+ "Unexpected length from __wt_struct_sizev");
va_start(ap, fmt);
- assert(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap) == 0);
+ testutil_check(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap));
va_end(ap);
printf("%s ", fmt);
diff --git a/test/readonly/Makefile.am b/test/readonly/Makefile.am
index 3abcd2386a1..84092e76f02 100644
--- a/test/readonly/Makefile.am
+++ b/test/readonly/Makefile.am
@@ -1,13 +1,16 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES = readonly.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
TESTS = smoke.sh
clean-local:
- rm -rf WT_RD* WiredTiger* *.core __*
+ rm -rf WT_RD* *.core
diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c
index 41400da2605..31edc0d2a24 100644
--- a/test/readonly/readonly.c
+++ b/test/readonly/readonly.c
@@ -26,19 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/wait.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include <wiredtiger.h>
+#include "test_util.h"
-#include "test_util.i"
+#include <sys/wait.h>
#define HOME_SIZE 512
static char home[HOME_SIZE]; /* Program working dir lock file */
@@ -67,6 +57,8 @@ static const char * const uri = "table:main";
#define OP_READ 0
#define OP_WRITE 1
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
@@ -129,6 +121,9 @@ run_child(const char *homedir, int op, int expect)
* Child process opens both databases readonly.
*/
static void
+open_dbs(int, const char *, const char *,
+ const char *, const char *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
open_dbs(int op, const char *dir,
const char *dir_wr, const char *dir_rd, const char *dir_rd2)
{
diff --git a/test/recovery/Makefile.am b/test/recovery/Makefile.am
index 35f8dd15823..19fc48dce47 100644
--- a/test/recovery/Makefile.am
+++ b/test/recovery/Makefile.am
@@ -1,13 +1,16 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = random-abort truncated-log
random_abort_SOURCES = random-abort.c
-random_abort_LDADD = $(top_builddir)/libwiredtiger.la
+random_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la
+random_abort_LDADD +=$(top_builddir)/libwiredtiger.la
random_abort_LDFLAGS = -static
truncated_log_SOURCES = truncated-log.c
-truncated_log_LDADD = $(top_builddir)/libwiredtiger.la
+truncated_log_LDADD = $(top_builddir)/test/utility/libtest_util.la
+truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la
truncated_log_LDFLAGS = -static
# Run this during a "make check" smoke test.
@@ -15,4 +18,4 @@ TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
- rm -rf WT_TEST* *.core __*
+ rm -rf WT_TEST.* *.core
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index 92f65c540cf..85629eddec4 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -26,19 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "test_util.h"
+
#include <sys/wait.h>
-#include <errno.h>
#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include <wt_internal.h>
-
-#include "test_util.i"
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
@@ -48,7 +39,7 @@ static const char * const uri = "table:main";
#define MIN_TH 5
#define MAX_TIME 40
#define MIN_TIME 10
-#define RECORDS_FILE "records-%u"
+#define RECORDS_FILE "records-%" PRIu32
#define ENV_CONFIG \
"create,log=(file_max=10M,archive=false,enabled)," \
@@ -56,6 +47,8 @@ static const char * const uri = "table:main";
#define ENV_CONFIG_REC "log=(recover=on)"
#define MAX_VAL 4096
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
@@ -69,10 +62,6 @@ typedef struct {
uint32_t id;
} WT_THREAD_DATA;
-/*
- * Child process creates the database and table, and then writes data into
- * the table until it is killed by the parent.
- */
static void *
thread_run(void *arg)
{
@@ -104,7 +93,7 @@ thread_run(void *arg)
/*
* Set to no buffering.
*/
- __wt_stream_set_no_buffer(fp);
+ __wt_stream_set_line_buffer(fp);
if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
testutil_die(ret, "WT_CONNECTION:open_session");
if ((ret =
@@ -128,7 +117,7 @@ thread_run(void *arg)
if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
testutil_die(errno, "fprintf");
}
- return (NULL);
+ /* NOTREACHED */
}
/*
@@ -147,8 +136,8 @@ fill_db(uint32_t nth)
uint32_t i;
int ret;
- thr = calloc(nth, sizeof(pthread_t));
- td = calloc(nth, sizeof(WT_THREAD_DATA));
+ thr = dcalloc(nth, sizeof(pthread_t));
+ td = dcalloc(nth, sizeof(WT_THREAD_DATA));
if (chdir(home) != 0)
testutil_die(errno, "Child chdir: %s", home);
if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
@@ -177,7 +166,7 @@ fill_db(uint32_t nth)
* it is killed.
*/
for (i = 0; i < nth; ++i)
- pthread_join(thr[i], NULL);
+ testutil_assert(pthread_join(thr[i], NULL) == 0);
/*
* NOTREACHED
*/
@@ -203,7 +192,7 @@ main(int argc, char *argv[])
uint32_t absent, count, i, nth, timeout;
int ch, status, ret;
pid_t pid;
- bool rand_th, rand_time;
+ bool rand_th, rand_time, verify_only;
const char *working_dir;
char fname[64], kname[64];
@@ -215,9 +204,10 @@ main(int argc, char *argv[])
nth = MIN_TH;
rand_th = rand_time = true;
timeout = MIN_TIME;
+ verify_only = false;
working_dir = "WT_TEST.random-abort";
- while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:v")) != EOF)
switch (ch) {
case 'h':
working_dir = __wt_optarg;
@@ -230,6 +220,9 @@ main(int argc, char *argv[])
rand_time = false;
timeout = (uint32_t)atoi(__wt_optarg);
break;
+ case 'v':
+ verify_only = true;
+ break;
default:
usage();
}
@@ -239,48 +232,62 @@ main(int argc, char *argv[])
usage();
testutil_work_dir_from_path(home, 512, working_dir);
- testutil_make_work_dir(home);
-
- __wt_random_init_seed(NULL, &rnd);
- if (rand_time) {
- timeout = __wt_random(&rnd) % MAX_TIME;
- if (timeout < MIN_TIME)
- timeout = MIN_TIME;
- }
- if (rand_th) {
- nth = __wt_random(&rnd) % MAX_TH;
- if (nth < MIN_TH)
- nth = MIN_TH;
- }
- printf("Parent: Create %u threads; sleep %" PRIu32 " seconds\n",
- nth, timeout);
/*
- * Fork a child to insert as many items. We will then randomly
- * kill the child, run recovery and make sure all items we wrote
- * exist after recovery runs.
+ * If the user wants to verify they need to tell us how many threads
+ * there were so we can find the old record files.
*/
- if ((pid = fork()) < 0)
- testutil_die(errno, "fork");
-
- if (pid == 0) { /* child */
- fill_db(nth);
- return (EXIT_SUCCESS);
+ if (verify_only && rand_th) {
+ fprintf(stderr,
+ "Verify option requires specifying number of threads\n");
+ exit (EXIT_FAILURE);
}
+ if (!verify_only) {
+ testutil_make_work_dir(home);
+
+ testutil_assert(__wt_random_init_seed(NULL, &rnd) == 0);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+ printf("Parent: Create %" PRIu32
+ " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ /*
+ * Fork a child to insert as many items. We will then randomly
+ * kill the child, run recovery and make sure all items we wrote
+ * exist after recovery runs.
+ */
+ if ((pid = fork()) < 0)
+ testutil_die(errno, "fork");
- /* parent */
- /* Sleep for the configured amount of time before killing the child. */
- sleep(timeout);
+ if (pid == 0) { /* child */
+ fill_db(nth);
+ return (EXIT_SUCCESS);
+ }
- /*
- * !!! It should be plenty long enough to make sure more than one
- * log file exists. If wanted, that check would be added here.
- */
- printf("Kill child\n");
- if (kill(pid, SIGKILL) != 0)
- testutil_die(errno, "kill");
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing
+ * the child.
+ */
+ sleep(timeout);
+ /*
+ * !!! It should be plenty long enough to make sure more than
+ * one log file exists. If wanted, that check would be added
+ * here.
+ */
+ printf("Kill child\n");
+ if (kill(pid, SIGKILL) != 0)
+ testutil_die(errno, "kill");
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+ }
/*
* !!! If we wanted to take a copy of the directory before recovery,
* this is the place to do it.
@@ -300,7 +307,8 @@ main(int argc, char *argv[])
for (i = 0; i < nth; ++i) {
snprintf(fname, sizeof(fname), RECORDS_FILE, i);
if ((fp = fopen(fname, "r")) == NULL) {
- fprintf(stderr, "Failed to open %s. i %u\n", fname, i);
+ fprintf(stderr,
+ "Failed to open %s. i %" PRIu32 "\n", fname, i);
testutil_die(errno, "fopen");
}
@@ -309,7 +317,7 @@ main(int argc, char *argv[])
* in the table after recovery. Since we did write-no-sync, we
* expect every key to have been recovered.
*/
- for (count = 0;; ++count) {
+ for (;; ++count) {
ret = fscanf(fp, "%" SCNu64 "\n", &key);
if (ret != EOF && ret != 1)
testutil_die(errno, "fscanf");
@@ -320,7 +328,8 @@ main(int argc, char *argv[])
if ((ret = cursor->search(cursor)) != 0) {
if (ret != WT_NOTFOUND)
testutil_die(ret, "search");
- printf("no record with key %" PRIu64 "\n", key);
+ printf("%s: no record with key %" PRIu64 "\n",
+ fname, key);
++absent;
}
}
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c
index e099873e5b9..a7509c27566 100644
--- a/test/recovery/truncated-log.c
+++ b/test/recovery/truncated-log.c
@@ -26,23 +26,15 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "test_util.h"
+
#include <sys/wait.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#else
+
+#ifdef _WIN32
/* snprintf is not supported on <= VS2013 */
#define snprintf _snprintf
#endif
-#include <wiredtiger.h>
-
-#include "test_util.i"
-
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
static const char * const uri = "table:main";
@@ -58,6 +50,8 @@ static const char * const uri = "table:main";
#define K_SIZE 16
#define V_SIZE 256
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
@@ -69,6 +63,7 @@ usage(void)
* Child process creates the database and table, and then writes data into
* the table until it is killed by the parent.
*/
+static void fill_db(void)WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
fill_db(void)
{
diff --git a/test/salvage/Makefile.am b/test/salvage/Makefile.am
index 3e686dd2951..a3c49b9c41a 100644
--- a/test/salvage/Makefile.am
+++ b/test/salvage/Makefile.am
@@ -1,9 +1,12 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES = salvage.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
@@ -11,4 +14,4 @@ TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
- rm -rf WiredTiger* *.core __*
+ rm -rf WiredTiger* __slvg* *.core
diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c
index a1517d70787..c3349188623 100644
--- a/test/salvage/salvage.c
+++ b/test/salvage/salvage.c
@@ -26,7 +26,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "test_util.i"
+#include "test_util.h"
#include <assert.h>
@@ -159,7 +159,7 @@ int
usage(void)
{
(void)fprintf(stderr,
- "usage: %s [-v] [-r run] [-t fix|rle|var|row]\n", progname);
+ "usage: %s [-v] [-r run] [-t fix|var|row]\n", progname);
return (EXIT_FAILURE);
}
@@ -170,7 +170,7 @@ run(int r)
printf("\t%s: run %d\n", __wt_page_type_string(page_type), r);
- CHECK(system("rm -f WiredTiger* __slvg.* __schema.*") == 0);
+ CHECK(system("rm -f WiredTiger* __slvg.*") == 0);
CHECK((res_fp = fopen(RSLT, "w")) != NULL);
/*
@@ -701,7 +701,7 @@ print_res(int key, int value, int cnt)
switch (page_type) { /* Print value */
case WT_PAGE_COL_FIX:
ch = value & 0x7f;
- if (isprint(ch)) {
+ if (__wt_isprint((u_char)ch)) {
if (ch == '\\')
fputc('\\', res_fp);
fputc(ch, res_fp);
diff --git a/test/suite/run.py b/test/suite/run.py
index f7f0d1399ff..6e7421b8b96 100644
--- a/test/suite/run.py
+++ b/test/suite/run.py
@@ -51,7 +51,7 @@ elif os.path.isfile(os.path.join(wt_disttop, 'wt.exe')):
wt_builddir = wt_disttop
else:
print 'Unable to find useable WiredTiger build'
- sys.exit(False)
+ sys.exit(1)
# Cannot import wiredtiger and supporting utils until we set up paths
# We want our local tree in front of any installed versions of WiredTiger.
@@ -241,7 +241,7 @@ if __name__ == '__main__':
if option == '-dir' or option == 'D':
if dirarg != None or len(args) == 0:
usage()
- sys.exit(False)
+ sys.exit(2)
dirarg = args.pop(0)
continue
if option == '-debug' or option == 'd':
@@ -252,14 +252,14 @@ if __name__ == '__main__':
continue
if option == '-help' or option == 'h':
usage()
- sys.exit(True)
+ sys.exit(0)
if option == '-long' or option == 'l':
longtest = True
continue
if option == '-parallel' or option == 'j':
if parallel != 0 or len(args) == 0:
usage()
- sys.exit(False)
+ sys.exit(2)
parallel = int(args.pop(0))
continue
if option == '-preserve' or option == 'p':
@@ -271,7 +271,7 @@ if __name__ == '__main__':
if option == '-verbose' or option == 'v':
if len(args) == 0:
usage()
- sys.exit(False)
+ sys.exit(2)
verbose = int(args.pop(0))
if verbose > 3:
verbose = 3
@@ -281,19 +281,19 @@ if __name__ == '__main__':
if option == '-config' or option == 'c':
if configfile != None or len(args) == 0:
usage()
- sys.exit(False)
+ sys.exit(2)
configfile = args.pop(0)
continue
if option == '-configcreate' or option == 'C':
if configfile != None or len(args) == 0:
usage()
- sys.exit(False)
+ sys.exit(2)
configfile = args.pop(0)
configwrite = True
continue
print 'unknown arg: ' + arg
usage()
- sys.exit(False)
+ sys.exit(2)
testargs.append(arg)
# All global variables should be set before any test classes are loaded.
@@ -318,4 +318,4 @@ if __name__ == '__main__':
pdb.set_trace()
result = wttest.runsuite(tests, parallel)
- sys.exit(not result.wasSuccessful())
+ sys.exit(0 if result.wasSuccessful() else 1)
diff --git a/test/suite/suite_subprocess.py b/test/suite/suite_subprocess.py
index df89d82e4c9..c56c8d8e933 100644
--- a/test/suite/suite_subprocess.py
+++ b/test/suite/suite_subprocess.py
@@ -117,13 +117,12 @@ class suite_subprocess:
print 'ERROR: ' + filename + ' should not be empty (this command expected error output)'
self.assertNotEqual(filesize, 0, filename + ': expected to not be empty')
- def runWt(self, args, infilename=None, outfilename=None, errfilename=None, reopensession=True):
- """
- Run the 'wt' process
- """
+ # Run the wt utility.
+ def runWt(self, args, infilename=None,
+ outfilename=None, errfilename=None, reopensession=True, failure=False):
- # we close the connection to guarantee everything is
- # flushed, and that we can open it from another process
+ # Close the connection to guarantee everything is flushed, and that
+ # we can open it from another process.
self.close_conn()
wtoutname = outfilename or "wt.out"
@@ -141,14 +140,26 @@ class suite_subprocess:
infilepart = "<" + infilename + " "
print str(procargs)
print "*********************************************"
- print "**** Run 'wt' via: run " + " ".join(procargs[3:]) + infilepart + ">" + wtoutname + " 2>" + wterrname
+ print "**** Run 'wt' via: run " + \
+ " ".join(procargs[3:]) + infilepart + \
+ ">" + wtoutname + " 2>" + wterrname
print "*********************************************"
- subprocess.call(procargs)
+ returncode = subprocess.call(procargs)
elif infilename:
with open(infilename, "r") as wtin:
- subprocess.call(procargs, stdin=wtin, stdout=wtout, stderr=wterr)
+ returncode = subprocess.call(
+ procargs, stdin=wtin, stdout=wtout, stderr=wterr)
else:
- subprocess.call(procargs, stdout=wtout, stderr=wterr)
+ returncode = subprocess.call(
+ procargs, stdout=wtout, stderr=wterr)
+ if failure:
+ self.assertNotEqual(returncode, 0,
+ 'expected failure: "' + \
+ str(procargs) + '": exited ' + str(returncode))
+ else:
+ self.assertEqual(returncode, 0,
+ 'expected success: "' + \
+ str(procargs) + '": exited ' + str(returncode))
if errfilename == None:
self.check_empty_file(wterrname)
if outfilename == None:
diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py
index 991a9f71b19..fbe219d8de8 100644
--- a/test/suite/test_backup05.py
+++ b/test/suite/test_backup05.py
@@ -37,10 +37,12 @@ import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
from helper import copy_wiredtiger_home
-import wttest
+import wiredtiger, wttest
class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:test_backup05'
+ emptyuri = 'table:test_empty05'
+ newuri = 'table:test_new05'
create_params = 'key_format=i,value_format=i'
freq = 5
@@ -51,12 +53,35 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
# With the connection still open, copy files to new directory.
# Half the time use an unaligned copy.
- aligned = (i % (self.freq * 2) != 0) or os.name == "nt"
+ even = i % (self.freq * 2) == 0
+ aligned = even or os.name == "nt"
copy_wiredtiger_home(olddir, newdir, aligned)
+ # Half the time try to rename a table and the other half try
+ # to remove a table. They should fail.
+ if not even:
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.rename(
+ self.emptyuri, self.newuri, None))
+ else:
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.drop(self.emptyuri, None))
+
# Now simulate fsyncUnlock by closing the backup cursor.
cbkup.close()
+ # Once the backup cursor is closed we should be able to perform
+ # schema operations. Test that and then reset the files to their
+ # expected initial names.
+ if not even:
+ self.session.rename(self.emptyuri, self.newuri, None)
+ self.session.drop(self.newuri, None)
+ self.session.create(self.emptyuri, self.create_params)
+ else:
+ self.session.drop(self.emptyuri, None)
+ self.session.create(self.emptyuri, self.create_params)
+
+
# Open the new directory and verify
conn = self.setUpConnectionOpen(newdir)
session = self.setUpSessionOpen(conn)
@@ -77,6 +102,10 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
#
# If the metadata isn't flushed, eventually the metadata we copy will
# be sufficiently out-of-sync with the data file that it won't verify.
+
+ self.session.create(self.emptyuri, self.create_params)
+ self.reopen_conn()
+
self.session.create(self.uri, self.create_params)
for i in range(100):
c = self.session.open_cursor(self.uri)
@@ -88,7 +117,7 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
self.session.verify(self.uri)
def test_backup(self):
- with self.expectedStdoutPattern('Recreating metadata'):
+ with self.expectedStdoutPattern('recreating metadata'):
self.backup()
if __name__ == '__main__':
diff --git a/test/suite/test_config04.py b/test/suite/test_config04.py
index 7186bc3a716..dffa7479f1b 100644
--- a/test/suite/test_config04.py
+++ b/test/suite/test_config04.py
@@ -26,7 +26,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-import os
+import os, shutil
import wiredtiger, wttest
from wiredtiger import stat
@@ -34,6 +34,7 @@ from wiredtiger import stat
# Individually test config options
class test_config04(wttest.WiredTigerTestCase):
table_name1 = 'test_config04'
+ log1 = 'WiredTigerLog.0000000001'
nentries = 100
K = 1024
@@ -86,6 +87,10 @@ class test_config04(wttest.WiredTigerTestCase):
self.assertEqual(cursor[stat.conn.cache_bytes_max][2], size)
cursor.close()
+ def common_log_test(self, path, dirname):
+ self.common_test('log=(archive=false,enabled,' + path + ')')
+ self.assertTrue(os.path.exists(dirname + os.sep + self.log1))
+
def test_bad_config(self):
msg = '/unknown configuration key/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
@@ -168,24 +173,46 @@ class test_config04(wttest.WiredTigerTestCase):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.wiredtiger_open('.', '(create='), msg)
- def test_session_max(self):
- # Note: There isn't any direct way to know that this was set,
- # but we'll have a separate functionality test to test for
- # this indirectly.
- self.common_test('session_max=99')
-
- def test_multiprocess(self):
- self.common_test('multiprocess')
- # TODO: how do we verify that it was set?
-
def test_error_prefix(self):
self.common_test('error_prefix="MyOwnPrefix"')
# TODO: how do we verify that it was set?
def test_logging(self):
- self.common_test('log=(enabled=true)')
- # TODO: how do we verify that it was set? For this we could look
- # for the existence of the log file in the home dir.
+ # Test variations on the log configuration. The log test takes
+ # a configuration string as the first arg and the directory pathname
+ # to confirm the existence of the log file. For now we're testing
+ # the log pathname only.
+ #
+ # Test the default in the home directory.
+ self.common_log_test('', '.')
+ self.conn.close()
+
+ # Test a subdir of the home directory.
+ logdirname = 'logdir'
+ logdir = '.' + os.sep + logdirname
+ os.mkdir(logdir)
+ confstr = 'path=' + logdirname
+ self.common_log_test(confstr, logdir)
+ self.conn.close()
+
+ # Test an absolute path directory.
+ if os.name == 'posix':
+ logdir = '/tmp/logdir'
+ os.mkdir(logdir)
+ confstr = 'path=' + logdir
+ self.common_log_test(confstr, logdir)
+ self.conn.close()
+ shutil.rmtree(logdir, ignore_errors=True)
+
+ def test_multiprocess(self):
+ self.common_test('multiprocess')
+ # TODO: how do we verify that it was set?
+
+ def test_session_max(self):
+ # Note: There isn't any direct way to know that this was set,
+ # but we'll have a separate functionality test to test for
+ # this indirectly.
+ self.common_test('session_max=99')
def test_transactional(self):
# Note: this will have functional tests in the future.
diff --git a/test/suite/test_dump.py b/test/suite/test_dump.py
index fc1422155e2..85196174c1b 100644
--- a/test/suite/test_dump.py
+++ b/test/suite/test_dump.py
@@ -26,7 +26,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-import os
+import os, shutil
import wiredtiger, wttest
from helper import \
complex_populate, complex_populate_check, \
@@ -42,6 +42,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
dir='dump.dir' # Backup directory name
name = 'test_dump'
+ name2 = 'test_dumpb'
nentries = 2500
dumpfmt = [
@@ -109,6 +110,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
# Create the object.
uri = self.uri + self.name
+ uri2 = self.uri + self.name2
self.populate(self, uri,
self.config + ',key_format=' + self.keyfmt, self.nentries)
@@ -130,23 +132,19 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
self.assertEqual(not s1.symmetric_difference(s2), True)
# Check the object's contents
- conn = self.wiredtiger_open(self.dir)
- session = conn.open_session()
+ self.reopen_conn(self.dir)
self.populate_check(self, uri, self.nentries)
- conn.close()
- # Re-load the object again.
+ # Re-load the object again in the original directory.
+ self.reopen_conn('.')
self.runWt(['-h', self.dir, 'load', '-f', 'dump.out'])
# Check the contents, they shouldn't have changed.
- conn = self.wiredtiger_open(self.dir)
- session = conn.open_session()
self.populate_check(self, uri, self.nentries)
- conn.close()
# Re-load the object again, but confirm -n (no overwrite) fails.
- self.runWt(['-h', self.dir,
- 'load', '-n', '-f', 'dump.out'], errfilename='errfile.out')
+ self.runWt(['-h', self.dir, 'load', '-n', '-f', 'dump.out'],
+ errfilename='errfile.out', failure=True)
self.check_non_empty_file('errfile.out')
# If there are indices, dump one of them and check the output.
@@ -158,5 +156,14 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
self.check_non_empty_file('dumpidx.out')
self.compare_dump_values('dump.out', 'dumpidx.out')
+ # Re-load the object into a different table uri
+ shutil.rmtree(self.dir)
+ os.mkdir(self.dir)
+ self.runWt(['-h', self.dir, 'load', '-r', self.name2, '-f', 'dump.out'])
+
+ # Check the contents in the new table.
+ self.reopen_conn(self.dir)
+ self.populate_check(self, uri2, self.nentries)
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py
index 4aa2bc6e269..f8d96a2718a 100644
--- a/test/suite/test_join01.py
+++ b/test/suite/test_join01.py
@@ -35,10 +35,44 @@ from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
class test_join01(wttest.WiredTigerTestCase):
nentries = 100
- scenarios = [
+ type_scen = [
('table', dict(ref='table')),
('index', dict(ref='index'))
]
+ bloom0_scen = [
+ ('bloom0=0', dict(joincfg0='')),
+ ('bloom0=1000', dict(joincfg0=',strategy=bloom,count=1000')),
+ ('bloom0=10000', dict(joincfg0=',strategy=bloom,count=10000')),
+ ]
+ bloom1_scen = [
+ ('bloom1=0', dict(joincfg1='')),
+ ('bloom1=1000', dict(joincfg1=',strategy=bloom,count=1000')),
+ ('bloom1=10000', dict(joincfg1=',strategy=bloom,count=10000')),
+ ]
+ projection_scen = [
+ ('no-projection', dict(do_proj=False)),
+ ('projection', dict(do_proj=True))
+ ]
+ nested_scen = [
+ ('simple', dict(do_nested=False)),
+ ('nested', dict(do_nested=True))
+ ]
+ stats_scen = [
+ ('no-stats', dict(do_stats=False)),
+ ('stats', dict(do_stats=True))
+ ]
+ order_scen = [
+ ('order=0', dict(join_order=0)),
+ ('order=1', dict(join_order=1)),
+ ('order=2', dict(join_order=2)),
+ ('order=3', dict(join_order=3)),
+ ]
+ scenarios = number_scenarios(multiply_scenarios('.', type_scen,
+ bloom0_scen, bloom1_scen,
+ projection_scen,
+ nested_scen, stats_scen,
+ order_scen))
+
# We need statistics for these tests.
conn_config = 'statistics=(all)'
@@ -52,9 +86,29 @@ class test_join01(wttest.WiredTigerTestCase):
return [s, rs, sort3]
# Common function for testing iteration of join cursors
- def iter_common(self, jc, do_proj):
+ def iter_common(self, jc, do_proj, do_nested, join_order):
# See comments in join_common()
- expect = [73, 82, 62, 83, 92]
+ # The order that the results are seen depends on
+ # the ordering of the joins. Specifically, the first
+ # join drives the order that results are seen.
+ if do_nested:
+ if join_order == 0:
+ expect = [73, 82, 83, 92]
+ elif join_order == 1:
+ expect = [73, 82, 83, 92]
+ elif join_order == 2:
+ expect = [82, 92, 73, 83]
+ elif join_order == 3:
+ expect = [92, 73, 82, 83]
+ else:
+ if join_order == 0:
+ expect = [73, 82, 62, 83, 92]
+ elif join_order == 1:
+ expect = [62, 73, 82, 83, 92]
+ elif join_order == 2:
+ expect = [62, 82, 92, 73, 83]
+ elif join_order == 3:
+ expect = [73, 82, 62, 83, 92]
while jc.next() == 0:
[k] = jc.get_keys()
i = k - 1
@@ -64,7 +118,9 @@ class test_join01(wttest.WiredTigerTestCase):
[v0,v1,v2] = jc.get_values()
self.assertEquals(self.gen_values(i), [v0,v1,v2])
if len(expect) == 0 or i != expect[0]:
- self.tty(' result ' + str(i) + ' is not in: ' + str(expect))
+ self.tty('ERROR: ' + str(i) + ' is not next in: ' +
+ str(expect))
+ self.tty('JOIN ORDER=' + str(join_order) + ', NESTED=' + str(do_nested))
self.assertTrue(i == expect[0])
expect.remove(i)
self.assertEquals(0, len(expect))
@@ -81,6 +137,8 @@ class test_join01(wttest.WiredTigerTestCase):
'join: index:join01:index2: ' + statdesc ]
if self.ref == 'index':
expectstats.append('join: index:join01:index0: ' + statdesc)
+ elif self.do_proj:
+ expectstats.append('join: table:join01(v2,v1,v0): ' + statdesc)
else:
expectstats.append('join: table:join01: ' + statdesc)
self.check_stats(statcur, expectstats)
@@ -118,11 +176,46 @@ class test_join01(wttest.WiredTigerTestCase):
self.assertTrue(len(expectstats) == 0,
'missing expected values in stats: ' + str(expectstats))
+ def session_record_join(self, jc, refc, config, order, joins):
+ joins.append([order, [jc, refc, config]])
+
+ def session_play_one_join(self, firsturi, jc, refc, config):
+ if refc.uri == firsturi and config != None:
+ config = config.replace('strategy=bloom','')
+ #self.tty('->join(jc, uri="' + refc.uri +
+ # '", config="' + str(config) + '"')
+ self.session.join(jc, refc, config)
+
+ def session_play_joins(self, joins, join_order):
+ #self.tty('->')
+ firsturi = None
+ for [i, joinargs] in joins:
+ if i >= join_order:
+ if firsturi == None:
+ firsturi = joinargs[1].uri
+ self.session_play_one_join(firsturi, *joinargs)
+ for [i, joinargs] in joins:
+ if i < join_order:
+ if firsturi == None:
+ firsturi = joinargs[1].uri
+ self.session_play_one_join(firsturi, *joinargs)
+
# Common function for testing the most basic functionality
# of joins
- def join_common(self, joincfg0, joincfg1, do_proj, do_stats):
+ def test_join(self):
+ joincfg0 = self.joincfg0
+ joincfg1 = self.joincfg1
+ do_proj = self.do_proj
+ do_nested = self.do_nested
+ do_stats = self.do_stats
+ join_order = self.join_order
#self.tty('join_common(' + joincfg0 + ',' + joincfg1 + ',' +
- # str(do_proj) + ')')
+ # str(do_proj) + ',' + str(do_nested) + ',' +
+ # str(do_stats) + ',' + str(join_order) + ')')
+
+ closeme = []
+ joins = [] # cursors to be joined
+
self.session.create('table:join01', 'key_format=r' +
',value_format=SSi,columns=(k,v0,v1,v2)')
self.session.create('index:join01:index0','columns=(v0)')
@@ -143,7 +236,7 @@ class test_join01(wttest.WiredTigerTestCase):
# We join on index2 first, not using bloom indices.
# This defines the order that items are returned.
- # index2 is sorts multiples of 3 first (see gen_values())
+ # index2 sorts multiples of 3 first (see gen_values())
# and by using 'gt' and key 99, we'll skip multiples of 3,
# and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97.
jc = self.session.open_cursor('join:table:join01' + proj_suffix,
@@ -152,7 +245,7 @@ class test_join01(wttest.WiredTigerTestCase):
c2 = self.session.open_cursor('index:join01:index2(v1)', None, None)
c2.set_key(99) # skips all entries w/ primary key divisible by three
self.assertEquals(0, c2.search())
- self.session.join(jc, c2, 'compare=gt')
+ self.session_record_join(jc, c2, 'compare=gt', 0, joins)
# Then select all the numbers 0-99 whose string representation
# sort >= '60'.
@@ -163,285 +256,87 @@ class test_join01(wttest.WiredTigerTestCase):
c0 = self.session.open_cursor('table:join01', None, None)
c0.set_key(60)
self.assertEquals(0, c0.search())
- self.session.join(jc, c0, 'compare=ge' + joincfg0)
+ self.session_record_join(jc, c0, 'compare=ge' + joincfg0, 1, joins)
# Then select all numbers whose reverse string representation
# is in '20' < x < '40'.
c1a = self.session.open_cursor('index:join01:index1(v1)', None, None)
c1a.set_key('21')
self.assertEquals(0, c1a.search())
- self.session.join(jc, c1a, 'compare=gt' + joincfg1)
+ self.session_record_join(jc, c1a, 'compare=gt' + joincfg1, 2, joins)
c1b = self.session.open_cursor('index:join01:index1(v1)', None, None)
c1b.set_key('41')
self.assertEquals(0, c1b.search())
- self.session.join(jc, c1b, 'compare=lt' + joincfg1)
+ self.session_record_join(jc, c1b, 'compare=lt' + joincfg1, 2, joins)
# Numbers that satisfy these 3 conditions (with ordering implied by c2):
# [73, 82, 62, 83, 92].
#
# After iterating, we should be able to reset and iterate again.
+ if do_nested:
+ # To test nesting, we create two new levels of conditions:
+ #
+ # x == 72 or x == 73 or x == 82 or x == 83 or
+ # (x >= 90 and x <= 99)
+ #
+ # that will get AND-ed into our existing join. The expected
+ # result is [73, 82, 83, 92].
+ #
+ # We don't specify the projection here, it should be picked up
+ # from the 'enclosing' join.
+ nest1 = self.session.open_cursor('join:table:join01', None, None)
+ nest2 = self.session.open_cursor('join:table:join01', None, None)
+
+ nc = self.session.open_cursor('index:join01:index0', None, None)
+ nc.set_key('90')
+ self.assertEquals(0, nc.search())
+ self.session.join(nest2, nc, 'compare=ge') # joincfg left out
+ closeme.append(nc)
+
+ nc = self.session.open_cursor('index:join01:index0', None, None)
+ nc.set_key('99')
+ self.assertEquals(0, nc.search())
+ self.session.join(nest2, nc, 'compare=le')
+ closeme.append(nc)
+
+ self.session.join(nest1, nest2, "operation=or")
+
+ for val in [ '72', '73', '82', '83' ]:
+ nc = self.session.open_cursor('index:join01:index0', None, None)
+ nc.set_key(val)
+ self.assertEquals(0, nc.search())
+ self.session.join(nest1, nc, 'compare=eq,operation=or' +
+ joincfg0)
+ closeme.append(nc)
+ self.session_record_join(jc, nest1, None, 3, joins)
+
+ self.session_play_joins(joins, join_order)
+ self.iter_common(jc, do_proj, do_nested, join_order)
if do_stats:
self.stats(jc, 0)
- self.iter_common(jc, do_proj)
+ jc.reset()
+ self.iter_common(jc, do_proj, do_nested, join_order)
if do_stats:
self.stats(jc, 1)
jc.reset()
- self.iter_common(jc, do_proj)
+ self.iter_common(jc, do_proj, do_nested, join_order)
if do_stats:
self.stats(jc, 2)
jc.reset()
- self.iter_common(jc, do_proj)
+ self.iter_common(jc, do_proj, do_nested, join_order)
jc.close()
c2.close()
c1a.close()
c1b.close()
c0.close()
+ if do_nested:
+ nest1.close()
+ nest2.close()
+ for c in closeme:
+ c.close()
self.session.drop('table:join01')
- # Test joins with basic functionality
- def test_join(self):
- bloomcfg1000 = ',strategy=bloom,count=1000'
- bloomcfg10000 = ',strategy=bloom,count=10000'
- for cfga in [ '', bloomcfg1000, bloomcfg10000 ]:
- for cfgb in [ '', bloomcfg1000, bloomcfg10000 ]:
- for do_proj in [ False, True ]:
- #self.tty('cfga=' + cfga +
- # ', cfgb=' + cfgb +
- # ', doproj=' + str(do_proj))
- self.join_common(cfga, cfgb, do_proj, False)
-
- def test_join_errors(self):
- self.session.create('table:join01', 'key_format=r,value_format=SS'
- ',columns=(k,v0,v1)')
- self.session.create('table:join01B', 'key_format=r,value_format=SS'
- ',columns=(k,v0,v1)')
- self.session.create('index:join01:index0','columns=(v0)')
- self.session.create('index:join01:index1','columns=(v1)')
- self.session.create('index:join01B:index0','columns=(v0)')
- jc = self.session.open_cursor('join:table:join01', None, None)
- tc = self.session.open_cursor('table:join01', None, None)
- fc = self.session.open_cursor('file:join01.wt', None, None)
- ic0 = self.session.open_cursor('index:join01:index0', None, None)
- ic0again = self.session.open_cursor('index:join01:index0', None, None)
- ic1 = self.session.open_cursor('index:join01:index1', None, None)
- icB = self.session.open_cursor('index:join01B:index0', None, None)
- tcB = self.session.open_cursor('table:join01B', None, None)
-
- tc.set_key(1)
- tc.set_value('val1', 'val1')
- tc.insert()
- tcB.set_key(1)
- tcB.set_value('val1', 'val1')
- tcB.insert()
- fc.next()
-
- # Joining using a non join-cursor
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(tc, ic0, 'compare=ge'),
- '/not a join cursor/')
- # Joining a table cursor, not index
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, fc, 'compare=ge'),
- '/not an index or table cursor/')
- # Joining a non positioned cursor
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0, 'compare=ge'),
- '/requires reference cursor be positioned/')
- ic0.set_key('val1')
- # Joining a non positioned cursor (no search or next has been done)
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0, 'compare=ge'),
- '/requires reference cursor be positioned/')
- ic0.set_key('valXX')
- self.assertEqual(ic0.search(), wiredtiger.WT_NOTFOUND)
- # Joining a non positioned cursor after failed search
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0, 'compare=ge'),
- '/requires reference cursor be positioned/')
-
- # position the cursors now
- ic0.set_key('val1')
- ic0.search()
- ic0again.next()
- icB.next()
-
- # Joining non matching index
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, icB, 'compare=ge'),
- '/table for join cursor does not match/')
-
- # The cursor must be positioned
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic1, 'compare=ge'),
- '/requires reference cursor be positioned/')
- ic1.next()
-
- # The first cursor joined cannot be bloom
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic1,
- 'compare=ge,strategy=bloom,count=1000'),
- '/first joined cursor cannot specify strategy=bloom/')
-
- # This succeeds.
- self.session.join(jc, ic1, 'compare=ge'),
-
- # With bloom filters, a count is required
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'),
- '/count must be nonzero/')
-
- # This succeeds.
- self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000'),
-
- bloom_config = ',strategy=bloom,count=1000'
- # Cannot use the same index cursor
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0,
- 'compare=le' + bloom_config),
- '/index cursor already used in a join/')
-
- # When joining with the same index, need compatible compares
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0again, 'compare=ge' + bloom_config),
- '/join has overlapping ranges/')
-
- # Another incompatible compare
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0again, 'compare=gt' + bloom_config),
- '/join has overlapping ranges/')
-
- # Compare is compatible, but bloom args need to match
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0again, 'compare=le'),
- '/join has incompatible strategy/')
-
- # Counts need to match for bloom filters
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.join(jc, ic0again, 'compare=le,strategy=bloom,'
- 'count=100'), '/count.* does not match previous count/')
-
- # This succeeds
- self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000')
-
- # Need to do initial next() before getting key/values
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: jc.get_keys(),
- '/join cursor must be advanced with next/')
-
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: jc.get_values(),
- '/join cursor must be advanced with next/')
-
- # Operations on the joined cursor are frozen until the join is closed.
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: ic0.next(),
- '/index cursor is being used in a join/')
-
- # Operations on the joined cursor are frozen until the join is closed.
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: ic0.prev(),
- '/index cursor is being used in a join/')
-
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: ic0.reset(),
- '/index cursor is being used in a join/')
-
- # Only a small number of operations allowed on a join cursor
- msg = "/Unsupported cursor/"
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: jc.search(), msg)
-
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: jc.prev(), msg)
-
- self.assertEquals(jc.next(), 0)
- self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND)
-
- # Only after the join cursor is closed can we use the index cursor
- # normally
- jc.close()
- self.assertEquals(ic0.next(), wiredtiger.WT_NOTFOUND)
- self.assertEquals(ic0.prev(), 0)
-
- # common code for making sure that cursors can be
- # implicitly closed, no matter the order they are created
- def cursor_close_common(self, joinfirst):
- self.session.create('table:join01', 'key_format=r' +
- ',value_format=SS,columns=(k,v0,v1)')
- self.session.create('index:join01:index0','columns=(v0)')
- self.session.create('index:join01:index1','columns=(v1)')
- c = self.session.open_cursor('table:join01', None, None)
- for i in range(0, self.nentries):
- c.set_key(*self.gen_key(i))
- c.set_value(*self.gen_values(i))
- c.insert()
- c.close()
-
- if joinfirst:
- jc = self.session.open_cursor('join:table:join01', None, None)
- c0 = self.session.open_cursor('index:join01:index0', None, None)
- c1 = self.session.open_cursor('index:join01:index1', None, None)
- c0.next() # index cursors must be positioned
- c1.next()
- if not joinfirst:
- jc = self.session.open_cursor('join:table:join01', None, None)
- self.session.join(jc, c0, 'compare=ge')
- self.session.join(jc, c1, 'compare=ge')
- self.session.close()
- self.session = None
-
- def test_cursor_close1(self):
- self.cursor_close_common(True)
-
- def test_cursor_close2(self):
- self.cursor_close_common(False)
-
- # test statistics using the framework set up for this test
- def test_stats(self):
- bloomcfg1000 = ',strategy=bloom,count=1000'
- bloomcfg10 = ',strategy=bloom,count=10'
- self.join_common(bloomcfg1000, bloomcfg1000, False, True)
-
- # Intentially run with an underconfigured Bloom filter,
- # statistics should pick up some false positives.
- self.join_common(bloomcfg10, bloomcfg10, False, True)
-
- # test statistics with a simple one index join cursor
- def test_simple_stats(self):
- self.session.create("table:join01b",
- "key_format=i,value_format=i,columns=(k,v)")
- self.session.create("index:join01b:index", "columns=(v)")
-
- cursor = self.session.open_cursor("table:join01b", None, None)
- cursor[1] = 11
- cursor[2] = 12
- cursor[3] = 13
- cursor.close()
-
- cursor = self.session.open_cursor("index:join01b:index", None, None)
- cursor.set_key(11)
- cursor.search()
-
- jcursor = self.session.open_cursor("join:table:join01b", None, None)
- self.session.join(jcursor, cursor, "compare=gt")
-
- while jcursor.next() == 0:
- [k] = jcursor.get_keys()
- [v] = jcursor.get_values()
-
- statcur = self.session.open_cursor("statistics:join", jcursor, None)
- found = False
- while statcur.next() == 0:
- [desc, pvalue, value] = statcur.get_values()
- #self.tty(str(desc) + "=" + str(pvalue))
- found = True
- self.assertEquals(found, True)
-
- jcursor.close()
- cursor.close()
-
-
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py
new file mode 100644
index 00000000000..36e91361329
--- /dev/null
+++ b/test/suite/test_join07.py
@@ -0,0 +1,548 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os, re, run
+import wiredtiger, wttest, suite_random
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+class ParseException(Exception):
+    # Raised by Tokenizer.error() when a join expression cannot be
+    # tokenized; the message carries the offending position.
+    def __init__(self, msg):
+        Exception.__init__(self, msg)
+
+class Token:
+    # A single lexical element of a join expression.  The 'kind' of a
+    # token is one of the class-level constants below.
+    UNKNOWN = '<unknown>'
+    NUMBER = 'Number'
+    STRING = 'String'
+    COLUMN = 'Column'
+    LPAREN = '('
+    RPAREN = ')'
+    LBRACKET = '{'
+    RBRACKET = '}'
+    COMMA = ','
+    OR = '||'
+    AND = '&&'
+    LT = '<'
+    GT = '>'
+    LE = '<='
+    GE = '>='
+    EQ = '=='
+    ATTRIBUTE = 'Attribute'   # bracketed key value pair
+
+    # Kinds accepted as a comparison operator / as a literal operand.
+    COMPARE_OPS = [LT, GT, LE, GE, EQ]
+    COMPARATORS = [NUMBER, STRING]
+
+    def __init__(self, kind, tokenizer):
+        # The position is captured from the tokenizer's current offset,
+        # i.e. where this token starts.
+        self.kind = kind
+        self.pos = tokenizer.off + tokenizer.pos
+        self.n = 0              # numeric value, for NUMBER tokens
+        self.s = self.index = ''
+        self.attr_key = self.attr_value = ''
+        self.groups = None      # regex groups, when matched by a pattern
+
+    def __str__(self):
+        pieces = ['<Token ', self.kind, ' at char ', str(self.pos), '>']
+        return ''.join(pieces)
+
+class Tokenizer:
+    # Splits a join expression into Token objects.  Supports a single
+    # token of pushback, which peek() and available() are built on.
+    def __init__(self, s):
+        self.off = 0
+        self.pos = 0
+        self.end = len(s)
+        self.s = s + '?'        # add a char that won't match anything
+        self.pushed = None
+        self.re_num = re.compile(r"(\d+)")
+        self.re_quote1 = re.compile(r"'([^']*)'")
+        self.re_quote2 = re.compile(r"\"([^\"]*)\"")
+        self.re_attr = re.compile(r"\[(\w+)=(\w+)\]")
+
+    def newToken(self, kind, sz):
+        # Create a token at the current position, then step over it.
+        created = Token(kind, self)
+        self.pos += sz
+        return created
+
+    def error(self, s):
+        raise ParseException(str(self.pos) + ': ' + s)
+
+    def matched(self, kind, repat):
+        # Match a pattern at the current position and turn the match
+        # into a token; a failed match is reported with a short excerpt.
+        start = self.pos
+        match = re.match(repat, self.s[start:])
+        if not match:
+            stop = min(start + 10, self.end)
+            self.error('matching ' + kind + ' at "' +
+                self.s[start:stop] + '..."')
+        length = match.end()
+        t = self.newToken(kind, length)
+        t.groups = match.groups()
+        t.s = self.s[start:start + length]
+        return t
+
+    def available(self):
+        # True if another token can be read; the token is kept pushed back.
+        if self.pushed is None:
+            self.pushback(self.token())
+        return self.pushed is not None
+
+    def pushback(self, token):
+        if self.pushed is not None:
+            raise AssertionError('pushback more than once')
+        self.pushed = token
+
+    def peek(self):
+        upcoming = self.token()
+        self.pushback(upcoming)
+        return upcoming
+
+    def scan(self):
+        # Skip white space, returning the next char ('' at end of input).
+        while self.pos < self.end and self.s[self.pos].isspace():
+            self.pos += 1
+        if self.pos >= self.end:
+            return ''
+        return self.s[self.pos]
+
+    def token(self):
+        # Return the next token, or None when nothing more is recognized.
+        if self.pushed is not None:
+            (ret, self.pushed) = (self.pushed, None)
+            return ret
+        c = self.scan()
+        if self.pos >= self.end:
+            return None
+        nextpos = self.pos + 1
+        lookahead = self.s[nextpos] if nextpos < self.end else ''
+        #self.tty("Tokenizer.token char=" + c + ", lookahead=" + lookahead)
+
+        # Quoted strings: the token text is the content between the quotes.
+        quoted = { "'" : self.re_quote1, '"' : self.re_quote2 }
+        if c in quoted:
+            t = self.matched(Token.STRING, quoted[c])
+            t.s = t.groups[0]
+            return t
+
+        # Single char punctuation is its own kind.
+        if c in "{}(),":
+            return self.newToken(c, 1)
+
+        # Operators that must be written as a doubled character.
+        doubled = { '|' : (Token.OR, 'matching OR'),
+                    '&' : (Token.AND, 'matching AND'),
+                    '=' : (Token.EQ, 'matching EQ') }
+        if c in doubled:
+            (kind, complaint) = doubled[c]
+            if lookahead != c:
+                self.error(complaint)
+            return self.newToken(kind, 2)
+
+        if c in "0123456789":
+            t = self.matched(Token.NUMBER, self.re_num)
+            t.s = t.groups[0]
+            t.n = int(t.s)
+            return t
+
+        if c in "ABCDEFGHIJ":
+            t = self.newToken(Token.COLUMN, 1)
+            t.s = c
+            return t
+
+        # '<' and '>' optionally followed by '='.
+        relational = { '<' : (Token.LT, Token.LE),
+                       '>' : (Token.GT, Token.GE) }
+        if c in relational:
+            (strict, inclusive) = relational[c]
+            if lookahead == '=':
+                return self.newToken(inclusive, 2)
+            return self.newToken(strict, 1)
+
+        if c == "[":
+            t = self.matched(Token.ATTRIBUTE, self.re_attr)
+            (t.attr_key, t.attr_value) = (t.groups[0], t.groups[1])
+            return t
+        return None
+
+    def tty(self, s):
+        wttest.WiredTigerTestCase.tty(s)
+
+# test_join07.py
+# Join interpreter
+class test_join07(wttest.WiredTigerTestCase):
+ reverseop = { '==' : '==', '<=' : '>=', '<' : '>', '>=' : '<=', '>' : '<' }
+ compareop = { '==' : 'eq', '<=' : 'le', '<' : 'lt', '>=' : 'ge',
+ '>' : 'gt' }
+ columnmult = { 'A' : 1, 'B' : 2, 'C' : 3, 'D' : 4, 'E' : 5,
+ 'F' : 6, 'G' : 7, 'H' : 8, 'I' : 9, 'J' : 10 }
+
+ extractscen = [
+ ('extractor', dict(extractor=True)),
+ ('noextractor', dict(extractor=False))
+ ]
+
+ scenarios = number_scenarios(extractscen)
+
+ # Return the wiredtiger_open extension argument for a shared library.
+    def extensionArg(self, exts):
+        # Build the ',extensions=[...]' fragment for wiredtiger_open.
+        #
+        # exts is a list of (dirname, name, libname) tuples.  Entries
+        # whose name is None or 'none' are ignored.  Each remaining entry
+        # names a shared library under the build directory; if that
+        # library does not exist the test is skipped.  Returns an empty
+        # string when no library is selected.
+        extfiles = []
+        for (dirname, name, libname) in exts:
+            if name is None or name == 'none':
+                continue
+            extfile = os.path.join(run.wt_builddir, 'ext', dirname, name,
+                '.libs', 'libwiredtiger_' + libname + '.so')
+            if not os.path.exists(extfile):
+                self.skipTest('extension "' + extfile + '" not built')
+            # Load each library only once, preserving first-seen order.
+            if extfile not in extfiles:
+                extfiles.append(extfile)
+        if not extfiles:
+            return ''
+        return ',extensions=["' + '","'.join(extfiles) + '"]'
+
+ # Override WiredTigerTestCase, we have extensions.
+    def setUpConnectionOpen(self, dir):
+        # Open the connection with the csv extractor extension loaded and
+        # an error prefix identifying this test.  Returns the connection.
+        extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')])
+        connarg = 'create,error_prefix="{0}: ",{1}'.format(
+            self.shortid(), extarg)
+        conn = self.wiredtiger_open(dir, connarg)
+        # repr() rather than backticks: same output, and backticks are
+        # not valid syntax in Python 3.
+        self.pr(repr(conn))
+        return conn
+
+    def expect(self, token, expected):
+        # Check that a token is present and that its kind is one of
+        # 'expected'; report through err() otherwise.  Returns the token.
+        acceptable = token is not None and token.kind in expected
+        if not acceptable:
+            self.err(token, 'expected one of: ' + str(expected))
+        return token
+
+    def err(self, token, msg):
+        # Fail the test with a message naming the offending token.
+        text = 'ERROR at token ' + str(token) + ': ' + msg
+        self.assertTrue(False, text)
+
+    def gen_key(self, i):
+        # Return the key for row i as a one element list, matching the
+        # configured key format.
+        if self.keyformat != 'S':
+            return [ i ]
+        return [ 'key%06d' % i ]  # zero pad so it sorts expectedly
+
+    def gen_values(self, i):
+        # Return the eleven values for row i: a comma separated string of
+        # all ten column values, then the ten values themselves.  Column
+        # number x (1..10) holds (i * x) % N; the first five are integers
+        # and the last five are their string form.
+        raw = [(i * x) % self.N for x in range(1, 11)]
+        joined = ','.join([str(v) for v in raw])
+        return [joined] + raw[:5] + [str(v) for v in raw[5:]]
+
+    def iterate(self, jc, mbr):
+        # Walk the join cursor and check it returns exactly the row
+        # numbers in 'mbr' (an iterable of expected members), each with
+        # the values gen_values() produces for it.
+        mbr = set(mbr)          # we need a mutable set
+        # A set, not a list: membership is tested for every returned row
+        # and a list would make the walk quadratic for large results.
+        gotkeys = set()
+        #self.tty('iteration expects ' + str(len(mbr)) +
+        #         ' entries: ' + str(mbr))
+        while jc.next() == 0:
+            [k] = jc.get_keys()
+            values = jc.get_values()
+            if self.keyformat == 'S':
+                # String keys look like 'key000123'; strip the prefix.
+                i = int(str(k[3:]))
+            else:
+                i = k
+            #self.tty('GOT key=' + str(k) + ', values=' + str(values))
+
+            # Duplicates may be returned when the disjunctions are used,
+            # so we ignore them.
+            if i not in gotkeys:
+                self.assertEquals(self.gen_values(i), values)
+                if i not in mbr:
+                    self.tty('ERROR: result ' + str(i) + ' is not in: ' +
+                             str(mbr))
+                self.assertTrue(i in mbr)
+                mbr.remove(i)
+                gotkeys.add(i)
+        # Everything expected must have been returned.
+        self.assertEquals(0, len(mbr))
+
+    def token_literal(self, token):
+        # Return the Python value of a literal token: the text of a
+        # STRING, the integer of a NUMBER, None for any other kind.
+        if token.kind == Token.NUMBER:
+            return token.n
+        if token.kind == Token.STRING:
+            return token.s
+        return None
+
+    def idx_sim(self, x, mult, isstr):
+        # Simulate the indexed value stored for row x in the column with
+        # multiplier 'mult'; string columns yield the string form.
+        if not isstr:
+            return (x * mult % self.N)
+        return str(int(x) * mult % self.N)
+
+    def mkmbr(self, expr):
+        # Return the frozen set of row numbers for which expr is true.
+        members = []
+        for x in self.allN:
+            if expr(x):
+                members.append(x)
+        return frozenset(members)
+
+    def join_one_side(self, jc, coltok, littok, optok, conjunction,
+                      isright, mbr):
+        # Join one comparison (column versus literal) into jc and return
+        # the updated expected membership set.
+        #
+        # coltok/littok/optok are the column, literal and operator
+        # tokens.  When the literal was written on the left of the column
+        # (isright is False) the operator is reversed so that it reads
+        # "column op literal".  'conjunction' selects and/or, both for
+        # the join operation and for how the set is combined with mbr.
+        column = coltok.s
+        idxname = 'index:join07:' + column
+        cursor = self.session.open_cursor(idxname, None, None)
+        jc.cursors.append(cursor)
+
+        # The reference cursor must be positioned on the literal.
+        literal = self.token_literal(littok)
+        cursor.set_key(literal)
+        searchret = cursor.search()
+        if searchret != 0:
+            self.tty('ERROR: cannot find value ' + str(literal) +
+                     ' in ' + idxname)
+        self.assertEquals(0, searchret)
+
+        op = optok.kind if isright else self.reverseop[optok.kind]
+        mult = self.columnmult[column]
+        config = 'compare=' + self.compareop[op] + ',operation=' + \
+            ('and' if conjunction else 'or')
+        if hasattr(coltok, 'bloom'):
+            config += ',strategy=bloom,count=' + str(coltok.bloom)
+        #self.tty('join(jc, cursor=' + str(literal) + ', ' + config)
+        self.session.join(jc, cursor, config)
+
+        # Compute the rows this comparison selects, using the same
+        # operator on the simulated index values.
+        isstr = type(literal) is str
+        comparisons = {
+            '==' : lambda v: v == literal,
+            '<=' : lambda v: v <= literal,
+            '<' : lambda v: v < literal,
+            '>=' : lambda v: v >= literal,
+            '>' : lambda v: v > literal,
+        }
+        selects = comparisons[op]
+        tmbr = self.mkmbr(lambda x: selects(self.idx_sim(x, mult, isstr)))
+        if conjunction:
+            return mbr.intersection(tmbr)
+        return mbr.union(tmbr)
+
+    def parse_join(self, jc, tokenizer, conjunction, mbr):
+        # Parse one join term and apply it to jc, returning the updated
+        # expected membership set.  A term is either a parenthesized
+        # sub-expression, which becomes a nested join cursor, or a
+        # comparison of the form
+        #     [literal op] column [attributes] [op literal]
+        # 'conjunction' says whether the enclosing junction is && or ||.
+        left = None
+        right = None
+        leftop = None
+        rightop = None
+        col = None
+        token = tokenizer.token()
+        if token.kind == Token.LPAREN:
+            subjc = self.session.open_cursor('join:table:join07', None, None)
+            jc.cursors.append(subjc)
+            # parse_junction consumes through the closing parenthesis.
+            submbr = self.parse_junction(subjc, tokenizer)
+            config = 'operation=' + ('and' if conjunction else 'or')
+            self.session.join(jc, subjc, config)
+            if conjunction:
+                mbr = mbr.intersection(submbr)
+            else:
+                mbr = mbr.union(submbr)
+            return mbr
+        if token.kind in Token.COMPARATORS:
+            left = token
+            leftop = self.expect(tokenizer.token(), Token.COMPARE_OPS)
+            token = tokenizer.token()
+        col = self.expect(token, [Token.COLUMN])
+        token = tokenizer.token()
+        # The token after the column may be missing (end of input), so
+        # guard against None.  The kind is compared for equality:
+        # Token.ATTRIBUTE is a string, so 'in' would be a substring test.
+        if token != None and token.kind == Token.ATTRIBUTE:
+            tokenizer.pushback(token)
+            self.parse_column_attributes(tokenizer, col)
+            token = tokenizer.token()
+        if token != None and token.kind in Token.COMPARE_OPS:
+            rightop = token
+            right = self.expect(tokenizer.token(), Token.COMPARATORS)
+            token = tokenizer.token()
+        # Whatever follows the term belongs to the caller.
+        tokenizer.pushback(token)
+
+        # Now we have everything we need to do a join.
+        if left != None:
+            mbr = self.join_one_side(jc, col, left, leftop, conjunction,
+                                     False, mbr)
+        if right != None:
+            mbr = self.join_one_side(jc, col, right, rightop, conjunction,
+                                     True, mbr)
+        return mbr
+
+ # Parse a set of joins, grouped by && or ||
+    def parse_junction(self, jc, tokenizer):
+        # Parse a run of join terms separated uniformly by && or by ||,
+        # up to a closing parenthesis or the end of input, and return
+        # the expected membership set.  The cursors opened along the way
+        # are collected on jc.cursors.
+        jc.cursors = []
+
+        # Take a peek at the tokenizer's stream to see if we
+        # have a conjunction or disjunction
+        first = tokenizer.peek()
+        remainder = tokenizer.s[first.pos:]
+        [andpos, orpos] = self.find_nonparen(remainder, ['&', '|'])
+        or_comes_first = orpos >= 0 and (andpos < 0 or orpos < andpos)
+        conjunction = not or_comes_first
+        # A conjunction narrows down from everything, a disjunction
+        # builds up from nothing.
+        mbr = frozenset(self.allN) if conjunction else frozenset()
+
+        while tokenizer.available():
+            mbr = self.parse_join(jc, tokenizer, conjunction, mbr)
+            separator = tokenizer.token()
+            if separator is None:
+                continue
+            kind = separator.kind
+            if kind == Token.OR:
+                self.assertTrue(not conjunction)
+            elif kind == Token.AND:
+                self.assertTrue(conjunction)
+            elif kind == Token.RPAREN:
+                break
+            else:
+                self.err(separator, 'unexpected token')
+        return mbr
+
+    def parse_attributes(self, tokenizer):
+        # Collect consecutive ATTRIBUTE tokens and return them as a list.
+        # The first token that is not an attribute (possibly None) is
+        # pushed back for the caller.
+        attributes = []
+        while True:
+            token = tokenizer.token()
+            if token is None or token.kind != Token.ATTRIBUTE:
+                break
+            attributes.append(token)
+        tokenizer.pushback(token)
+        return attributes
+
+ # Find a set of chars that aren't within parentheses.
+ # For this simple language, we don't allow parentheses in quoted literals.
+    def find_nonparen(self, s, matchlist):
+        # For each char in matchlist, return the position of its first
+        # occurrence in s outside of any parentheses, or -1 if there is
+        # none.  The scan stops at an unbalanced closing parenthesis, or
+        # once every char has been found.
+        result = [-1] * len(matchlist)
+        remaining = len(matchlist)
+        depth = 0
+        for (pos, c) in enumerate(s):
+            if remaining == 0:
+                break
+            if c == '(':
+                depth += 1
+            elif c == ')':
+                depth -= 1
+                if depth < 0:
+                    break
+            elif depth == 0 and c in matchlist:
+                m = matchlist.index(c)
+                if result[m] < 0:
+                    result[m] = pos
+                    remaining -= 1
+        return result
+
+    def parse_toplevel(self, jc, tokenizer):
+        # The whole expression is simply the outermost junction.
+        mbr = self.parse_junction(jc, tokenizer)
+        return mbr
+
+    def parse_toplevel_attributes(self, tokenizer):
+        # Apply the leading [key=value] attributes of an expression:
+        # N sets the number of rows, key sets the key format.  Any other
+        # attribute key is reported through the tokenizer.
+        for attrtoken in self.parse_attributes(tokenizer):
+            (key, value) = (attrtoken.attr_key, attrtoken.attr_value)
+            #self.tty('ATTR:' + str([key,value]))
+            if key == 'N':
+                self.N = int(value)
+                continue
+            if key == 'key':
+                self.keyformat = value
+                continue
+            tokenizer.error('bad attribute key: ' + str(key))
+
+    def parse_column_attributes(self, tokenizer, c):
+        # Apply the [key=value] attributes following a column to the
+        # column token c.  Only 'bloom' is understood: it records the
+        # bloom filter count on the token.
+        for attrtoken in self.parse_attributes(tokenizer):
+            (key, value) = (attrtoken.attr_key, attrtoken.attr_value)
+            #self.tty('ATTR:' + str([key,value]))
+            if key != 'bloom':
+                tokenizer.error('bad column attribute key: ' + str(key))
+            else:
+                c.bloom = int(value)
+
+    def close_cursors(self, jc):
+        # Close a join cursor, then everything joined into it; nested
+        # join cursors are closed recursively.
+        jc.close()
+        for child in jc.cursors:
+            nested = (child.uri[0:5] == 'join:')
+            if nested:
+                self.close_cursors(child)
+                continue
+            child.close()
+
+    def interpret(self, s):
+        # Run one join expression end to end: create and populate the
+        # table and its ten indices, build the join described by s,
+        # check the rows it returns, then clean up.
+        #self.tty('INTERPRET: ' + s)
+
+        # Defaults, which leading attributes in s may override.
+        self.N = 1000
+        self.keyformat = "r"
+        self.keycols = 'k'
+
+        # Grab attributes before creating anything, as some attributes
+        # may override needed parameters.
+        tokenizer = Tokenizer(s)
+        self.parse_toplevel_attributes(tokenizer)
+        self.allN = range(1, self.N + 1)
+
+        tableconfig = 'key_format=' + self.keyformat + \
+            ',value_format=SiiiiiSSSSS,' + \
+            'columns=(' + self.keycols + \
+            ',S,A,B,C,D,E,F,G,H,I,J)'
+        self.session.create('table:join07', tableconfig)
+
+        # One index per column.  With the extractor, each index pulls
+        # its field out of the csv column; columns from F onward are
+        # strings.
+        colnames = [ 'A','B','C','D','E','F','G','H','I','J' ]
+        firststring = colnames.index('F')
+        for (fieldnum, colname) in enumerate(colnames):
+            mdconfig = ''
+            if not self.extractor:
+                config = 'columns=(%s)' % colname
+            else:
+                mdformat = 'i' if fieldnum < firststring else 'S'
+                mdconfig = 'app_metadata={"format" : "%s","field" : "%d"}' % \
+                    (mdformat, fieldnum)
+                config = 'extractor=csv,key_format=%s' % mdformat
+            self.session.create('index:join07:%s' % colname,
+                                '%s,%s' % (config, mdconfig))
+
+        # Populate the table.
+        c = self.session.open_cursor('table:join07', None, None)
+        for i in self.allN:
+            c.set_key(*self.gen_key(i))
+            c.set_value(*self.gen_values(i))
+            c.insert()
+        c.close()
+
+        # Build the join, then compare what it returns against the
+        # membership set computed while parsing.
+        jc = self.session.open_cursor('join:table:join07', None, None)
+        expected = self.parse_toplevel(jc, tokenizer)
+        self.iterate(jc, expected)
+
+        self.close_cursors(jc)
+        self.session.drop('table:join07')
+
+    def test_join_string(self):
+        # Interpret a series of join expressions, in order, covering
+        # conjunctions, disjunctions, nesting, string keys and bloom
+        # filter attributes.
+        clause1 = "(7 < A <= 500 && B < 150)"
+        clause2 = "(F > '234' || G < '100')"
+        bloom1 = "(7 < A <= 500 && B[bloom=300] < 150)"
+        bloom2 = "(F[bloom=500] > '234' || G[bloom=20] < '100')"
+        expressions = [
+            "[N=1000][key=r] 7 < A <= 500 && B < 150 && C > 17",
+            "[N=1001][key=r] 7 < A <= 500 && B < 150 && F > '234'",
+            "[N=10000][key=r] 7 < A <= 500 && B < 150 && " +
+                "(F > '234' || G < '100')",
+            "[N=7919][key=r](7 < A <= 9)&&(F > '234')",
+            "[N=1000][key=S](A>=0 && A<0)||(A>999)",
+            "[N=2000][key=S](A>=0 && A<0)||(A>1999)",
+            "(7<A<=10 && B < 150)||(B>998)",
+            "(7<A<=10 && B < 150)||(J=='990')",
+            "[N=1000][key=r]" + clause1 + "&&" + clause2,
+            "(7<A<=10)||(B>994||C<12)",
+            "(7<A<=10 && B < 150)||(B>996||C<6)",
+            "[N=1000][key=r]" + clause2 + "||" + clause1,
+            "[N=1000][key=r]" + clause1 + "||" + clause2,
+            "[N=1000][key=S]" + clause2 + "&&" + clause1,
+            "[N=1000][key=S]" + bloom1 + "&&" + bloom2,
+        ]
+        for expression in expressions:
+            self.interpret(expression)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_join08.py b/test/suite/test_join08.py
new file mode 100644
index 00000000000..6d674ab8193
--- /dev/null
+++ b/test/suite/test_join08.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join08.py
+# Test join error paths
+class test_join08(wttest.WiredTigerTestCase):
+ nentries = 100
+
+ # We need statistics for these tests.
+ conn_config = 'statistics=(all)'
+
+ def gen_key(self, i):
+ return [ i + 1 ]
+
+ def gen_values(self, i):
+ s = str(i)
+ rs = s[::-1]
+ sort3 = (self.nentries * (i % 3)) + i # multiples of 3 sort first
+ return [s, rs, sort3]
+
+ def test_join_errors(self):
+ self.session.create('table:join08', 'key_format=r,value_format=SS'
+ ',columns=(k,v0,v1)')
+ self.session.create('table:join08B', 'key_format=r,value_format=SS'
+ ',columns=(k,v0,v1)')
+ self.session.create('index:join08:index0','columns=(v0)')
+ self.session.create('index:join08:index1','columns=(v1)')
+ self.session.create('index:join08B:index0','columns=(v0)')
+ jc = self.session.open_cursor('join:table:join08', None, None)
+ tc = self.session.open_cursor('table:join08', None, None)
+ fc = self.session.open_cursor('file:join08.wt', None, None)
+ ic0 = self.session.open_cursor('index:join08:index0', None, None)
+ ic0again = self.session.open_cursor('index:join08:index0', None, None)
+ ic1 = self.session.open_cursor('index:join08:index1', None, None)
+ icB = self.session.open_cursor('index:join08B:index0', None, None)
+ tcB = self.session.open_cursor('table:join08B', None, None)
+
+ tc.set_key(1)
+ tc.set_value('val1', 'val1')
+ tc.insert()
+ tcB.set_key(1)
+ tcB.set_value('val1', 'val1')
+ tcB.insert()
+ fc.next()
+
+ # Joining using a non join-cursor
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(tc, ic0, 'compare=ge'),
+ '/not a join cursor/')
+ # Joining a file cursor, which is not an index, table or join cursor
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, fc, 'compare=ge'),
+ '/must be an index, table or join cursor/')
+ # Joining a non positioned cursor
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0, 'compare=ge'),
+ '/requires reference cursor be positioned/')
+ ic0.set_key('val1')
+ # Joining a non positioned cursor (no search or next has been done)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0, 'compare=ge'),
+ '/requires reference cursor be positioned/')
+ ic0.set_key('valXX')
+ self.assertEqual(ic0.search(), wiredtiger.WT_NOTFOUND)
+ # Joining a non positioned cursor after failed search
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0, 'compare=ge'),
+ '/requires reference cursor be positioned/')
+
+ # position the cursors now
+ ic0.set_key('val1')
+ ic0.search()
+ ic0again.next()
+ icB.next()
+
+ # Joining non matching index
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, icB, 'compare=ge'),
+ '/table for join cursor does not match/')
+
+ # The cursor must be positioned
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic1, 'compare=ge'),
+ '/requires reference cursor be positioned/')
+ ic1.next()
+
+ # This succeeds.
+ self.session.join(jc, ic1, 'compare=ge'),
+
+ # With bloom filters, a count is required
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'),
+ '/count must be nonzero/')
+
+ # This succeeds.
+ self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000'),
+
+ bloom_config = ',strategy=bloom,count=1000'
+ # Cannot use the same index cursor
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0,
+ 'compare=le' + bloom_config),
+ '/cursor already used in a join/')
+
+ # When joining with the same index, need compatible compares
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0again, 'compare=ge' + bloom_config),
+ '/join has overlapping ranges/')
+
+ # Another incompatible compare
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0again, 'compare=gt' + bloom_config),
+ '/join has overlapping ranges/')
+
+ # Compare is compatible, but bloom args need to match
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0again, 'compare=le'),
+ '/join has incompatible strategy/')
+
+ # Counts need to match for bloom filters
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.join(jc, ic0again, 'compare=le,strategy=bloom,'
+ 'count=100'), '/count.* does not match previous count/')
+
+ # This succeeds
+ self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000')
+
+ # Need to do initial next() before getting key/values
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.get_keys(),
+ '/join cursor must be advanced with next/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.get_values(),
+ '/join cursor must be advanced with next/')
+
+ # Operations on the joined cursor are frozen until the join is closed.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: ic0.next(),
+ '/cursor is being used in a join/')
+
+ # Operations on the joined cursor are frozen until the join is closed.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: ic0.prev(),
+ '/cursor is being used in a join/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: ic0.reset(),
+ '/cursor is being used in a join/')
+
+ # Only a small number of operations allowed on a join cursor
+ msg = "/Unsupported cursor/"
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.search(), msg)
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: jc.prev(), msg)
+
+ self.assertEquals(jc.next(), 0)
+ self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND)
+
+ # Only after the join cursor is closed can we use the index cursor
+ # normally
+ jc.close()
+ self.assertEquals(ic0.next(), wiredtiger.WT_NOTFOUND)
+ self.assertEquals(ic0.prev(), 0)
+
+ # common code for making sure that cursors can be
+ # implicitly closed, no matter the order they are created
+ def cursor_close_common(self, joinfirst):
+ self.session.create('table:join08', 'key_format=r' +
+ ',value_format=SS,columns=(k,v0,v1)')
+ self.session.create('index:join08:index0','columns=(v0)')
+ self.session.create('index:join08:index1','columns=(v1)')
+ c = self.session.open_cursor('table:join08', None, None)
+ for i in range(0, self.nentries):
+ c.set_key(*self.gen_key(i))
+ c.set_value(*self.gen_values(i))
+ c.insert()
+ c.close()
+
+ if joinfirst:
+ jc = self.session.open_cursor('join:table:join08', None, None)
+ c0 = self.session.open_cursor('index:join08:index0', None, None)
+ c1 = self.session.open_cursor('index:join08:index1', None, None)
+ c0.next() # index cursors must be positioned
+ c1.next()
+ if not joinfirst:
+ jc = self.session.open_cursor('join:table:join08', None, None)
+ self.session.join(jc, c0, 'compare=ge')
+ self.session.join(jc, c1, 'compare=ge')
+ self.session.close()
+ self.session = None
+
+ def test_cursor_close1(self):
+ self.cursor_close_common(True)
+
+ def test_cursor_close2(self):
+ self.cursor_close_common(False)
+
+ # test statistics with a simple one index join cursor
+ def test_simple_stats(self):
+ self.session.create("table:join01b",
+ "key_format=i,value_format=i,columns=(k,v)")
+ self.session.create("index:join01b:index", "columns=(v)")
+
+ cursor = self.session.open_cursor("table:join01b", None, None)
+ cursor[1] = 11
+ cursor[2] = 12
+ cursor[3] = 13
+ cursor.close()
+
+ cursor = self.session.open_cursor("index:join01b:index", None, None)
+ cursor.set_key(11)
+ cursor.search()
+
+ jcursor = self.session.open_cursor("join:table:join01b", None, None)
+ self.session.join(jcursor, cursor, "compare=gt")
+
+ while jcursor.next() == 0:
+ [k] = jcursor.get_keys()
+ [v] = jcursor.get_values()
+
+ statcur = self.session.open_cursor("statistics:join", jcursor, None)
+ found = False
+ while statcur.next() == 0:
+ [desc, pvalue, value] = statcur.get_values()
+ #self.tty(str(desc) + "=" + str(pvalue))
+ found = True
+ self.assertEquals(found, True)
+
+ jcursor.close()
+ cursor.close()
+
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index ddf871d9a24..10262edc777 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -77,16 +77,22 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
('string', dict(keyfmt='S'))
]
types = [
- ('file', dict(type='file:',
- name='file',
+ ('file', dict(uri='file:', config='', lsm=False,
populate=simple_populate,
populate_check=simple_populate_check_cursor)),
- ('table-simple', dict(type='table:',
- name='table-simple',
+ ('lsm', dict(uri='lsm:', config='', lsm=True,
populate=simple_populate,
populate_check=simple_populate_check_cursor)),
- ('table-complex', dict(type='table:',
- name='table-complex',
+ ('table-simple', dict(uri='table:', config='', lsm=False,
+ populate=simple_populate,
+ populate_check=simple_populate_check_cursor)),
+ ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
+ populate=simple_populate,
+ populate_check=simple_populate_check_cursor)),
+ ('table-complex', dict(uri='table:', config='', lsm=False,
+ populate=complex_populate,
+ populate_check=complex_populate_check_cursor)),
+ ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=complex_populate,
populate_check=complex_populate_check_cursor))
]
@@ -95,9 +101,14 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsondump_util(self):
+ # LSM and column-store are not a valid combination.
+ if self.lsm and self.keyfmt == 'r':
+ return
+
# Create the object.
- uri = self.type + self.name
- self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
+ uri = self.uri + self.name
+ self.populate(self, uri, self.config + ',key_format=' + self.keyfmt,
+ self.nentries)
# Dump the object.
self.runWt(['dump', '-j', uri], outfilename='jsondump.out')
@@ -125,9 +136,13 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsonload_util(self):
+ # LSM and column-store are not a valid combination.
+ if self.lsm and self.keyfmt == 'r':
+ return
+
# Create the object.
- uri = self.type + self.name
- uri2 = self.type + self.name2
+ uri = self.uri + self.name
+ uri2 = self.uri + self.name2
self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries)
# Dump the object.
diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py
index c6cd464e453..50931f0f5e6 100644
--- a/test/suite/test_jsondump02.py
+++ b/test/suite/test_jsondump02.py
@@ -28,16 +28,19 @@
import os
import wiredtiger, wttest
+from suite_subprocess import suite_subprocess
# test_jsondump.py
# Test dump output from json cursors.
-class test_jsondump02(wttest.WiredTigerTestCase):
+class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess):
table_uri1 = 'table:jsondump02a.wt'
table_uri2 = 'table:jsondump02b.wt'
table_uri3 = 'table:jsondump02c.wt'
basename_uri4 = 'jsondump02d.wt'
table_uri4 = 'table:' + basename_uri4
+ table_uri5 = 'table:jsondump02e.wt'
+ table_uri6 = 'table:jsondump02f.wt'
def set_kv(self, uri, key, val):
cursor = self.session.open_cursor(uri, None, None)
@@ -80,15 +83,14 @@ class test_jsondump02(wttest.WiredTigerTestCase):
pos = 0
try:
for insert in inserts:
- #tty_pr('Insert: ' + str(insert))
cursor[insert[0]] = insert[1]
finally:
cursor.close()
- # Create JSON cursors and test them directly.
def test_json_cursor(self):
"""
- Create a table, add a key, get it back
+ Create JSON cursors and test them directly, also test
+ dump/load commands.
"""
extra_params = ',allocation_size=512,' +\
'internal_page_max=16384,leaf_page_max=131072'
@@ -112,7 +114,12 @@ class test_jsondump02(wttest.WiredTigerTestCase):
self.session.create(uri4index3, "columns=(i2,i4)")
self.set_kv(self.table_uri1, 'KEY000', 'string value')
- self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\', etc. allowed')
+ self.set_kv(self.table_uri1, 'KEY001', '\'\"({[]})\"\'\\, etc. allowed')
+ # \u03c0 is pi in Unicode, converted by Python to UTF-8: 0xcf 0x80.
+ # Here's how UTF-8 might be used.
+ self.set_kv(self.table_uri1, 'KEY002', u'\u03c0'.encode('utf-8'))
+ # 0xf5-0xff are illegal in Unicode, but may occur legally in C strings.
+ self.set_kv(self.table_uri1, 'KEY003', '\xff\xfe')
self.set_kv2(self.table_uri2, 'KEY000', 123, 'str0')
self.set_kv2(self.table_uri2, 'KEY001', 234, 'str1')
self.set_kv(self.table_uri3, 1, '\x01\x02\x03')
@@ -122,7 +129,9 @@ class test_jsondump02(wttest.WiredTigerTestCase):
table1_json = (
('"key0" : "KEY000"', '"value0" : "string value"'),
('"key0" : "KEY001"', '"value0" : ' +
- '"\'\\\"({[]})\\\"\', etc. allowed"'))
+ '"\'\\\"({[]})\\\"\'\\\\, etc. allowed"'),
+ ('"key0" : "KEY002"', '"value0" : "\\u00cf\\u0080"'),
+ ('"key0" : "KEY003"', '"value0" : "\\u00ff\\u00fe"'))
self.check_json(self.table_uri1, table1_json)
self.session.truncate(self.table_uri1, None, None, None)
@@ -199,18 +208,23 @@ class test_jsondump02(wttest.WiredTigerTestCase):
# this one should work
self.load_json(self.table_uri2,
- (('"key0" : "KEY002"', '"value0" : 345,\n"value1" : "str2"'),))
+ (('"key0" : "KEY002"', '"value0" : 34,\n"value1" : "str2"'),))
# extraneous/missing space is okay
self.load_json(self.table_uri2,
((' "key0"\n:\t"KEY003" ',
- '"value0":456,"value1"\n\n\r\n:\t\n"str3"'),))
+ '"value0":45,"value1"\n\n\r\n:\t\n"str3"'),))
- self.check_json(self.table_uri3, (
- ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
- ('"key0" : 2',
- '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"')))
- self.check_json(self.table_uri4, (
+ table2_json = (
+ ('"key0" : "KEY002"', '"value0" : 34,\n"value1" : "str2"'),
+ ('"key0" : "KEY003"', '"value0" : 45,\n"value1" : "str3"'))
+
+ table3_json = (
+ ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'),
+ ('"key0" : 2',
+ '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"'))
+ self.check_json(self.table_uri3, table3_json)
+ table4_json = (
('"ikey" : 1,\n"Skey" : "key1"',
'"S1" : "val1",\n"i2" : 1,\n"S3" : "val1",\n"i4" : 1'),
('"ikey" : 2,\n"Skey" : "key2"',
@@ -218,7 +232,8 @@ class test_jsondump02(wttest.WiredTigerTestCase):
('"ikey" : 3,\n"Skey" : "key3"',
'"S1" : "val9",\n"i2" : 9,\n"S3" : "val27",\n"i4" : 27'),
('"ikey" : 4,\n"Skey" : "key4"',
- '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
+ '"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64'))
+ self.check_json(self.table_uri4, table4_json)
# The dump config currently is not supported for the index type.
self.check_json(uri4index1, (
('"Skey" : "key1"',
@@ -248,5 +263,141 @@ class test_jsondump02(wttest.WiredTigerTestCase):
('"i2" : 16,\n"i4" : 64',
'"S1" : "val16",\n"i2" : 16,\n"S3" : "val64",\n"i4" : 64')))
+ # Dump all the tables into a single file, and also each
+ # table into its own file.
+ self.runWt(['dump', '-j',
+ self.table_uri1,
+ self.table_uri2,
+ self.table_uri3,
+ self.table_uri4],
+ outfilename='jsondump-all.out')
+ self.runWt(['dump', '-j', self.table_uri1], outfilename='jsondump1.out')
+ self.runWt(['dump', '-j', self.table_uri2], outfilename='jsondump2.out')
+ self.runWt(['dump', '-j', self.table_uri3], outfilename='jsondump3.out')
+ self.runWt(['dump', '-j', self.table_uri4], outfilename='jsondump4.out')
+ self.session.drop(self.table_uri1)
+ self.session.drop(self.table_uri2)
+ self.session.drop(self.table_uri3)
+ self.session.drop(self.table_uri4)
+ self.runWt(['load', '-jf', 'jsondump1.out'])
+ self.session.drop(self.table_uri1)
+ self.runWt(['load', '-jf', 'jsondump2.out'])
+ self.session.drop(self.table_uri2)
+ self.runWt(['load', '-jf', 'jsondump3.out'])
+ self.session.drop(self.table_uri3)
+ self.runWt(['load', '-jf', 'jsondump4.out'])
+ self.session.drop(self.table_uri4)
+
+ self.runWt(['load', '-jf', 'jsondump-all.out'])
+ self.check_json(self.table_uri1, table1_json)
+ self.check_json(self.table_uri2, table2_json)
+ self.check_json(self.table_uri3, table3_json)
+ self.check_json(self.table_uri4, table4_json)
+
+ # Generate two byte keys that cover some range of byte values.
+ # For simplicity, the keys are monotonically increasing.
+ # A null byte is disallowed in a string key, so we don't use it.
+ def generate_key(self, i, k):
+ k[0] = ((i & 0xffc0) >> 6) + 1
+ k[1] = (i & 0x3f) + 1
+
+ # Generate three byte values:
+ # i==0 : v:[0x00, 0x01, 0x02]
+ # i==1 : v:[0x01, 0x02, 0x03]
+ # etc.
+ # A null byte is disallowed in a string value, it is replaced by 'X'
+ def generate_value(self, i, v, isstring):
+ for j in range(0, 3):
+ val = (i + j) % 256
+ if isstring and val == 0:
+ val = 88 # 'X'
+ v[j] = val
+
+ def test_json_all_bytes(self):
+ """
+ Test the generated JSON for all byte values in byte array and
+ string formats.
+ """
+ self.session.create(self.table_uri5, 'key_format=u,value_format=u')
+ self.session.create(self.table_uri6, 'key_format=S,value_format=S')
+
+ c5 = self.session.open_cursor(self.table_uri5, None, None)
+ c6 = self.session.open_cursor(self.table_uri6, None, None)
+ k = bytearray(b'\x00\x00')
+ v = bytearray(b'\x00\x00\x00')
+ for i in range(0, 512):
+ self.generate_key(i, k)
+ self.generate_value(i, v, False)
+ c5[str(k)] = str(v)
+ self.generate_value(i, v, True) # no embedded nuls
+ c6[str(k)] = str(v)
+ c5.close()
+ c6.close()
+
+ # Build table5_json, we want it to look like this:
+ # ('"key0" : "\u0001\u0001"', '"value0" : "\u0000\u0001\u0002"'),
+ # ('"key0" : "\u0001\u0002"', '"value0" : "\u0001\u0002\u0003"'),
+ # ('"key0" : "\u0001\u0003"', '"value0" : "\u0002\u0003\u0004"'),
+ # ...
+ # table6_json is similar, except that printable values like '\u0041'
+ # would appear as 'A'. The string type cannot have embedded nulls,
+ # so '\u0000' in table6_json appears instead as an 'X'.
+ #
+ # Start by creating two tables of individual Unicode values.
+ # bin_unicode[] contains only the \u escape sequences.
+ # mix_unicode[] contains printable characters or \t \n etc. escapes
+ bin_unicode = []
+ mix_unicode = []
+ for i in range(0, 256):
+ u = "\\u00" + hex(256 + i)[3:] # e.g. "\u00ab"
+ bin_unicode.append(u)
+ mix_unicode.append(u)
+ for i in range(0x20, 0x7f):
+ mix_unicode[i] = chr(i)
+ mix_unicode[ord('"')] = '\\"'
+ mix_unicode[ord('\\')] = '\\\\'
+ mix_unicode[ord('\f')] = '\\f'
+ mix_unicode[ord('\n')] = '\\n'
+ mix_unicode[ord('\r')] = '\\r'
+ mix_unicode[ord('\t')] = '\\t'
+
+ table5_json = []
+ table6_json = []
+ for i in range(0, 512):
+ self.generate_key(i, k)
+ self.generate_value(i, v, False)
+ j = i if (i > 0 and i < 254) or (i > 256 and i < 510) else 88
+ table5_json.append(('"key0" : "' + bin_unicode[k[0]] +
+ bin_unicode[k[1]] + '"',
+ '"value0" : "' + bin_unicode[v[0]] +
+ bin_unicode[v[1]] +
+ bin_unicode[v[2]] + '"'))
+ self.generate_value(i, v, True)
+ table6_json.append(('"key0" : "' + mix_unicode[k[0]] +
+ mix_unicode[k[1]] + '"',
+ '"value0" : "' + mix_unicode[v[0]] +
+ mix_unicode[v[1]] +
+ mix_unicode[v[2]] + '"'))
+
+ self.check_json(self.table_uri5, table5_json)
+ self.check_json(self.table_uri6, table6_json)
+
+ self.session.truncate(self.table_uri5, None, None, None)
+ self.session.truncate(self.table_uri6, None, None, None)
+ self.load_json(self.table_uri5, table5_json)
+ self.load_json(self.table_uri6, table6_json)
+ self.check_json(self.table_uri5, table5_json)
+ self.check_json(self.table_uri6, table6_json)
+
+ self.runWt(['dump', '-j', self.table_uri5], outfilename='jsondump5.out')
+ self.runWt(['dump', '-j', self.table_uri6], outfilename='jsondump6.out')
+ self.session.drop(self.table_uri5)
+ self.session.drop(self.table_uri6)
+ self.runWt(['load', '-jf', 'jsondump5.out'])
+ self.runWt(['load', '-jf', 'jsondump6.out'])
+ self.session.drop(self.table_uri5)
+ self.session.drop(self.table_uri6)
+
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py
index aee8ee4458b..85a9ceb2a34 100644
--- a/test/suite/test_reconfig02.py
+++ b/test/suite/test_reconfig02.py
@@ -74,9 +74,15 @@ class test_reconfig02(wttest.WiredTigerTestCase):
# Now turn on pre-allocation. Sleep to give the worker thread
# a chance to run and verify pre-allocated log files exist.
+ #
+ # Potentially loop a few times in case it is a very slow system.
self.conn.reconfigure("log=(prealloc=true)")
- time.sleep(2)
- prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*")
+ for x in xrange(0, 20):
+ time.sleep(1)
+ prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*")
+ if len(prep_logs) != 0:
+ break
+
self.assertNotEqual(0, len(prep_logs))
# Logging starts on, but archive is off. Verify it is off.
diff --git a/test/suite/test_stat05.py b/test/suite/test_stat05.py
index 6a93ec2c84d..9bcedd65089 100644
--- a/test/suite/test_stat05.py
+++ b/test/suite/test_stat05.py
@@ -37,9 +37,13 @@ from helper import complex_value_populate, key_populate, value_populate
# Statistics cursor using size only
class test_stat_cursor_config(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_size'
+ conn_config = 'statistics=(fast)'
+
uri = [
('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')),
('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')),
+ ('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='',
+ conn_config='in_memory,statistics=(fast)')),
('table-lsm', dict(uri='table:' + pfx, pop=simple_populate,
cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')),
('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')),
@@ -49,7 +53,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
]
scenarios = number_scenarios(uri)
- conn_config = 'statistics=(fast)'
def openAndWalkStatCursor(self):
c = self.session.open_cursor(
diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py
index bbd6ce8c4e2..9d9d2db62c6 100644
--- a/test/suite/test_txn04.py
+++ b/test/suite/test_txn04.py
@@ -193,7 +193,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
self.hot_backup(self.uri, committed)
def test_ops(self):
- with self.expectedStdoutPattern('Recreating metadata'):
+ with self.expectedStdoutPattern('recreating metadata'):
self.ops()
if __name__ == '__main__':
diff --git a/test/suite/test_util02.py b/test/suite/test_util02.py
index 51e03d8d105..475e856052a 100644
--- a/test/suite/test_util02.py
+++ b/test/suite/test_util02.py
@@ -173,7 +173,7 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess):
complex_populate(self, self.uri, "key_format=S,value_format=S", 20)
self.runWt(["dump", self.uri], outfilename="dump.out")
loadargs = ["load", "-f", "dump.out"] + args
- self.runWt(loadargs, errfilename=errfile)
+ self.runWt(loadargs, errfilename=errfile, failure=fail)
if fail:
self.check_non_empty_file(errfile)
else:
@@ -181,23 +181,24 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess):
# Empty arguments should suceed.
def test_load_commandline_1(self):
- self.load_commandline([], 0)
+ self.load_commandline([], False)
# Arguments are in pairs.
def test_load_commandline_2(self):
- self.load_commandline(["table"], 1)
- self.load_commandline([self.uri, "block_allocation=first", self.uri], 1)
+ self.load_commandline(["table"], True)
+ self.load_commandline(
+ [self.uri, "block_allocation=first", self.uri], True)
# You can use short-hand URIs for a single object, but cannot match multiple
# objects.
def test_load_commandline_3(self):
- self.load_commandline(["table", "block_allocation=first"], 0)
- self.load_commandline(["colgroup", "block_allocation=first"], 1)
+ self.load_commandline(["table", "block_allocation=first"], False)
+ self.load_commandline(["colgroup", "block_allocation=first"], True)
# You can't reference non-existent objects.
def test_load_commandline_4(self):
- self.load_commandline([self.uri, "block_allocation=first"], 0)
- self.load_commandline(["table:bar", "block_allocation=first"], 1)
+ self.load_commandline([self.uri, "block_allocation=first"], False)
+ self.load_commandline(["table:bar", "block_allocation=first"], True)
# You can specify multipleconfiguration arguments for the same object.
def test_load_commandline_5(self):
@@ -205,19 +206,19 @@ class test_load_commandline(wttest.WiredTigerTestCase, suite_subprocess):
self.uri, "block_allocation=first",
self.uri, "block_allocation=best",
self.uri, "block_allocation=first",
- self.uri, "block_allocation=best"], 0)
+ self.uri, "block_allocation=best"], False)
# You can't modify a format.
def test_load_commandline_6(self):
- self.load_commandline(["table", "key_format=d"], 1)
- self.load_commandline(["table", "value_format=d"], 1)
+ self.load_commandline(["table", "key_format=d"], True)
+ self.load_commandline(["table", "value_format=d"], True)
# You can set the source or version, but it gets stripped; confirm the
# attempt succeeds, so we know they configuration values are stripped.
def test_load_commandline_7(self):
- self.load_commandline(["table", "filename=bar"], 0)
- self.load_commandline(["table", "source=bar"], 0)
- self.load_commandline(["table", "version=(100,200)"], 0)
+ self.load_commandline(["table", "filename=bar"], False)
+ self.load_commandline(["table", "source=bar"], False)
+ self.load_commandline(["table", "version=(100,200)"], False)
if __name__ == '__main__':
diff --git a/test/suite/test_util07.py b/test/suite/test_util07.py
index 2bbb40422bd..602ddbba5ff 100644
--- a/test/suite/test_util07.py
+++ b/test/suite/test_util07.py
@@ -71,7 +71,8 @@ class test_util07(wttest.WiredTigerTestCase, suite_subprocess):
self.session.create('table:' + self.tablename, self.session_params)
outfile = "readout.txt"
errfile = "readerr.txt"
- self.runWt(["read", 'table:' + self.tablename, 'NoMatch'], outfilename=outfile, errfilename=errfile)
+ self.runWt(["read", 'table:' + self.tablename, 'NoMatch'],
+ outfilename=outfile, errfilename=errfile, failure=True)
self.check_empty_file(outfile)
self.check_file_contains(errfile, 'NoMatch: not found\n')
@@ -83,10 +84,12 @@ class test_util07(wttest.WiredTigerTestCase, suite_subprocess):
self.populate(self.tablename)
outfile = "readout.txt"
errfile = "readerr.txt"
- self.runWt(["read", 'table:' + self.tablename, 'KEY49'], outfilename=outfile, errfilename=errfile)
+ self.runWt(["read", 'table:' + self.tablename, 'KEY49'],
+ outfilename=outfile, errfilename=errfile)
self.check_file_content(outfile, 'VAL49\n')
self.check_empty_file(errfile)
- self.runWt(["read", 'table:' + self.tablename, 'key49'], outfilename=outfile, errfilename=errfile)
+ self.runWt(["read", 'table:' + self.tablename, 'key49'],
+ outfilename=outfile, errfilename=errfile, failure=True)
self.check_empty_file(outfile)
self.check_file_contains(errfile, 'key49: not found\n')
diff --git a/test/suite/test_util12.py b/test/suite/test_util12.py
index e8226a3146c..f407c2ce7d6 100644
--- a/test/suite/test_util12.py
+++ b/test/suite/test_util12.py
@@ -57,7 +57,8 @@ class test_util12(wttest.WiredTigerTestCase, suite_subprocess):
self.session.create('table:' + self.tablename, self.session_params)
errfile = 'writeerr.txt'
- self.runWt(['write', 'table:' + self.tablename], errfilename=errfile)
+ self.runWt(['write', 'table:' + self.tablename],
+ errfilename=errfile, failure=True)
self.check_file_contains(errfile, 'usage:')
def test_write_overwrite(self):
@@ -82,7 +83,7 @@ class test_util12(wttest.WiredTigerTestCase, suite_subprocess):
self.session.create('table:' + self.tablename, self.session_params)
errfile = 'writeerr.txt'
self.runWt(['write', 'table:' + self.tablename,
- 'def', '456', 'abc'], errfilename=errfile)
+ 'def', '456', 'abc'], errfilename=errfile, failure=True)
self.check_file_contains(errfile, 'usage:')
diff --git a/test/suite/test_verify.py b/test/suite/test_verify.py
index 5ce926027ef..28a66415b9d 100644
--- a/test/suite/test_verify.py
+++ b/test/suite/test_verify.py
@@ -151,7 +151,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
with self.open_and_position(self.tablename, 75) as f:
for i in range(0, 4096):
f.write(struct.pack('B', 0))
- self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out")
+ self.runWt(["verify", "table:" + self.tablename],
+ errfilename="verifyerr.out", failure=True)
self.check_non_empty_file("verifyerr.out")
def test_verify_process_25pct_junk(self):
@@ -165,7 +166,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
with self.open_and_position(self.tablename, 25) as f:
for i in range(0, 100):
f.write('\x01\xff\x80')
- self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out")
+ self.runWt(["verify", "table:" + self.tablename],
+ errfilename="verifyerr.out", failure=True)
self.check_non_empty_file("verifyerr.out")
def test_verify_process_truncated(self):
@@ -178,7 +180,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
self.populate(self.tablename)
with self.open_and_position(self.tablename, 75) as f:
f.truncate(0)
- self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out")
+ self.runWt(["verify", "table:" + self.tablename],
+ errfilename="verifyerr.out", failure=True)
self.check_non_empty_file("verifyerr.out")
def test_verify_process_zero_length(self):
@@ -190,7 +193,8 @@ class test_verify(wttest.WiredTigerTestCase, suite_subprocess):
self.populate(self.tablename)
with self.open_and_position(self.tablename, 0) as f:
f.truncate(0)
- self.runWt(["verify", "table:" + self.tablename], errfilename="verifyerr.out")
+ self.runWt(["verify", "table:" + self.tablename],
+ errfilename="verifyerr.out", failure=True)
self.check_non_empty_file("verifyerr.out")
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index a1945b4325d..9e430fcdba7 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -259,20 +259,20 @@ class WiredTigerTestCase(unittest.TestCase):
self.conn.close()
self.conn = None
- def open_conn(self):
+ def open_conn(self, directory="."):
"""
Open the connection if already closed.
"""
if self.conn == None:
- self.conn = self.setUpConnectionOpen(".")
+ self.conn = self.setUpConnectionOpen(directory)
self.session = self.setUpSessionOpen(self.conn)
- def reopen_conn(self):
+ def reopen_conn(self, directory="."):
"""
Reopen the connection.
"""
self.close_conn()
- self.open_conn()
+ self.open_conn(directory)
def setUp(self):
if not hasattr(self.__class__, 'wt_ntests'):
@@ -551,4 +551,4 @@ def runsuite(suite, parallel):
def run(name='__main__'):
result = runsuite(unittest.TestLoader().loadTestsFromName(name), False)
- sys.exit(not result.wasSuccessful())
+ sys.exit(0 if result.wasSuccessful() else 1)
diff --git a/test/thread/Makefile.am b/test/thread/Makefile.am
index a58f019b513..58b715d4a80 100644
--- a/test/thread/Makefile.am
+++ b/test/thread/Makefile.am
@@ -1,12 +1,15 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(top_srcdir)/test/utility
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
-t_LDADD = $(top_builddir)/libwiredtiger.la
-t_SOURCES = thread.h file.c rw.c stats.c t.c
+t_SOURCES = file.c rw.c stats.c t.c
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
TESTS = smoke.sh
clean-local:
- rm -rf WiredTiger* wt.* *.core __stats
+ rm -rf WT_TEST __stats *.core
diff --git a/test/thread/rw.c b/test/thread/rw.c
index 913fa6e6c25..10f13b9eb04 100644
--- a/test/thread/rw.c
+++ b/test/thread/rw.c
@@ -59,16 +59,13 @@ rw_start(u_int readers, u_int writers)
total_nops = 0;
/* Create per-thread structures. */
- if ((run_info = calloc(
- (size_t)(readers + writers), sizeof(*run_info))) == NULL ||
- (tids = calloc((size_t)(readers + writers), sizeof(*tids))) == NULL)
- testutil_die(errno, "calloc");
+ run_info = dcalloc((size_t)(readers + writers), sizeof(*run_info));
+ tids = dcalloc((size_t)(readers + writers), sizeof(*tids));
/* Create the files and load the initial records. */
for (i = 0; i < writers; ++i) {
if (i == 0 || multiple_files) {
- if ((run_info[i].name = malloc(64)) == NULL)
- testutil_die(errno, "malloc");
+ run_info[i].name = dmalloc(64);
snprintf(run_info[i].name, 64, FNAME, i);
/* Vary by orders of magnitude */
@@ -88,8 +85,7 @@ rw_start(u_int readers, u_int writers)
for (i = 0; i < readers; ++i) {
offset = i + writers;
if (multiple_files) {
- if ((run_info[offset].name = malloc(64)) == NULL)
- testutil_die(errno, "malloc");
+ run_info[offset].name = dmalloc(64);
/* Have readers read from tables with writes. */
name_index = i % writers;
snprintf(
diff --git a/test/thread/t.c b/test/thread/t.c
index 22334076ee1..5b53532e8a6 100644
--- a/test/thread/t.c
+++ b/test/thread/t.c
@@ -42,7 +42,8 @@ static FILE *logfp; /* Log file */
static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int);
+static void onint(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
static int usage(void);
static void wt_connect(char *);
diff --git a/test/thread/thread.h b/test/thread/thread.h
index 36cdbebd210..edcb919ec32 100644
--- a/test/thread/thread.h
+++ b/test/thread/thread.h
@@ -26,19 +26,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/types.h>
-#include <sys/time.h>
+#include "test_util.h"
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "test_util.i"
#define FNAME "file:wt.%03d" /* File name */
#define FNAME_STAT "__stats" /* File name for statistics */
diff --git a/test/utility/Makefile.am b/test/utility/Makefile.am
new file mode 100644
index 00000000000..a2923eb41a8
--- /dev/null
+++ b/test/utility/Makefile.am
@@ -0,0 +1,4 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+libtest_util_la_SOURCES = misc.c parse_opts.c thread.c
+noinst_LTLIBRARIES = libtest_util.la
diff --git a/test/utility/test_util.i b/test/utility/misc.c
index 43982d9e4a1..dfc655dec1a 100644
--- a/test/utility/test_util.i
+++ b/test/utility/misc.c
@@ -25,37 +25,13 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "wt_internal.h" /* For __wt_XXX */
-
-#ifdef _WIN32
-#include "windows_shim.h"
-#endif
-
-#ifdef _WIN32
- #define DIR_DELIM '\\'
- #define RM_COMMAND "rd /s /q "
-#else
- #define DIR_DELIM '/'
- #define RM_COMMAND "rm -rf "
-#endif
-
-#define DEFAULT_DIR "WT_TEST"
-#define MKDIR_COMMAND "mkdir "
-
-/* Allow tests to add their own death handling. */
-extern void (*custom_die)(void);
-
-static void testutil_die(int, const char *, ...)
-#if defined(__GNUC__)
-__attribute__((__noreturn__))
-#endif
-;
+#include "test_util.h"
/*
* die --
* Report an error and quit.
*/
-static void
+void
testutil_die(int e, const char *fmt, ...)
{
va_list ap;
@@ -64,9 +40,11 @@ testutil_die(int e, const char *fmt, ...)
if (custom_die != NULL)
(*custom_die)();
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
+ if (fmt != NULL) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
if (e != 0)
fprintf(stderr, ": %s", wiredtiger_strerror(e));
fprintf(stderr, "\n");
@@ -75,32 +53,11 @@ testutil_die(int e, const char *fmt, ...)
}
/*
- * testutil_check --
- * Complain and quit if a function call fails.
- */
-#define testutil_check(call) do { \
- int __r; \
- if ((__r = (call)) != 0) \
- testutil_die(__r, "%s/%d: %s", __func__, __LINE__, #call);\
-} while (0)
-
-/*
- * testutil_checkfmt --
- * Complain and quit if a function call fails, with additional arguments.
- */
-#define testutil_checkfmt(call, fmt, ...) do { \
- int __r; \
- if ((__r = (call)) != 0) \
- testutil_die(__r, "%s/%d: %s: " fmt, \
- __func__, __LINE__, #call, __VA_ARGS__); \
-} while (0)
-
-/*
* testutil_work_dir_from_path --
* Takes a buffer, its size and the intended work directory.
* Creates the full intended work directory in buffer.
*/
-static inline void
+void
testutil_work_dir_from_path(char *buffer, size_t len, const char *dir)
{
/* If no directory is provided, use the default. */
@@ -118,7 +75,7 @@ testutil_work_dir_from_path(char *buffer, size_t len, const char *dir)
* testutil_clean_work_dir --
* Remove the work directory.
*/
-static inline void
+void
testutil_clean_work_dir(char *dir)
{
size_t len;
@@ -141,7 +98,7 @@ testutil_clean_work_dir(char *dir)
* testutil_make_work_dir --
* Delete the existing work directory, then create a new one.
*/
-static inline void
+void
testutil_make_work_dir(char *dir)
{
size_t len;
@@ -161,3 +118,77 @@ testutil_make_work_dir(char *dir)
testutil_die(ret, "%s", buf);
free(buf);
}
+
+/*
+ * testutil_cleanup --
+ * Delete the existing work directory and free the options structure.
+ *
+ * Closes opts->conn when it is non-NULL, removes the work directory
+ * unless opts->preserve is set, then frees the conn_config, table_config,
+ * uri and home strings. The TEST_OPTS structure itself is not freed and
+ * the freed pointers are not reset, so the structure must not be reused
+ * after this call without being re-initialized.
+ */
+void
+testutil_cleanup(TEST_OPTS *opts)
+{
+ /* Close the connection first, before the directory is removed. */
+ if (opts->conn != NULL)
+ testutil_check(opts->conn->close(opts->conn, NULL));
+
+ if (!opts->preserve)
+ testutil_clean_work_dir(opts->home);
+
+ /* All four strings are passed to free(), so they must be heap memory. */
+ free(opts->conn_config);
+ free(opts->table_config);
+ free(opts->uri);
+ free(opts->home);
+}
+
+/*
+ * dcalloc --
+ *	Allocate zero-filled memory for "number" elements of "size" bytes,
+ * reporting the requested byte count and exiting through testutil_die if
+ * the allocation fails.
+ */
+void *
+dcalloc(size_t number, size_t size)
+{
+	void *mem;
+
+	mem = calloc(number, size);
+	if (mem == NULL)
+		testutil_die(
+		    errno, "calloc: %" WT_SIZET_FMT "B", number * size);
+	return (mem);
+}
+
+/*
+ * dmalloc --
+ *	Allocate "len" bytes, reporting the requested size and exiting
+ * through testutil_die if the allocation fails.
+ */
+void *
+dmalloc(size_t len)
+{
+	void *mem;
+
+	mem = malloc(len);
+	if (mem == NULL)
+		testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len);
+	return (mem);
+}
+
+/*
+ * drealloc --
+ *	Resize the allocation "p" to "len" bytes, reporting the requested
+ * size and exiting through testutil_die if the reallocation fails.
+ */
+void *
+drealloc(void *p, size_t len)
+{
+	void *resized;
+
+	resized = realloc(p, len);
+	if (resized == NULL)
+		testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len);
+	return (resized);
+}
+
+/*
+ * dstrdup --
+ *	Duplicate a string into newly allocated memory, exiting through
+ * testutil_die if the copy cannot be allocated.
+ */
+void *
+dstrdup(const void *str)
+{
+	char *copy;
+
+	copy = strdup(str);
+	if (copy == NULL)
+		testutil_die(errno, "strdup");
+	return (copy);
+}
diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c
new file mode 100644
index 00000000000..4054f318259
--- /dev/null
+++ b/test/utility/parse_opts.c
@@ -0,0 +1,132 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+extern int __wt_opterr; /* if error message should be printed */
+extern int __wt_optind; /* index into parent argv vector */
+extern int __wt_optopt; /* character checked for validity */
+extern int __wt_optreset; /* reset getopt */
+extern char *__wt_optarg; /* argument associated with option */
+
+/*
+ * testutil_parse_opts --
+ *	Parse command line options for a test case.
+ *
+ * Resets the preserve, running and verbose flags, derives progname from
+ * argv[0], then applies each option found on the command line. Fields that
+ * have no matching option are left as the caller initialized them. On
+ * success opts->home and opts->uri are heap-allocated strings (both are
+ * released with free() by testutil_cleanup) and 0 is returned. An unknown
+ * option prints a usage message to stderr and returns 1, in which case
+ * opts->home and opts->uri are not set.
+ */
+int
+testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
+{
+	size_t len;
+	int ch;
+	const char *home_arg;
+
+	home_arg = NULL;
+
+	opts->preserve = false;
+	opts->running = true;
+	opts->verbose = false;
+
+	/* Strip any leading path from the program name. */
+	if ((opts->progname = strrchr(argv[0], '/')) == NULL)
+		opts->progname = argv[0];
+	else
+		++opts->progname;
+
+	while ((ch = __wt_getopt(opts->progname,
+	    argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF)
+		switch (ch) {
+		case 'A': /* Number of append threads */
+			opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
+			break;
+		case 'h': /* Home directory */
+			/*
+			 * Only remember the argument here: it points into
+			 * argv, and the home string must be heap memory
+			 * because it is later passed to free().
+			 */
+			home_arg = __wt_optarg;
+			break;
+		case 'n': /* Number of records */
+			opts->nrecords = (uint64_t)atoll(__wt_optarg);
+			break;
+		case 'o': /* Number of operations */
+			opts->nops = (uint64_t)atoll(__wt_optarg);
+			break;
+		case 'p': /* Preserve directory contents */
+			opts->preserve = true;
+			break;
+		case 'R': /* Number of reader threads */
+			opts->n_read_threads = (uint64_t)atoll(__wt_optarg);
+			break;
+		case 'T': /* Number of threads */
+			opts->nthreads = (uint64_t)atoll(__wt_optarg);
+			break;
+		case 't': /* Table type */
+			/*
+			 * Only the first character is examined; an
+			 * unrecognized type leaves table_type unchanged.
+			 */
+			switch (__wt_optarg[0]) {
+			case 'C':
+			case 'c':
+				opts->table_type = TABLE_COL;
+				break;
+			case 'F':
+			case 'f':
+				opts->table_type = TABLE_FIX;
+				break;
+			case 'R':
+			case 'r':
+				opts->table_type = TABLE_ROW;
+				break;
+			}
+			break;
+		case 'v':
+			opts->verbose = true;
+			break;
+		case 'W': /* Number of writer threads */
+			opts->n_write_threads = (uint64_t)atoll(__wt_optarg);
+			break;
+		case '?':
+		default:
+			(void)fprintf(stderr, "usage: %s "
+			    "[-A append thread count] "
+			    "[-h home] "
+			    "[-n record count] "
+			    "[-o op count] "
+			    "[-p] "
+			    "[-R read thread count] "
+			    "[-T thread count] "
+			    "[-t c|f|r table type] "
+			    "[-v] "
+			    "[-W write thread count]\n",
+			    opts->progname);
+			return (1);
+		}
+
+	/*
+	 * Setup the home directory. Honor an explicit -h argument, taking a
+	 * private copy of it; otherwise build a default that is unique for
+	 * every test or the auto make parallel tester gets upset.
+	 */
+	if (home_arg != NULL)
+		opts->home = dstrdup(home_arg);
+	else {
+		len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
+		opts->home = dmalloc(len);
+		snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+	}
+
+	/* Setup the default URI string */
+	len = strlen("table:") + strlen(opts->progname) + 10;
+	opts->uri = dmalloc(len);
+	snprintf(opts->uri, len, "table:%s", opts->progname);
+
+	return (0);
+}
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
new file mode 100644
index 00000000000..66ff8de2d19
--- /dev/null
+++ b/test/utility/test_util.h
@@ -0,0 +1,125 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wt_internal.h" /* For __wt_XXX */
+
+#ifdef _WIN32
+ #define DIR_DELIM '\\'
+ #define RM_COMMAND "rd /s /q "
+#else
+ #define DIR_DELIM '/'
+ #define RM_COMMAND "rm -rf "
+#endif
+
+#define DEFAULT_DIR "WT_TEST"
+#define MKDIR_COMMAND "mkdir "
+
+#ifdef _WIN32
+#include "windows_shim.h"
+#endif
+
+/* Generic option parsing structure shared by all test cases. */
+typedef struct {
+ char *home; /* Work directory, freed by testutil_cleanup */
+ char *progname; /* argv[0] with any leading path removed */
+ enum { TABLE_COL=1, /* Variable-length column store */
+ TABLE_FIX=2, /* Fixed-length column store */
+ TABLE_ROW=3 /* Row-store */
+ } table_type;
+ bool preserve; /* Don't remove files on exit */
+ bool verbose; /* Run in verbose mode */
+ uint64_t nrecords; /* Number of records */
+ uint64_t nops; /* Number of operations */
+ uint64_t nthreads; /* Number of threads */
+ uint64_t n_append_threads; /* Number of append threads */
+ uint64_t n_read_threads; /* Number of read threads */
+ uint64_t n_write_threads; /* Number of write threads */
+
+ /*
+ * Fields commonly shared within a test program. The test cleanup
+ * function will attempt to automatically free and close non-null
+ * resources.
+ */
+ WT_CONNECTION *conn;
+ char *conn_config;
+ WT_SESSION *session;
+ bool running; /* Cleared to make the worker threads stop */
+ char *table_config;
+ char *uri;
+ volatile uint64_t next_threadid; /* Counter thread_append draws IDs from */
+ uint64_t max_inserted_id; /* Last key read back in thread_append */
+} TEST_OPTS;
+
+/*
+ * testutil_assert --
+ * Complain and quit if something isn't true.
+ *
+ * The failure message is the enclosing function name, the line number
+ * and the text of the failed expression. The error code passed to
+ * testutil_die is 0, so no error string is appended.
+ */
+#define testutil_assert(a) do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \
+} while (0)
+
+/*
+ * testutil_check --
+ * Complain and quit if a function call fails.
+ *
+ * The call is evaluated exactly once. A non-zero result is handed to
+ * testutil_die as the error code, together with the enclosing function
+ * name, the line number and the source text of the call, so the call's
+ * spelling is part of the failure message.
+ */
+#define testutil_check(call) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die( \
+ __r, "%s/%d: %s", __func__, __LINE__, #call); \
+} while (0)
+
+/*
+ * testutil_checkfmt --
+ * Complain and quit if a function call fails, with additional arguments.
+ *
+ * The call is evaluated exactly once. "fmt" is pasted onto the message
+ * prefix by string-literal concatenation, so it must be a string literal,
+ * and because __VA_ARGS__ follows a comma at least one extra argument has
+ * to be supplied.
+ */
+#define testutil_checkfmt(call, fmt, ...) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #call, __VA_ARGS__); \
+} while (0)
+
+/* Allow tests to add their own death handling. */
+extern void (*custom_die)(void);
+
+void testutil_die(int, const char *, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+
+void *dcalloc(size_t, size_t);
+void *dmalloc(size_t);
+void *drealloc(void *, size_t);
+void *dstrdup(const void *);
+void testutil_clean_work_dir(char *);
+void testutil_cleanup(TEST_OPTS *);
+void testutil_make_work_dir(char *);
+int testutil_parse_opts(int, char * const *, TEST_OPTS *);
+void testutil_work_dir_from_path(char *, size_t, const char *);
+void *thread_append(void *);
+void *thread_insert_append(void *);
+void *thread_prev(void *);
diff --git a/test/utility/thread.c b/test/utility/thread.c
new file mode 100644
index 00000000000..38465b2f02b
--- /dev/null
+++ b/test/utility/thread.c
@@ -0,0 +1,141 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+/*
+ * thread_append --
+ *	A thread dedicated to appending records into a table. Works with
+ * fixed length column stores and variable length column stores.
+ *
+ * "arg" is the shared TEST_OPTS structure. Each thread opens its own
+ * session and an "append" cursor on opts->uri and inserts until
+ * opts->running is cleared. The thread that draws ID 0 from
+ * opts->next_threadid reads back the key of each record it inserts and
+ * clears opts->running once that key reaches opts->nrecords. The session
+ * is closed before the thread returns; the return value is always NULL.
+ */
+void *
+thread_append(void *arg)
+{
+	TEST_OPTS *opts;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	uint64_t id, recno;
+	char buf[64];
+
+	opts = (TEST_OPTS *)arg;
+	conn = opts->conn;
+
+	id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
+	testutil_check(conn->open_session(conn, NULL, NULL, &session));
+	testutil_check(
+	    session->open_cursor(session, opts->uri, NULL, "append", &cursor));
+
+	/* Fixed-length stores get this single byte as every value. */
+	buf[0] = '\2';
+	for (recno = 1; opts->running; ++recno) {
+		if (opts->table_type == TABLE_FIX)
+			cursor->set_value(cursor, buf[0]);
+		else {
+			snprintf(buf, sizeof(buf),
+			    "%" PRIu64 " VALUE ------", recno);
+			cursor->set_value(cursor, buf);
+		}
+		testutil_check(cursor->insert(cursor));
+		if (id == 0) {
+			/* Only thread 0 checks the terminating condition. */
+			testutil_check(
+			    cursor->get_key(cursor, &opts->max_inserted_id));
+			if (opts->max_inserted_id >= opts->nrecords)
+				opts->running = false;
+		}
+	}
+
+	/*
+	 * Release the session rather than leaving it open until the
+	 * connection is closed, matching thread_prev.
+	 */
+	testutil_check(session->close(session, NULL));
+	return (NULL);
+}
+
+/*
+ * thread_insert_append --
+ *	Append into a row store table.
+ *
+ * "arg" is the shared TEST_OPTS structure. The thread opens its own
+ * session and cursor on opts->uri, inserts opts->nrecords records whose
+ * keys are zero-padded, increasing numbers, prints a progress line every
+ * 100000 inserts, then clears opts->running. The session is closed before
+ * the thread returns; the return value is always NULL.
+ */
+void *
+thread_insert_append(void *arg)
+{
+	TEST_OPTS *opts;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	uint64_t i;
+	char kbuf[64];
+
+	opts = (TEST_OPTS *)arg;
+	conn = opts->conn;
+
+	testutil_check(conn->open_session(conn, NULL, NULL, &session));
+	testutil_check(session->open_cursor(
+	    session, opts->uri, NULL, NULL, &cursor));
+
+	for (i = 0; i < opts->nrecords; ++i) {
+		/*
+		 * Format the full 64-bit counter: narrowing it to int
+		 * would corrupt the keys once the record count exceeds
+		 * INT_MAX.
+		 */
+		snprintf(kbuf, sizeof(kbuf), "%010" PRIu64 " KEY------", i);
+		cursor->set_key(cursor, kbuf);
+		cursor->set_value(cursor, "========== VALUE =======");
+		testutil_check(cursor->insert(cursor));
+		if (i % 100000 == 0) {
+			printf("insert: %" PRIu64 "\r", i);
+			fflush(stdout);
+		}
+	}
+	printf("\n");
+
+	/* Tell the other threads the load is complete. */
+	opts->running = false;
+
+	/*
+	 * Release the session rather than leaving it open until the
+	 * connection is closed, matching thread_prev.
+	 */
+	testutil_check(session->close(session, NULL));
+	return (NULL);
+}
+
+/*
+ * thread_prev --
+ * Repeatedly walk backwards through the records in a table.
+ *
+ * "arg" is the shared TEST_OPTS structure. The thread opens its own
+ * session and cursor on opts->uri and keeps calling the cursor's prev
+ * method until opts->running is cleared. Any error other than
+ * WT_NOTFOUND ends the program through testutil_check. The session is
+ * closed before returning; the return value is always NULL.
+ */
+void *
+thread_prev(void *arg)
+{
+ TEST_OPTS *opts;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int ret;
+
+ opts = (TEST_OPTS *)arg;
+ ret = 0;
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(
+ session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ while (opts->running) {
+ /* Step backwards until prev fails or the test is stopped. */
+ while (opts->running && (ret = cursor->prev(cursor)) == 0)
+ ;
+ /*
+ * WT_NOTFOUND is not treated as an error. Presumably it marks
+ * the start of the table and the next prev call restarts from
+ * the last record -- TODO confirm against the cursor API.
+ */
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ testutil_check(ret);
+ }
+
+ testutil_check(session->close(session, NULL));
+ return (NULL);
+}
diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py
index 7c42ab4d926..a79cf1faf5e 100644
--- a/tools/wtstats/stat_data.py
+++ b/tools/wtstats/stat_data.py
@@ -6,6 +6,7 @@ no_scale_per_second_list = [
'cache: bytes currently in the cache',
'cache: eviction currently operating in aggressive mode',
'cache: files with active eviction walks',
+ 'cache: hazard pointer maximum array length',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
@@ -27,6 +28,9 @@ no_scale_per_second_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'thread-state: active filesystem fsync calls',
+ 'thread-state: active filesystem read calls',
+ 'thread-state: active filesystem write calls',
'transaction: transaction checkpoint currently running',
'transaction: transaction checkpoint generation',
'transaction: transaction checkpoint max time (msecs)',
@@ -60,7 +64,6 @@ no_scale_per_second_list = [
'btree: overflow pages',
'btree: row-store internal pages',
'btree: row-store leaf pages',
- 'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
@@ -95,6 +98,9 @@ no_clear_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'thread-state: active filesystem fsync calls',
+ 'thread-state: active filesystem read calls',
+ 'thread-state: active filesystem write calls',
'transaction: transaction checkpoint currently running',
'transaction: transaction checkpoint generation',
'transaction: transaction checkpoint max time (msecs)',
@@ -105,7 +111,6 @@ no_clear_list = [
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
- 'cache: bytes currently in the cache',
'session: open cursor count',
]
prefix_list = [
@@ -122,6 +127,7 @@ prefix_list = [
'thread-yield',
'async',
'btree',
+ 'thread-state',
'compression',
]
-groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session'], 'evict': ['cache', 'connection', 'block-manager'], 'memory': ['cache', 'connection', 'reconciliation']} \ No newline at end of file
+groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'connection', 'thread-state'], 'memory': ['cache', 'connection', 'reconciliation']} \ No newline at end of file