summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2016-04-01 15:50:16 +1100
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2016-04-01 15:50:16 +1100
commit a2dec6e48b269db78f94260a5f6f174404affa4a (patch)
tree 4ba83c2204f623e36fb6d506904fb782f99ffff5
parent 385b126a7d517afd7eb711b04d71bb3b5a03c917 (diff)
parent 357bf28ba3fc1d5902b429a9e634015187f648ed (diff)
download mongo-a2dec6e48b269db78f94260a5f6f174404affa4a.tar.gz
Merge pull request #2603 from wiredtiger/wt-2330
WT-2330: in-memory configurations should not create on-disk collection files
-rw-r--r-- bench/wtperf/misc.c 2
-rw-r--r-- bench/wtperf/wtperf.c 4
-rw-r--r-- build_win/filelist.win 31
-rw-r--r-- dist/api_data.py 1
-rw-r--r-- dist/filelist 30
-rw-r--r-- dist/flags.py 3
-rw-r--r-- dist/log.py 18
-rw-r--r-- dist/s_define.list 1
-rw-r--r-- dist/s_funcs.list 2
-rwxr-xr-x dist/s_prototypes 42
-rwxr-xr-x dist/s_stat 2
-rw-r--r-- dist/s_string.ok 28
-rwxr-xr-x dist/s_style 11
-rwxr-xr-x dist/s_win 57
-rw-r--r-- examples/c/ex_event_handler.c 2
-rw-r--r-- src/block/block_addr.c 2
-rw-r--r-- src/block/block_ckpt.c 6
-rw-r--r-- src/block/block_compact.c 23
-rw-r--r-- src/block/block_ext.c 25
-rw-r--r-- src/block/block_map.c 23
-rw-r--r-- src/block/block_mgr.c 18
-rw-r--r-- src/block/block_open.c 66
-rw-r--r-- src/block/block_read.c 79
-rw-r--r-- src/block/block_slvg.c 14
-rw-r--r-- src/block/block_vrfy.c 6
-rw-r--r-- src/block/block_write.c 83
-rw-r--r-- src/btree/bt_debug.c 17
-rw-r--r-- src/btree/bt_discard.c 7
-rw-r--r-- src/btree/bt_handle.c 4
-rw-r--r-- src/btree/bt_huffman.c 39
-rw-r--r-- src/btree/bt_sync.c 2
-rw-r--r-- src/btree/bt_vrfy.c 4
-rw-r--r-- src/cache/cache_las.c 4
-rw-r--r-- src/config/config_def.c 30
-rw-r--r-- src/conn/conn_api.c 133
-rw-r--r-- src/conn/conn_handle.c 14
-rw-r--r-- src/conn/conn_log.c 7
-rw-r--r-- src/conn/conn_open.c 22
-rw-r--r-- src/conn/conn_stat.c 37
-rw-r--r-- src/conn/conn_sweep.c 5
-rw-r--r-- src/cursor/cur_backup.c 10
-rw-r--r-- src/evict/evict_lru.c 38
-rw-r--r-- src/include/block.h 20
-rw-r--r-- src/include/connection.h 22
-rw-r--r-- src/include/cursor.h 2
-rw-r--r-- src/include/extern.h 172
-rw-r--r-- src/include/flags.h 39
-rw-r--r-- src/include/misc.i 249
-rw-r--r-- src/include/msvc.h 11
-rw-r--r-- src/include/os.h 75
-rw-r--r-- src/include/wiredtiger.in 12
-rw-r--r-- src/include/wt_internal.h 8
-rw-r--r-- src/log/log.c 47
-rw-r--r-- src/log/log_auto.c 117
-rw-r--r-- src/log/log_slot.c 6
-rw-r--r-- src/meta/meta_turtle.c 30
-rw-r--r-- src/os_common/filename.c (renamed from src/support/filename.c) 82
-rw-r--r-- src/os_common/os_abort.c (renamed from src/os_posix/os_abort.c) 0
-rw-r--r-- src/os_common/os_alloc.c (renamed from src/os_posix/os_alloc.c) 0
-rw-r--r-- src/os_common/os_fhandle.c 321
-rw-r--r-- src/os_common/os_fs_inmemory.c 466
-rw-r--r-- src/os_common/os_fs_stdio.c 239
-rw-r--r-- src/os_common/os_getline.c (renamed from src/os_posix/os_getline.c) 10
-rw-r--r-- src/os_common/os_getopt.c (renamed from src/os_posix/os_getopt.c) 0
-rw-r--r-- src/os_common/os_init.c 41
-rw-r--r-- src/os_common/os_strtouq.c (renamed from src/os_posix/os_strtouq.c) 0
-rw-r--r-- src/os_posix/os_dir.c 31
-rw-r--r-- src/os_posix/os_exist.c 38
-rw-r--r-- src/os_posix/os_fallocate.c 43
-rw-r--r-- src/os_posix/os_filesize.c 62
-rw-r--r-- src/os_posix/os_flock.c 38
-rw-r--r-- src/os_posix/os_fs.c 734
-rw-r--r-- src/os_posix/os_fsync.c 171
-rw-r--r-- src/os_posix/os_ftruncate.c 26
-rw-r--r-- src/os_posix/os_map.c 156
-rw-r--r-- src/os_posix/os_open.c 253
-rw-r--r-- src/os_posix/os_remove.c 69
-rw-r--r-- src/os_posix/os_rename.c 40
-rw-r--r-- src/os_posix/os_rw.c 90
-rw-r--r-- src/os_posix/os_setvbuf.c 34
-rw-r--r-- src/os_posix/os_stdio.c 126
-rw-r--r-- src/os_win/os_dir.c 93
-rw-r--r-- src/os_win/os_dlopen.c 26
-rw-r--r-- src/os_win/os_errno.c 35
-rw-r--r-- src/os_win/os_exist.c 33
-rw-r--r-- src/os_win/os_fallocate.c 45
-rw-r--r-- src/os_win/os_filesize.c 64
-rw-r--r-- src/os_win/os_flock.c 47
-rw-r--r-- src/os_win/os_fs.c 705
-rw-r--r-- src/os_win/os_fsync.c 71
-rw-r--r-- src/os_win/os_ftruncate.c 37
-rw-r--r-- src/os_win/os_getenv.c 2
-rw-r--r-- src/os_win/os_map.c 94
-rw-r--r-- src/os_win/os_mtx_cond.c 2
-rw-r--r-- src/os_win/os_once.c 2
-rw-r--r-- src/os_win/os_open.c 266
-rw-r--r-- src/os_win/os_remove.c 71
-rw-r--r-- src/os_win/os_rename.c 53
-rw-r--r-- src/os_win/os_rw.c 102
-rw-r--r-- src/os_win/os_setvbuf.c 38
-rw-r--r-- src/os_win/os_sleep.c 6
-rw-r--r-- src/os_win/os_thread.c 13
-rw-r--r-- src/reconcile/rec_write.c 14
-rw-r--r-- src/session/session_compact.c 3
-rw-r--r-- src/support/err.c 33
-rw-r--r-- src/support/mtx_rw.c (renamed from src/os_posix/os_mtx_rw.c) 0
-rw-r--r-- src/txn/txn_ckpt.c 2
-rw-r--r-- src/txn/txn_log.c 66
-rw-r--r-- src/utilities/util_backup.c 99
-rw-r--r-- src/utilities/util_load_json.c 8
-rw-r--r-- src/utilities/util_printlog.c 2
-rw-r--r-- test/recovery/random-abort.c 2
-rw-r--r-- test/recovery/truncated-log.c 2
-rw-r--r-- test/utility/test_util.i 47
114 files changed, 4124 insertions, 2791 deletions
diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c
index 98920f6ab64..2821216f240 100644
--- a/bench/wtperf/misc.c
+++ b/bench/wtperf/misc.c
@@ -54,7 +54,7 @@ setup_log_file(CONFIG *cfg)
return (ret);
/* Use line buffering for the log file. */
- (void)setvbuf(cfg->logf, NULL, _IOLBF, 1024);
+ __wt_stream_set_line_buffer(cfg->logf);
return (0);
}
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index 2c0dee71096..9d57bdcf6b0 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -1233,7 +1233,7 @@ monitor(void *arg)
goto err;
}
/* Set line buffering for monitor file. */
- (void)setvbuf(fp, NULL, _IOLBF, 1024);
+ __wt_stream_set_line_buffer(fp);
fprintf(fp,
"#time,"
"totalsec,"
@@ -2313,7 +2313,7 @@ main(int argc, char *argv[])
cfg->table_name);
/* Make stdout line buffered, so verbose output appears quickly. */
- (void)setvbuf(stdout, NULL, _IOLBF, 1024);
+ __wt_stream_set_line_buffer(stdout);
/* Concatenate non-default configuration strings. */
if (cfg->verbose > 1 || user_cconfig != NULL ||
diff --git a/build_win/filelist.win b/build_win/filelist.win
index b6a9caf4a74..c370303d5f8 100644
--- a/build_win/filelist.win
+++ b/build_win/filelist.win
@@ -101,33 +101,28 @@ src/meta/meta_ext.c
src/meta/meta_table.c
src/meta/meta_track.c
src/meta/meta_turtle.c
-src/os_posix/os_abort.c
-src/os_posix/os_alloc.c
-src/os_posix/os_getline.c
-src/os_posix/os_getopt.c
-src/os_posix/os_mtx_rw.c
-src/os_posix/os_stdio.c
-src/os_posix/os_strtouq.c
+src/os_common/filename.c
+src/os_common/os_abort.c
+src/os_common/os_alloc.c
+src/os_common/os_fhandle.c
+src/os_common/os_fs_inmemory.c
+src/os_common/os_fs_stdio.c
+src/os_common/os_getline.c
+src/os_common/os_getopt.c
+src/os_common/os_init.c
+src/os_common/os_strtouq.c
src/os_win/os_dir.c
src/os_win/os_dlopen.c
src/os_win/os_errno.c
-src/os_win/os_exist.c
-src/os_win/os_fallocate.c
-src/os_win/os_filesize.c
-src/os_win/os_flock.c
-src/os_win/os_fsync.c
-src/os_win/os_ftruncate.c
+src/os_win/os_fs.c
src/os_win/os_getenv.c
src/os_win/os_map.c
src/os_win/os_mtx_cond.c
src/os_win/os_once.c
-src/os_win/os_open.c
src/os_win/os_pagesize.c
src/os_win/os_path.c
src/os_win/os_priv.c
-src/os_win/os_remove.c
-src/os_win/os_rename.c
-src/os_win/os_rw.c
+src/os_win/os_setvbuf.c
src/os_win/os_sleep.c
src/os_win/os_snprintf.c
src/os_win/os_thread.c
@@ -158,13 +153,13 @@ src/support/cksum.c
src/support/cond_auto.c
src/support/crypto.c
src/support/err.c
-src/support/filename.c
src/support/global.c
src/support/hash_city.c
src/support/hash_fnv.c
src/support/hazard.c
src/support/hex.c
src/support/huffman.c
+src/support/mtx_rw.c
src/support/pow.c
src/support/rand.c
src/support/scratch.c
diff --git a/dist/api_data.py b/dist/api_data.py
index 02aee1e8825..5ca294a5d60 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -569,6 +569,7 @@ connection_runtime_config = [
'evict',
'evictserver',
'fileops',
+ 'handleops',
'log',
'lsm',
'lsm_manager',
diff --git a/dist/filelist b/dist/filelist
index 350e0c50087..1d7ffa76922 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -101,34 +101,30 @@ src/meta/meta_ext.c
src/meta/meta_table.c
src/meta/meta_track.c
src/meta/meta_turtle.c
-src/os_posix/os_abort.c
-src/os_posix/os_alloc.c
+src/os_common/filename.c
+src/os_common/os_abort.c
+src/os_common/os_alloc.c
+src/os_common/os_fhandle.c
+src/os_common/os_fs_inmemory.c
+src/os_common/os_fs_stdio.c
+src/os_common/os_getline.c
+src/os_common/os_getopt.c
+src/os_common/os_init.c
+src/os_common/os_strtouq.c
src/os_posix/os_dir.c
src/os_posix/os_dlopen.c
src/os_posix/os_errno.c
-src/os_posix/os_exist.c
src/os_posix/os_fallocate.c
-src/os_posix/os_filesize.c
-src/os_posix/os_flock.c
-src/os_posix/os_fsync.c
-src/os_posix/os_ftruncate.c
+src/os_posix/os_fs.c
src/os_posix/os_getenv.c
-src/os_posix/os_getline.c
-src/os_posix/os_getopt.c
src/os_posix/os_map.c
src/os_posix/os_mtx_cond.c
-src/os_posix/os_mtx_rw.c
src/os_posix/os_once.c
-src/os_posix/os_open.c
src/os_posix/os_pagesize.c
src/os_posix/os_path.c
src/os_posix/os_priv.c
-src/os_posix/os_remove.c
-src/os_posix/os_rename.c
-src/os_posix/os_rw.c
+src/os_posix/os_setvbuf.c
src/os_posix/os_sleep.c
-src/os_posix/os_stdio.c
-src/os_posix/os_strtouq.c
src/os_posix/os_thread.c
src/os_posix/os_time.c
src/os_posix/os_yield.c
@@ -156,13 +152,13 @@ src/support/cksum.c
src/support/cond_auto.c
src/support/crypto.c
src/support/err.c
-src/support/filename.c
src/support/global.c
src/support/hash_city.c
src/support/hash_fnv.c
src/support/hazard.c
src/support/hex.c
src/support/huffman.c
+src/support/mtx_rw.c
src/support/pow.c
src/support/power8/crc32.S
src/support/power8/crc32_wrapper.c
diff --git a/dist/flags.py b/dist/flags.py
index f500e3b1ae1..8f7827ad160 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -13,7 +13,7 @@ flags = {
'FILE_TYPE_DATA',
'FILE_TYPE_DIRECTORY',
'FILE_TYPE_LOG',
- 'FILE_TYPE_TURTLE',
+ 'FILE_TYPE_REGULAR',
],
'log_scan' : [
'LOGSCAN_FIRST',
@@ -65,6 +65,7 @@ flags = {
'VERB_EVICT',
'VERB_EVICTSERVER',
'VERB_FILEOPS',
+ 'VERB_HANDLEOPS',
'VERB_LOG',
'VERB_LSM',
'VERB_LSM_MANAGER',
diff --git a/dist/log.py b/dist/log.py
index 6d35bf2e718..9201b20054b 100644
--- a/dist/log.py
+++ b/dist/log.py
@@ -89,7 +89,7 @@ def printf_line(f, optype, i, ishex):
ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent
if postcomma == '':
precomma = ',\\n'
- body = '%s%s(__wt_fprintf(out,' % (
+ body = '%s%s(__wt_fprintf(session, WT_STDOUT(session),' % (
printf_setup(f, ishex, nl_indent),
'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \
'%s "%s \\"%s\\": \\"%s\\"%s",%s));' % (
@@ -292,16 +292,16 @@ __wt_logop_%(name)s_unpack(
last_field = optype.fields[-1]
tfile.write('''
int
-__wt_logop_%(name)s_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_%(name)s_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
%(arg_ret)s\t%(arg_decls)s
\t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack(
\t session, pp, end%(arg_addrs)s));
-\tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n"));
+\tWT_RET(__wt_fprintf(session, WT_STDOUT(session),
+\t " \\"optype\\": \\"%(name)s\\",\\n"));
\t%(print_args)s
%(arg_fini)s
}
@@ -324,9 +324,8 @@ __wt_logop_%(name)s_print(
# Emit the printlog entry point
tfile.write('''
int
-__wt_txn_op_printlog(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_txn_op_printlog(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
\tuint32_t optype, opsize;
@@ -342,8 +341,7 @@ for optype in log_data.optypes:
tfile.write('''
\tcase %(macro)s:
-\t\tWT_RET(%(print_func)s(session, pp, end, out,
-\t\t flags));
+\t\tWT_RET(%(print_func)s(session, pp, end, flags));
\t\tbreak;
''' % {
'macro' : optype.macro_name(),
diff --git a/dist/s_define.list b/dist/s_define.list
index e3f0dc7f181..c9777c86675 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -16,6 +16,7 @@ WIN32_LEAN_AND_MEAN
WT_ATOMIC_CAS
WT_ATOMIC_FUNC
WT_BLOCK_DESC_SIZE
+WT_BLOCK_HEADER_SIZE
WT_CACHE_LINE_ALIGNMENT
WT_COMPILER_TYPE_ALIGN
WT_CONN_CHECK_PANIC
diff --git a/dist/s_funcs.list b/dist/s_funcs.list
index 8d32eecdfb7..c0d9f2e688f 100644
--- a/dist/s_funcs.list
+++ b/dist/s_funcs.list
@@ -22,7 +22,6 @@ __wt_debug_set_verbose
__wt_debug_tree
__wt_debug_tree_all
__wt_debug_tree_shape
-__wt_fsync
__wt_lex_compare
__wt_lex_compare_skip
__wt_log_scan
@@ -31,6 +30,7 @@ __wt_nlpo2_round
__wt_print_huffman_code
__wt_stat_join_aggregate
__wt_stat_join_clear_all
+__wt_stream_set_no_buffer
__wt_try_readlock
wiredtiger_config_parser_open
wiredtiger_config_validate
diff --git a/dist/s_prototypes b/dist/s_prototypes
index 603c0f5633d..4ceb69f4c77 100755
--- a/dist/s_prototypes
+++ b/dist/s_prototypes
@@ -4,13 +4,10 @@
t=__wt.$$
trap 'rm -f $t; exit 0' 0 1 2 3 13 15
-(
-cat <<EOF
-/* DO NOT EDIT: automatically built by dist/s_prototypes. */
-
-EOF
-
-for i in `sed -e '/^[a-z]/!d' filelist`; do
+# proto --
+# extract public functions.
+proto()
+{
sed -n \
-e '/^__wt_[a-z]/!{' \
-e h \
@@ -32,9 +29,34 @@ for i in `sed -e '/^[a-z]/!d' filelist`; do
-e 's/ */ /g' \
-e 's/^/extern /' \
-e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/' \
- -e 's/$/;/p' \
- < ../$i
-done) > $t
+ -e 's/$/;/p' < $1
+}
+
+(
+cat <<EOF
+/* DO NOT EDIT: automatically built by dist/s_prototypes. */
+
+EOF
+
+# First, get prototypes for everything but the OS directories.
+# Second, get prototypes for the OS directories.
+# The reason for this is because the OS directories repeat names (that is, there
+# are common names in both os_posix and os_win), and so we sort the prototypes
+# to avoid repeating them in the output (which some compilers won't tolerate).
+# We'd sort everything and discard duplicates, but we can't sort when function
+# signatures are on multiple lines, that is, #ifdef'd function signatures. Since
+# the OS directories are the only places with repeated names, and they have no
+# #ifdef'd signatures, we do it this way.
+l=`sed -e '/^[a-z]/!d' -e '/src\/os/d' filelist`
+for i in $l; do
+ proto ../$i
+done
+l=`echo ../src\/os*/*.c`
+
+for i in $l; do
+ proto $i
+done | tee xxx | env LC_ALL=C sort -u
+) > $t
f=../src/include/extern.h
cmp $t $f > /dev/null 2>&1 ||
diff --git a/dist/s_stat b/dist/s_stat
index 44c22ab56bb..3938b8e65eb 100755
--- a/dist/s_stat
+++ b/dist/s_stat
@@ -11,7 +11,7 @@ l=`sed \
-e 's,#.*,,' \
-e '/^$/d' \
-e 's,^,../,' filelist`
-l="$l `echo ../src/include/*.i`"
+l="$l `echo ../src/include/*.i ../src/include/os.h`"
(
# Get the list of statistics fields.
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 79f662aa851..eed034abb47 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -67,7 +67,9 @@ CloseHandle
Comparator
Config
Coverity
+CreateFileA
CreateFileMapping
+CreateFileMappingA
Crummey
CustomersPhone
DECL
@@ -76,11 +78,13 @@ DESC
DHANDLE
DNE
DOI
+DONTNEED
DUPLICATEV
DbCursor
DbEnv
Decrement
Decrypt
+DeleteFileA
EAGAIN
EBUSY
EEXIST
@@ -187,6 +191,7 @@ LoadLoad
LockFile
Lookaside
Lookup
+MADV
MALLOC
MEM
MEMALIGN
@@ -249,6 +254,7 @@ Prepend
Qsort
RCS
RDNOLOCK
+RDONLY
RECNO
REF's
REFs
@@ -261,6 +267,7 @@ RNG
RPC
RUNDIR
Radu
+ReadFile
Readonly
Rebalance
RedHat
@@ -318,6 +325,7 @@ UTF
UltraSparc
Unbuffered
UnixLib
+UnlockFile
Unmap
UnmapViewOfFile
Unmarshall
@@ -333,9 +341,11 @@ Vixie
Vo
VxWorks
WAL
+WILLNEED
WIREDTIGER
WRLSN
WRNOLOCK
+WaitForSingleObject
WakeAllConditionVariable
Wconditional
WeakHashLen
@@ -353,6 +363,7 @@ WiredTigerPreplog
WiredTigerTmplog
WiredTigerTxn
WithSeeds
+WriteFile
Wuninitialized
Wunused
XP
@@ -455,6 +466,7 @@ ckpt
ckptfrag
ckptlist
cksum
+cloexec
clsm
cmd
cmp
@@ -596,12 +608,17 @@ fallocate
fblocks
fclose
fcntl
+fd
+fdatasync
+fdopen
ffc
fflush
ffs
+fgetc
fgetln
fh
filefrag
+filehandle
fileid
filename
filenames
@@ -632,7 +649,9 @@ func
gcc
gdb
ge
+getc
getenv
+getlasterror
getline
getone
getones
@@ -648,6 +667,7 @@ gostring
gostruct
goutf
gt
+handleops
hashval
havesize
hdr
@@ -664,6 +684,7 @@ icount
idx
ifdef's
ikey
+im
impl
incase
incr
@@ -680,6 +701,7 @@ initsize
initval
inline
inmem
+inmemory
insertK
insertV
inserters
@@ -687,6 +709,7 @@ instantiation
intl
intnum
intpack
+intptr
intrin
inuse
io
@@ -839,6 +862,7 @@ optimizations
optype
ori
os
+osfhandle
ovfl
ownp
packv
@@ -864,8 +888,10 @@ postsize
powerpc
pragmas
pre
+pread
prealloc
preload
+preloaded
prepend
prepended
prepending
@@ -884,6 +910,7 @@ pushms
putK
putV
pv
+pwrite
py
qdown
qrrSS
@@ -942,6 +969,7 @@ sessionp
setkv
setstr
setv
+setvbuf
sfence
sii
sizeof
diff --git a/dist/s_style b/dist/s_style
index 78fb7a6eb03..a163eb83b25 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -60,11 +60,12 @@ else
echo "$f: use TAILQ for all lists"
fi
- if ! expr "$f" : 'src/os_posix/.*' > /dev/null &&
+ if ! expr "$f" : 'src/os_common/.*' > /dev/null &&
+ ! expr "$f" : 'src/os_posix/.*' > /dev/null &&
! expr "$f" : 'src/os_win/.*' > /dev/null &&
! expr "$f" : 'src/include/extern.h' > /dev/null &&
! expr "$f" : 'src/include/os.h' > /dev/null &&
- grep '__wt_errno' $f > $t; then
+ grep '__wt_errno' $f > $t; then
echo "$f: upper-level code should not call __wt_errno"
cat $t
fi
@@ -83,6 +84,12 @@ else
cat $t
}
+ if ! expr "$f" : 'src/.*/os_setvbuf.c' > /dev/null &&
+ egrep -w 'setvbuf' $f > $t; then
+ echo "$f: setvbuf call, use WiredTiger library replacements"
+ cat $t
+ fi
+
# Alignment directive before "struct".
egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t
test -s $t && {
diff --git a/dist/s_win b/dist/s_win
index 0b7d5184037..562e89f94c6 100755
--- a/dist/s_win
+++ b/dist/s_win
@@ -43,40 +43,33 @@ win_filelist()
{
f='../build_win/filelist.win'
- # Process the files for which there's a Windows-specific version, then
- # append Windows-only files and discard POSIX-only files.
- (sed \
- -e 's;os_posix/os_dir.c;os_win/os_dir.c;' \
- -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \
- -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \
- -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \
- -e 's;os_posix/os_errno.c;os_win/os_errno.c;' \
- -e 's;os_posix/os_exist.c;os_win/os_exist.c;' \
- -e 's;os_posix/os_fallocate.c;os_win/os_fallocate.c;' \
- -e 's;os_posix/os_filesize.c;os_win/os_filesize.c;' \
- -e 's;os_posix/os_flock.c;os_win/os_flock.c;' \
- -e 's;os_posix/os_fsync.c;os_win/os_fsync.c;' \
- -e 's;os_posix/os_ftruncate.c;os_win/os_ftruncate.c;' \
- -e 's;os_posix/os_getenv.c;os_win/os_getenv.c;' \
- -e 's;os_posix/os_map.c;os_win/os_map.c;' \
- -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \
- -e 's;os_posix/os_once.c;os_win/os_once.c;' \
- -e 's;os_posix/os_open.c;os_win/os_open.c;' \
- -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \
- -e 's;os_posix/os_path.c;os_win/os_path.c;' \
- -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \
- -e 's;os_posix/os_remove.c;os_win/os_remove.c;' \
- -e 's;os_posix/os_rename.c;os_win/os_rename.c;' \
- -e 's;os_posix/os_rw.c;os_win/os_rw.c;' \
- -e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \
- -e 's;os_posix/os_thread.c;os_win/os_thread.c;' \
- -e 's;os_posix/os_time.c;os_win/os_time.c;' \
- -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \
+ # Discard POSIX-only and PPC-only files, add in Windows-only files.
+ (
+ sed \
+ -e '/\/os_posix\//d' \
-e '/src\/support\/power8\/crc32.S/d' \
-e '/src\/support\/power8\/crc32_wrapper.c/d'
- echo 'src/os_win/os_snprintf.c'
- echo 'src/os_win/os_vsnprintf.c') < filelist | sort > $t
- cmp $t $f > /dev/null 2>&1 ||
+
+ echo 'src/os_win/os_dir.c'
+ echo 'src/os_win/os_dlopen.c'
+ echo 'src/os_win/os_errno.c'
+ echo 'src/os_win/os_fs.c'
+ echo 'src/os_win/os_getenv.c'
+ echo 'src/os_win/os_map.c'
+ echo 'src/os_win/os_mtx_cond.c'
+ echo 'src/os_win/os_once.c'
+ echo 'src/os_win/os_pagesize.c'
+ echo 'src/os_win/os_path.c'
+ echo 'src/os_win/os_priv.c'
+ echo 'src/os_win/os_setvbuf.c'
+ echo 'src/os_win/os_sleep.c'
+ echo 'src/os_win/os_snprintf.c'
+ echo 'src/os_win/os_thread.c'
+ echo 'src/os_win/os_time.c'
+ echo 'src/os_win/os_vsnprintf.c'
+ echo 'src/os_win/os_yield.c') < filelist | sort > $t
+
+ cmp $t $f > /dev/null 2>&1 ||
(echo "Building $f" && rm -f $f && cp $t $f)
}
diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c
index ba6807cd56d..d1e08edb04d 100644
--- a/examples/c/ex_event_handler.c
+++ b/examples/c/ex_event_handler.c
@@ -90,7 +90,7 @@ handle_wiredtiger_message(
/*! [Function event_handler] */
static int
-config_event_handler()
+config_event_handler(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
diff --git a/src/block/block_addr.c b/src/block/block_addr.c
index b1f2fd9454a..d8cc1d627cf 100644
--- a/src/block/block_addr.c
+++ b/src/block/block_addr.c
@@ -112,7 +112,7 @@ __wt_block_addr_invalid(WT_SESSION_IMPL *session,
#endif
/* Check if the address is past the end of the file. */
- return (offset + size > block->fh->size ? EINVAL : 0);
+ return (offset + size > block->size ? EINVAL : 0);
}
/*
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index 812bf99acfb..a0aadb43b93 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -144,7 +144,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT,
"truncate file to %" PRIuMAX, (uintmax_t)ci->file_size));
WT_ERR_BUSY_OK(
- __wt_block_truncate(session, block->fh, ci->file_size));
+ __wt_block_truncate(session, block, ci->file_size));
}
if (0) {
@@ -192,7 +192,7 @@ __wt_block_checkpoint_unload(
* an open checkpoint on the file), that's OK.
*/
WT_TRET_BUSY_OK(
- __wt_block_truncate(session, block->fh, block->fh->size));
+ __wt_block_truncate(session, block, block->size));
__wt_spin_lock(session, &block->live_lock);
__wt_block_ckpt_destroy(session, &block->live);
@@ -738,7 +738,7 @@ __ckpt_update(WT_SESSION_IMPL *session,
* if there ever is, this will need to be fixed.
*/
if (is_live)
- ci->file_size = block->fh->size;
+ ci->file_size = block->size;
/*
* Copy the checkpoint information into the checkpoint array's address
diff --git a/src/block/block_compact.c b/src/block/block_compact.c
index 8c9be4f029c..24ca6632311 100644
--- a/src/block/block_compact.c
+++ b/src/block/block_compact.c
@@ -59,20 +59,17 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
WT_DECL_RET;
WT_EXT *ext;
WT_EXTLIST *el;
- WT_FH *fh;
wt_off_t avail_eighty, avail_ninety, eighty, ninety;
*skipp = true; /* Return a default skip. */
- fh = block->fh;
-
/*
* We do compaction by copying blocks from the end of the file to the
* beginning of the file, and we need some metrics to decide if it's
* worth doing. Ignore small files, and files where we are unlikely
* to recover 10% of the file.
*/
- if (fh->size <= WT_MEGABYTE)
+ if (block->size <= WT_MEGABYTE)
return (0);
/*
@@ -93,8 +90,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
/* Sum the available bytes in the initial 80% and 90% of the file. */
avail_eighty = avail_ninety = 0;
- ninety = fh->size - fh->size / 10;
- eighty = fh->size - ((fh->size / 10) * 2);
+ ninety = block->size - block->size / 10;
+ eighty = block->size - ((block->size / 10) * 2);
el = &block->live.avail;
WT_EXT_FOREACH(ext, el->off)
@@ -117,11 +114,11 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
* less useful.
*/
if (avail_eighty > WT_MEGABYTE &&
- avail_eighty >= ((fh->size / 10) * 2)) {
+ avail_eighty >= ((block->size / 10) * 2)) {
*skipp = false;
block->compact_pct_tenths = 2;
} else if (avail_ninety > WT_MEGABYTE &&
- avail_ninety >= fh->size / 10) {
+ avail_ninety >= block->size / 10) {
*skipp = false;
block->compact_pct_tenths = 1;
}
@@ -140,7 +137,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
"%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first "
"90%% of the file to perform compaction, compaction %s",
block->name,
- (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10,
+ (uintmax_t)(block->size / 10) / WT_MEGABYTE,
+ (uintmax_t)block->size / 10,
*skipp ? "skipped" : "proceeding"));
err: __wt_spin_unlock(session, &block->live_lock);
@@ -159,15 +157,12 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_EXT *ext;
WT_EXTLIST *el;
- WT_FH *fh;
wt_off_t limit, offset;
uint32_t size, cksum;
WT_UNUSED(addr_size);
*skipp = true; /* Return a default skip. */
- fh = block->fh;
-
/* Crack the cookie. */
WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));
@@ -179,7 +174,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
* there's an obvious race if the file is sufficiently busy.
*/
__wt_spin_lock(session, &block->live_lock);
- limit = fh->size - ((fh->size / 10) * block->compact_pct_tenths);
+ limit = block->size - ((block->size / 10) * block->compact_pct_tenths);
if (offset > limit) {
el = &block->live.avail;
WT_EXT_FOREACH(ext, el->off) {
@@ -217,7 +212,7 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start)
u_int i;
el = &block->live.avail;
- size = block->fh->size;
+ size = block->size;
WT_RET(__wt_verbose(session, WT_VERB_COMPACT,
"============ %s",
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index ab5d5604087..caafcc77c48 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -462,17 +462,13 @@ static inline int
__block_extend(
WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size)
{
- WT_FH *fh;
-
- fh = block->fh;
-
/*
* Callers of this function are expected to have already acquired any
* locks required to extend the file.
*
* We should never be allocating from an empty file.
*/
- if (fh->size < block->allocsize)
+ if (block->size < block->allocsize)
WT_RET_MSG(session, EINVAL,
"file has no description information");
@@ -482,12 +478,12 @@ __block_extend(
* 8B bits (we currently check an wt_off_t is 8B in verify_build.h). I
* don't think we're likely to see anything bigger for awhile.
*/
- if (fh->size > (wt_off_t)INT64_MAX - size)
+ if (block->size > (wt_off_t)INT64_MAX - size)
WT_RET_MSG(session, WT_ERROR,
"block allocation failed, file cannot grow further");
- *offp = fh->size;
- fh->size += size;
+ *offp = block->size;
+ block->size += size;
WT_STAT_FAST_DATA_INCR(session, block_extension);
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
@@ -1343,19 +1339,16 @@ __wt_block_extlist_truncate(
WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
- WT_FH *fh;
wt_off_t orig, size;
- fh = block->fh;
-
/*
* Check if the last available extent is at the end of the file, and if
* so, truncate the file and discard the extent.
*/
if ((ext = __block_off_srch_last(el->off, astack)) == NULL)
return (0);
- WT_ASSERT(session, ext->off + ext->size <= fh->size);
- if (ext->off + ext->size < fh->size)
+ WT_ASSERT(session, ext->off + ext->size <= block->size);
+ if (ext->off + ext->size < block->size)
return (0);
/*
@@ -1363,10 +1356,10 @@ __wt_block_extlist_truncate(
* the cached file size, and that can't happen until after the extent
* list removal succeeds.)
*/
- orig = fh->size;
+ orig = block->size;
size = ext->off;
WT_RET(__block_off_remove(session, block, el, size, NULL));
- fh->size = size;
+ block->size = size;
/*
* Truncate the file. The truncate might fail if there's a file mapping
@@ -1376,7 +1369,7 @@ __wt_block_extlist_truncate(
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"truncate file from %" PRIdMAX " to %" PRIdMAX,
(intmax_t)orig, (intmax_t)size));
- WT_RET_BUSY_OK(__wt_block_truncate(session, block->fh, size));
+ WT_RET_BUSY_OK(__wt_block_truncate(session, block, size));
return (0);
}
diff --git a/src/block/block_map.c b/src/block/block_map.c
index b60623a37d8..b16fe7f8423 100644
--- a/src/block/block_map.c
+++ b/src/block/block_map.c
@@ -17,6 +17,8 @@ __wt_block_map(
WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp,
void **mappingcookie)
{
+ WT_DECL_RET;
+
*(void **)mapp = NULL;
*maplenp = 0;
@@ -42,14 +44,6 @@ __wt_block_map(
return (0);
/*
- * Turn off mapping when direct I/O is configured for the file, the
- * Linux open(2) documentation says applications should avoid mixing
- * mmap(2) of files with direct I/O to the same files.
- */
- if (block->fh->direct_io)
- return (0);
-
- /*
* Turn off mapping if the application configured a cache size maximum,
* we can't control how much of the cache size we use in that case.
*/
@@ -58,12 +52,16 @@ __wt_block_map(
/*
* Map the file into memory.
- * Ignore errors, we'll read the file through the cache if map fails.
+ * Ignore not-supported errors, we'll read the file through the cache
+ * if map fails.
*/
- (void)__wt_mmap(session, block->fh, mapp, maplenp, mappingcookie);
+ ret = block->fh->fh_map(
+ session, block->fh, mapp, maplenp, mappingcookie);
+ if (ret == ENOTSUP)
+ ret = 0;
#endif
- return (0);
+ return (ret);
}
/*
@@ -76,5 +74,6 @@ __wt_block_unmap(
void **mappingcookie)
{
/* Unmap the file from memory. */
- return (__wt_munmap(session, block->fh, map, maplen, mappingcookie));
+ return (block->fh->fh_map_unmap(
+ session, block->fh, map, maplen, mappingcookie));
}
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 0bb75d129e1..06150a0f062 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -411,11 +411,21 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats)
* Flush a file to disk.
*/
static int
-__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
+__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block)
{
- return (async ?
- __wt_fsync_async(session, bm->block->fh) :
- __wt_fsync(session, bm->block->fh));
+ WT_DECL_RET;
+
+ if (!block && !bm->block->nowait_sync_available)
+ return (0);
+
+ if ((ret = __wt_fsync(session, bm->block->fh, block)) == 0)
+ return (0);
+
+ /* Ignore ENOTSUP, but don't try again. */
+ if (ret != ENOTSUP)
+ return (ret);
+ bm->block->nowait_sync_available = false;
+ return (0);
}
/*
diff --git a/src/block/block_open.c b/src/block/block_open.c
index adb745c99e7..f4da5ca7c05 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -44,8 +44,8 @@ __wt_block_manager_create(
* in our space. Move any existing files out of the way and complain.
*/
for (;;) {
- if ((ret = __wt_open(session,
- filename, true, true, WT_FILE_TYPE_DATA, &fh)) == 0)
+ if ((ret = __wt_open(session, filename, WT_FILE_TYPE_DATA,
+ WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0)
break;
WT_ERR_TEST(ret != EEXIST, ret);
@@ -67,13 +67,13 @@ __wt_block_manager_create(
}
/* Write out the file's meta-data. */
- ret = __wt_desc_init(session, fh, allocsize);
+ ret = __wt_desc_write(session, fh, allocsize);
/*
* Ensure the truncated file has made it to disk, then the upper-level
* is never surprised.
*/
- WT_TRET(__wt_fsync(session, fh));
+ WT_TRET(__wt_fsync(session, fh, true));
/* Close the file handle. */
WT_TRET(__wt_close(session, &fh));
@@ -157,6 +157,8 @@ __wt_block_open(WT_SESSION_IMPL *session,
WT_DECL_RET;
uint64_t bucket, hash;
+ WT_UNUSED(readonly);
+
WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));
conn = S2C(session);
@@ -194,41 +196,23 @@ __wt_block_open(WT_SESSION_IMPL *session,
/* Configuration: optional OS buffer cache maximum size. */
WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
block->os_cache_max = (size_t)cval.val;
-#ifdef HAVE_POSIX_FADVISE
- if (conn->direct_io && block->os_cache_max)
- WT_ERR_MSG(session, EINVAL,
- "os_cache_max not supported in combination with direct_io");
-#else
- if (block->os_cache_max)
- WT_ERR_MSG(session, EINVAL,
- "os_cache_max not supported if posix_fadvise not "
- "available");
-#endif
/* Configuration: optional immediate write scheduling flag. */
WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
block->os_cache_dirty_max = (size_t)cval.val;
-#ifdef HAVE_SYNC_FILE_RANGE
- if (conn->direct_io && block->os_cache_dirty_max)
- WT_ERR_MSG(session, EINVAL,
- "os_cache_dirty_max not supported in combination with "
- "direct_io");
-#else
- if (block->os_cache_dirty_max) {
- /*
- * Ignore any setting if it is not supported.
- */
- block->os_cache_dirty_max = 0;
- WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
- "os_cache_dirty_max ignored when sync_file_range not "
- "available"));
- }
-#endif
+
+ /* Set the file extension information. */
+ block->extend_len = conn->data_extend_len;
+
+ /* Set the asynchronous flush, preload availability. */
+ block->nowait_sync_available = true;
+ block->preload_available = true;
/* Open the underlying file handle. */
- WT_ERR(__wt_open(session, filename, false, false,
- readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA,
- &block->fh));
+ WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh));
+
+ /* Set the file's size. */
+ WT_ERR(__wt_filesize(session, block->fh, &block->size));
/* Initialize the live checkpoint's lock. */
WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
@@ -282,16 +266,20 @@ __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
}
/*
- * __wt_desc_init --
+ * __wt_desc_write --
* Write a file's initial descriptor structure.
*/
int
-__wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
+__wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
{
WT_BLOCK_DESC *desc;
WT_DECL_ITEM(buf);
WT_DECL_RET;
+ /* If in-memory, we don't read or write the descriptor structure. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ return (0);
+
/* Use a scratch buffer to get correct alignment for direct I/O. */
WT_RET(__wt_scr_alloc(session, allocsize, &buf));
memset(buf->mem, 0, allocsize);
@@ -329,6 +317,10 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_DECL_RET;
uint32_t cksum_calculate, cksum_tmp;
+ /* If in-memory, we don't read or write the descriptor structure. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ return (0);
+
/* Use a scratch buffer to get correct alignment for direct I/O. */
WT_RET(__wt_scr_alloc(session, block->allocsize, &buf));
@@ -406,7 +398,7 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats)
WT_STAT_WRITE(stats, block_minor, WT_BLOCK_MINOR_VERSION);
WT_STAT_WRITE(
stats, block_reuse_bytes, (int64_t)block->live.avail.bytes);
- WT_STAT_WRITE(stats, block_size, block->fh->size);
+ WT_STAT_WRITE(stats, block_size, block->size);
}
/*
@@ -418,7 +410,7 @@ __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep)
{
WT_UNUSED(session);
- *sizep = bm->block->fh == NULL ? 0 : bm->block->fh->size;
+ *sizep = bm->block->size;
return (0);
}
diff --git a/src/block/block_read.c b/src/block/block_read.c
index 6e74d7a7793..6f0c41c1b5c 100644
--- a/src/block/block_read.c
+++ b/src/block/block_read.c
@@ -26,33 +26,41 @@ __wt_bm_preload(
WT_UNUSED(addr_size);
block = bm->block;
- /*
- * Turn off pre-load when direct I/O is configured for the file,
- * the kernel cache isn't interesting.
- */
- if (block->fh->direct_io)
- return (0);
-
WT_STAT_FAST_CONN_INCR(session, block_preload);
- /* Crack the cookie. */
- WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));
-
- /* Check for a mapped block. */
- mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
- if (mapped)
- return (__wt_mmap_preload(
- session, (uint8_t *)bm->map + offset, size));
+ /* Preload the block. */
+ if (block->preload_available) {
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(
+ block, addr, &offset, &size, &cksum));
+
+ mapped = bm->map != NULL &&
+ offset + size <= (wt_off_t)bm->maplen;
+ if (mapped)
+ ret = block->fh->fh_map_preload(session,
+ block->fh, (uint8_t *)bm->map + offset, size);
+ else
+ ret = block->fh->fh_advise(session,
+ block->fh, (wt_off_t)offset,
+ (wt_off_t)size, POSIX_FADV_WILLNEED);
+ if (ret == 0)
+ return (0);
-#ifdef HAVE_POSIX_FADVISE
- if (posix_fadvise(block->fh->fd,
- (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED) == 0)
- return (0);
-#endif
+ /* Ignore ENOTSUP, but don't try again. */
+ if (ret != ENOTSUP)
+ return (ret);
+ block->preload_available = false;
+ }
- WT_RET(__wt_scr_alloc(session, size, &tmp));
- ret = __wt_block_read_off(session, block, tmp, offset, size, cksum);
+ /*
+ * If preload isn't supported, do it the slow way; don't call the
+ * underlying read routine directly, we don't know for certain if
+ * this is a mapped range.
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ ret = __wt_bm_read(bm, session, tmp, addr, addr_size);
__wt_scr_free(session, &tmp);
+
return (ret);
}
@@ -65,6 +73,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
WT_BLOCK *block;
+ WT_DECL_RET;
wt_off_t offset;
uint32_t cksum, size;
bool mapped;
@@ -82,7 +91,15 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
if (mapped) {
buf->data = (uint8_t *)bm->map + offset;
buf->size = size;
- WT_RET(__wt_mmap_preload(session, buf->data, buf->size));
+ if (block->preload_available) {
+ ret = block->fh->fh_map_preload(
+ session, block->fh, buf->data, buf->size);
+
+ /* Ignore ENOTSUP, but don't try again. */
+ if (ret != ENOTSUP)
+ return (ret);
+ block->preload_available = false;
+ }
WT_STAT_FAST_CONN_INCR(session, block_map_read);
WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size);
@@ -100,21 +117,9 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
/* Read the block. */
WT_RET(__wt_block_read_off(session, block, buf, offset, size, cksum));
-#ifdef HAVE_POSIX_FADVISE
/* Optionally discard blocks from the system's buffer cache. */
- if (block->os_cache_max != 0 &&
- (block->os_cache += size) > block->os_cache_max) {
- WT_DECL_RET;
-
- block->os_cache = 0;
- /* Ignore EINVAL - some file systems don't support the flag. */
- if ((ret = posix_fadvise(block->fh->fd,
- (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0 &&
- ret != EINVAL)
- WT_RET_MSG(
- session, ret, "%s: posix_fadvise", block->name);
- }
-#endif
+ WT_RET(__wt_block_discard(session, block, (size_t)size));
+
return (0);
}
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index a8cccd53023..6be3fa73f70 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -21,7 +21,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
allocsize = block->allocsize;
/* Reset the description information in the first block. */
- WT_RET(__wt_desc_init(session, block->fh, allocsize));
+ WT_RET(__wt_desc_write(session, block->fh, allocsize));
/*
* Salvage creates a new checkpoint when it's finished, set up for
@@ -33,10 +33,10 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* Truncate the file to an allocation-size multiple of blocks (bytes
* trailing the last block must be garbage, by definition).
*/
- if (block->fh->size > allocsize) {
- len = (block->fh->size / allocsize) * allocsize;
- if (len != block->fh->size)
- WT_RET(__wt_block_truncate(session, block->fh, len));
+ if (block->size > allocsize) {
+ len = (block->size / allocsize) * allocsize;
+ if (len != block->size)
+ WT_RET(__wt_block_truncate(session, block, len));
} else
len = allocsize;
block->live.file_size = len;
@@ -83,7 +83,7 @@ __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size)
if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */
return (true);
/* past end-of-file */
- if (offset + (wt_off_t)size > block->fh->size)
+ if (offset + (wt_off_t)size > block->size)
return (true);
return (false);
}
@@ -111,7 +111,7 @@ __wt_block_salvage_next(WT_SESSION_IMPL *session,
WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));
/* Read through the file, looking for pages. */
- for (max = fh->size;;) {
+ for (max = block->size;;) {
offset = block->slvg_off;
if (offset >= max) { /* Check eof. */
*eofp = 1;
diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c
index 35c7a2c218c..6570184ca10 100644
--- a/src/block/block_vrfy.c
+++ b/src/block/block_vrfy.c
@@ -57,7 +57,7 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
* a file immediately after creation or the checkpoint doesn't reflect
* any of the data pages).
*/
- size = block->fh->size;
+ size = block->size;
if (size <= block->allocsize)
return (0);
@@ -156,7 +156,7 @@ __verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
ci = &_ci;
WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
- WT_ERR(__wt_block_truncate(session, block->fh, ci->file_size));
+ WT_ERR_BUSY_OK(__wt_block_truncate(session, block, ci->file_size));
err: __wt_block_ckpt_destroy(session, ci);
return (ret);
@@ -368,7 +368,7 @@ __verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block,
(uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size));
/* Check each chunk against the total file size. */
- if (offset + size > block->fh->size)
+ if (offset + size > block->size)
WT_RET_MSG(session, WT_ERROR,
"fragment %" PRIuMAX "-%" PRIuMAX " references "
"non-existent file blocks",
diff --git a/src/block/block_write.c b/src/block/block_write.c
index e05a430832e..134272b52f9 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -9,28 +9,47 @@
#include "wt_internal.h"
/*
- * __wt_block_header --
- * Return the size of the block-specific header.
+ * __wt_block_truncate --
+ * Truncate the file.
*/
-u_int
-__wt_block_header(WT_BLOCK *block)
+int
+__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
{
- WT_UNUSED(block);
+ WT_RET(__wt_ftruncate(session, block->fh, len));
- return ((u_int)WT_BLOCK_HEADER_SIZE);
+ block->size = block->extend_size = len;
+
+ return (0);
}
/*
- * __wt_block_truncate --
- * Truncate the file.
+ * __wt_block_discard --
+ * Discard blocks from the system buffer cache.
*/
int
-__wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+__wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size)
{
- WT_RET(__wt_ftruncate(session, fh, len));
+ WT_DECL_RET;
- fh->size = fh->extend_size = len;
+ if (block->os_cache_max == 0)
+ return (0);
+ /*
+ * We're racing on the addition, but I'm not willing to serialize on it
+ * in the standard read path without more evidence it's needed.
+ */
+ if ((block->os_cache += added_size) <= block->os_cache_max)
+ return (0);
+
+ block->os_cache = 0;
+ WT_ERR(block->fh->fh_advise(session,
+ block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED));
+ return (0);
+
+err: /* Ignore ENOTSUP, but don't try again. */
+ if (ret != ENOTSUP)
+ return (ret);
+ block->os_cache_max = 0;
return (0);
}
@@ -61,7 +80,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
locked = true;
/* If not configured to extend the file, we're done. */
- if (fh->extend_len == 0)
+ if (block->extend_len == 0)
return (0);
/*
@@ -73,9 +92,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* why there's a check in case the extended file size becomes too small:
* if the file size catches up, every thread tries to extend it.
*/
- if (fh->extend_size > fh->size &&
- (offset > fh->extend_size ||
- offset + fh->extend_len + (wt_off_t)align_size < fh->extend_size))
+ if (block->extend_size > block->size &&
+ (offset > block->extend_size || offset +
+ block->extend_len + (wt_off_t)align_size < block->extend_size))
return (0);
/*
@@ -108,9 +127,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* and that's OK, we simply may do another extension sooner than
* otherwise.
*/
- fh->extend_size = fh->size + fh->extend_len * 2;
+ block->extend_size = block->size + block->extend_len * 2;
if ((ret = __wt_fallocate(
- session, fh, fh->size, fh->extend_len * 2)) == 0)
+ session, fh, block->size, block->extend_len * 2)) == 0)
return (0);
if (ret != ENOTSUP)
return (ret);
@@ -130,13 +149,13 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* extend length after locking so we don't overwrite already-written
* blocks.
*/
- fh->extend_size = fh->size + fh->extend_len * 2;
+ block->extend_size = block->size + block->extend_len * 2;
/*
* The truncate might fail if there's a mapped file (in other words, if
* there's an open checkpoint on the file), that's OK.
*/
- if ((ret = __wt_ftruncate(session, fh, fh->extend_size)) == EBUSY)
+ if ((ret = __wt_ftruncate(session, fh, block->extend_size)) == EBUSY)
ret = 0;
return (ret);
}
@@ -318,7 +337,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_RET(ret);
}
-#ifdef HAVE_SYNC_FILE_RANGE
/*
* Optionally schedule writes for dirty pages in the system buffer
* cache, but only if the current session can wait.
@@ -327,20 +345,19 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
(block->os_cache_dirty += align_size) > block->os_cache_dirty_max &&
__wt_session_can_wait(session)) {
block->os_cache_dirty = 0;
- WT_RET(__wt_fsync_async(session, fh));
- }
-#endif
-#ifdef HAVE_POSIX_FADVISE
- /* Optionally discard blocks from the system buffer cache. */
- if (block->os_cache_max != 0 &&
- (block->os_cache += align_size) > block->os_cache_max) {
- block->os_cache = 0;
- if ((ret = posix_fadvise(fh->fd,
- (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0)
- WT_RET_MSG(
- session, ret, "%s: posix_fadvise", block->name);
+ if ((ret = __wt_fsync(session, fh, false)) != 0) {
+ /*
+ * Ignore ENOTSUP, but don't try again.
+ */
+ if (ret != ENOTSUP)
+ return (ret);
+ block->os_cache_dirty_max = 0;
+ }
}
-#endif
+
+ /* Optionally discard blocks from the buffer cache. */
+ WT_RET(__wt_block_discard(session, block, align_size));
+
WT_STAT_FAST_CONN_INCR(session, block_write);
WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size);
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index 2abe6ee9205..8ce1463a0db 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -19,7 +19,7 @@ typedef struct {
* When using the standard event handlers, the debugging output has to
* do its own message handling because its output isn't line-oriented.
*/
- FILE *fp; /* Output file stream */
+ WT_FH *fh; /* Output file stream */
WT_ITEM *msg; /* Buffered message */
WT_ITEM *tmp; /* Temporary space */
@@ -97,11 +97,8 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
if (ofile == NULL)
return (__wt_scr_alloc(session, 512, &ds->msg));
- /* If we're using a file, flush on each line. */
- WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &ds->fp));
-
- (void)setvbuf(ds->fp, NULL, _IOLBF, 1024);
- return (0);
+ return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR,
+ WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh));
}
/*
@@ -130,7 +127,7 @@ __dmsg_wrapup(WT_DBG *ds)
}
/* Close any file we opened. */
- (void)__wt_fclose(&ds->fp, WT_FHANDLE_WRITE);
+ (void)__wt_close(session, &ds->fh);
}
/*
@@ -155,7 +152,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
* the output chunk, and pass it to the event handler once we see a
* terminating newline.
*/
- if (ds->fp == NULL) {
+ if (ds->fh == NULL) {
msg = ds->msg;
for (;;) {
p = (char *)msg->mem + msg->size;
@@ -187,7 +184,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
}
} else {
va_start(ap, fmt);
- (void)__wt_vfprintf(ds->fp, fmt, ap);
+ (void)__wt_vfprintf(session, ds->fh, fmt, ap);
va_end(ap);
}
}
@@ -204,7 +201,7 @@ __wt_debug_addr_print(
WT_DECL_RET;
WT_RET(__wt_scr_alloc(session, 128, &buf));
- ret = __wt_fprintf(stderr,
+ ret = __wt_fprintf(session, WT_STDERR(session),
"%s\n", __wt_addr_string(session, addr, addr_size, buf));
__wt_scr_free(session, &buf);
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 1f739c9572e..1181d92609f 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -40,6 +40,7 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
void
__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
+ WT_FH *fh;
WT_PAGE *page;
WT_PAGE_HEADER *dsk;
WT_PAGE_MODIFY *mod;
@@ -133,8 +134,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
dsk = (WT_PAGE_HEADER *)page->dsk;
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
- (void)__wt_mmap_discard(session, dsk, dsk->mem_size);
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) {
+ fh = S2BT(session)->bm->block->fh;
+ (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size);
+ }
__wt_overwrite_and_free(session, page);
}
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 1d33a7e7c9a..02eea9c2f0c 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -36,8 +36,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
btree = S2BT(session);
/* Checkpoint files are readonly. */
- readonly = (dhandle->checkpoint != NULL ||
- F_ISSET(S2C(session), WT_CONN_READONLY));
+ readonly = dhandle->checkpoint != NULL ||
+ F_ISSET(S2C(session), WT_CONN_READONLY);
/* Get the checkpoint information for this name/checkpoint pair. */
WT_CLEAR(ckpt);
diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c
index 2c0238545fb..a1aaf2c7ea0 100644
--- a/src/btree/bt_huffman.c
+++ b/src/btree/bt_huffman.c
@@ -134,9 +134,9 @@ static int __wt_huffman_read(WT_SESSION_IMPL *,
*/
static int
__huffman_confchk_file(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, FILE **fpp)
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp)
{
- FILE *fp;
+ WT_FH *fh;
WT_DECL_RET;
size_t len;
char *fname;
@@ -157,14 +157,14 @@ __huffman_confchk_file(
/* Check the file exists. */
WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_fopen(session,
- fname, WT_FHANDLE_READ, WT_FOPEN_FIXED, &fp));
+ WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR,
+ WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh));
/* Optionally return the file handle. */
- if (fpp == NULL)
- (void)__wt_fclose(&fp, WT_FHANDLE_READ);
+ if (fhp == NULL)
+ (void)__wt_close(session, &fh);
else
- *fpp = fp;
+ *fhp = fh;
err: __wt_free(session, fname);
@@ -298,22 +298,24 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp)
{
struct __wt_huffman_table *table, *tp;
- FILE *fp;
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ WT_FH *fh;
int64_t symbol, frequency;
u_int entries, lineno;
+ int n;
bool is_utf8;
*tablep = NULL;
*entriesp = *numbytesp = 0;
- fp = NULL;
+ fh = NULL;
table = NULL;
/*
* Try and open the backing file.
*/
- WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fp));
+ WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh));
/*
* UTF-8 table is 256 bytes, with a range of 0-255.
@@ -329,9 +331,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
WT_ERR(__wt_calloc_def(session, entries, &table));
}
- for (tp = table, lineno = 1; (ret =
- fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF;
- ++tp, ++lineno) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ for (tp = table, lineno = 1;; ++tp, ++lineno) {
+ WT_ERR(__wt_getline(session, tmp, fh));
+ if (tmp->size == 0)
+ break;
+ n = sscanf(
+ tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency);
/*
* Entries is 0-based, that is, there are (entries +1) possible
* values that can be configured. The line number is 1-based, so
@@ -343,7 +349,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
"Huffman table file %.*s is corrupted, "
"more than %" PRIu32 " entries",
(int)ip->len, ip->str, entries + 1);
- if (ret != 2)
+ if (n != 2)
WT_ERR_MSG(session, EINVAL,
"line %u of Huffman table file %.*s is corrupted: "
"expected two unsigned integral values",
@@ -365,7 +371,6 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
tp->symbol = (uint32_t)symbol;
tp->frequency = (uint32_t)frequency;
}
- ret = ferror(fp) ? WT_ERROR : 0;
*entriesp = lineno - 1;
*tablep = table;
@@ -373,7 +378,9 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
if (0) {
err: __wt_free(session, table);
}
- (void)__wt_fclose(&fp, WT_FHANDLE_READ);
+ (void)__wt_close(session, &fh);
+
+ __wt_scr_free(session, &tmp);
return (ret);
}
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 57056eb5c99..826589f8bdd 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -259,7 +259,7 @@ err: /* On error, clear any left-over tree walk. */
*/
if (ret == 0 &&
syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC))
- WT_RET(btree->bm->sync(btree->bm, session, true));
+ WT_RET(btree->bm->sync(btree->bm, session, false));
return (ret);
}
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 952298f2456..83dc7924312 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -84,7 +84,7 @@ __verify_config_offsets(
WT_CONFIG list;
WT_CONFIG_ITEM cval, k, v;
WT_DECL_RET;
- u_long offset;
+ uint64_t offset;
*quitp = false;
@@ -97,7 +97,7 @@ __verify_config_offsets(
* verify because that's where we "dump blocks" for debugging.)
*/
*quitp = true;
- if (v.len != 0 || sscanf(k.str, "%lu", &offset) != 1)
+ if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1)
WT_RET_MSG(session, EINVAL,
"unexpected dump offset format");
#if !defined(HAVE_DIAGNOSTIC)
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 8796ec6b2fc..fd541458fa8 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -58,8 +58,10 @@ __wt_las_create(WT_SESSION_IMPL *session)
conn = S2C(session);
- if (F_ISSET(conn, WT_CONN_READONLY))
+ /* Read-only and in-memory configurations don't need the LAS table. */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY))
return (0);
+
/*
* Done at startup: we cannot do it on demand because we require the
* schema lock to create and drop the table, and it may not always be
diff --git a/src/config/config_def.c b/src/config/config_def.c
index c752e5eb265..5b6f0bac323 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -150,9 +150,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
+ "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\","
+ "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
"\"shared_cache\",\"split\",\"temporary\",\"transaction\","
"\"verify\",\"version\",\"write\"]",
NULL, 0 },
@@ -680,9 +680,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
+ "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\","
+ "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
"\"shared_cache\",\"split\",\"temporary\",\"transaction\","
"\"verify\",\"version\",\"write\"]",
NULL, 0 },
@@ -761,9 +761,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
+ "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\","
+ "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
"\"shared_cache\",\"split\",\"temporary\",\"transaction\","
"\"verify\",\"version\",\"write\"]",
NULL, 0 },
@@ -837,9 +837,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
+ "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\","
+ "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
"\"shared_cache\",\"split\",\"temporary\",\"transaction\","
"\"verify\",\"version\",\"write\"]",
NULL, 0 },
@@ -913,9 +913,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
+ "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\","
+ "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\","
"\"shared_cache\",\"split\",\"temporary\",\"transaction\","
"\"verify\",\"version\",\"write\"]",
NULL, 0 },
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 6d115c8fdcd..9e2f03da21f 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1118,7 +1118,8 @@ __conn_config_append(const char *cfg[], const char *config)
{
while (*cfg != NULL)
++cfg;
- *cfg = config;
+ cfg[0] = config;
+ cfg[1] = NULL;
}
/*
@@ -1196,7 +1197,8 @@ __conn_config_file(WT_SESSION_IMPL *session,
return (0);
/* Open the configuration file. */
- WT_RET(__wt_open(session, filename, false, false, 0, &fh));
+ WT_RET(__wt_open(
+ session, filename, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &fh));
WT_ERR(__wt_filesize(session, fh, &size));
if (size == 0)
goto err;
@@ -1488,8 +1490,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
exist = false;
if (!is_create)
WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist));
- ret = __wt_open(session,
- WT_SINGLETHREAD, is_create || exist, false, 0, &conn->lock_fh);
+ ret = __wt_open(session, WT_SINGLETHREAD, WT_FILE_TYPE_REGULAR,
+ is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh);
/*
* If this is a read-only connection and we cannot grab the lock
@@ -1517,7 +1519,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* zero-length, and that's OK, the underlying call supports
* locking past the end-of-file.
*/
- if (__wt_bytelock(conn->lock_fh, (wt_off_t)0, true) != 0)
+ if (__wt_file_lock(session, conn->lock_fh, true) != 0)
WT_ERR_MSG(session, EBUSY,
"WiredTiger database is already being managed by "
"another process");
@@ -1543,7 +1545,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
}
/* We own the lock file, optionally create the WiredTiger file. */
- ret = __wt_open(session, WT_WIREDTIGER, is_create, false, 0, &fh);
+ ret = __wt_open(session, WT_WIREDTIGER,
+ WT_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh);
/*
* If we're read-only, check for success as well as handled errors.
@@ -1564,12 +1567,12 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* as described above). Immediately release the lock, it's
* just a test.
*/
- if (__wt_bytelock(fh, (wt_off_t)0, true) != 0) {
+ if (__wt_file_lock(session, fh, true) != 0) {
WT_ERR_MSG(session, EBUSY,
"WiredTiger database is already being managed by "
"another process");
}
- WT_ERR(__wt_bytelock(fh, (wt_off_t)0, false));
+ WT_ERR(__wt_file_lock(session, fh, false));
}
/*
@@ -1590,7 +1593,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
len = (size_t)snprintf(buf, sizeof(buf),
"%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING);
WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
- WT_ERR(__wt_fsync(session, fh));
+ WT_ERR(__wt_fsync(session, fh, true));
} else {
/*
* Although exclusive and the read-only configuration settings
@@ -1692,6 +1695,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "evict", WT_VERB_EVICT },
{ "evictserver", WT_VERB_EVICTSERVER },
{ "fileops", WT_VERB_FILEOPS },
+ { "handleops", WT_VERB_HANDLEOPS },
{ "log", WT_VERB_LOG },
{ "lsm", WT_VERB_LSM },
{ "lsm_manager", WT_VERB_LSM_MANAGER },
@@ -1750,14 +1754,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
static int
__conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- FILE *fp;
+ WT_FH *fh;
WT_CONFIG parser;
WT_CONFIG_ITEM cval, k, v;
WT_DECL_RET;
bool exist;
const char *base_config;
- fp = NULL;
+ fh = NULL;
base_config = NULL;
/*
@@ -1789,10 +1793,11 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
if (exist)
return (0);
- WT_RET(__wt_fopen(session,
- WT_BASECONFIG_SET, WT_FHANDLE_WRITE, 0, &fp));
+ WT_RET(__wt_open(session,
+ WT_BASECONFIG_SET, WT_FILE_TYPE_REGULAR,
+ WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE | WT_STREAM_WRITE, &fh));
- WT_ERR(__wt_fprintf(fp, "%s\n\n",
+ WT_ERR(__wt_fprintf(session, fh, "%s\n\n",
"# Do not modify this file.\n"
"#\n"
"# WiredTiger created this file when the database was created,\n"
@@ -1839,18 +1844,18 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
--v.str;
v.len += 2;
}
- WT_ERR(__wt_fprintf(fp,
+ WT_ERR(__wt_fprintf(session, fh,
"%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
}
WT_ERR_NOTFOUND_OK(ret);
/* Flush the handle and rename the file into place. */
- ret = __wt_sync_fp_and_rename(
- session, &fp, WT_BASECONFIG_SET, WT_BASECONFIG);
+ ret = __wt_sync_handle_and_rename(
+ session, &fh, WT_BASECONFIG_SET, WT_BASECONFIG);
if (0) {
/* Close open file handle, remove any temporary file. */
-err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE));
+err: WT_TRET(__wt_close(session, &fh));
WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
}
@@ -1932,44 +1937,71 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
session = conn->default_session = &conn->dummy_session;
session->iface.connection = &conn->iface;
session->name = "wiredtiger_open";
- __wt_random_init(&session->rnd);
+
+ /* Do standard I/O and error handling first. */
+ WT_ERR(__wt_os_stdio(session));
__wt_event_handler_set(session, event_handler);
- /* Remaining basic initialization of the connection structure. */
+ /* Basic initialization of the connection structure. */
WT_ERR(__wt_connection_init(conn));
- /* Check/set the application-specified configuration string. */
+ /* Check the application-specified configuration string. */
WT_ERR(__wt_config_check(session,
WT_CONFIG_REF(session, wiredtiger_open), config, 0));
+
+ /*
+ * Build the temporary, initial configuration stack, in the following
+ * order (where later entries override earlier entries):
+ *
+ * 1. the base configuration for the wiredtiger_open call
+ * 2. the config passed in by the application
+ * 3. environment variable settings (optional)
+ *
+ * In other words, a configuration stack based on the application's
+ * passed-in information and nothing else.
+ */
cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open);
cfg[1] = config;
-
- /* Capture the config_base setting file for later use. */
- WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval));
- config_base_set = cval.val != 0;
-
- /* Configure error messages so we get them right early. */
- WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
- if (cval.len != 0)
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &conn->error_prefix));
+ WT_ERR(__wt_scr_alloc(session, 0, &i1));
+ WT_ERR(__conn_config_env(session, cfg, i1));
/*
- * We need to look for read-only early so that we can use it
- * in __conn_single and whether to use the base config file.
- * XXX that means we can only make the choice in __conn_single if the
- * user passes it in via the config string to wiredtiger_open.
+ * We need to know if configured for read-only or in-memory behavior
+ * before reading/writing the filesystem. Before that, the application
+ * can only configure it through the wiredtiger_open config string or
+ * the WIREDTIGER_CONFIG environment variable.
+ *
+ * The environment isn't trusted by default, for security reasons; if
+ * the application wants us to trust the environment before reading
+ * the filesystem, the wiredtiger_open config string is the only way.
*/
+ WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_IN_MEMORY);
WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
if (cval.val)
F_SET(conn, WT_CONN_READONLY);
/*
- * XXX ideally, we would check "in_memory" here, so we could completely
- * avoid having a database directory. However, it can be convenient to
- * pass "in_memory" via the WIREDTIGER_CONFIG environment variable, and
- * we haven't read it yet.
+ * After checking readonly and in-memory, but before we do anything that
+ * touches the filesystem, configure the OS layer.
*/
+ WT_ERR(__wt_os_init(session));
+
+ /*
+ * Capture the config_base setting file for later use. Again, if the
+ * application doesn't want us to read the base configuration file,
+ * the WIREDTIGER_CONFIG environment variable or the wiredtiger_open
+ * config string are the only ways.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval));
+ config_base_set = cval.val != 0;
+
+ /* Configure error messages so we get them right early. */
+ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
+ if (cval.len != 0)
+ WT_ERR(__wt_strndup(
+ session, cval.str, cval.len, &conn->error_prefix));
/* Get the database home. */
WT_ERR(__conn_home(session, home, cfg));
@@ -1978,8 +2010,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__conn_single(session, cfg));
/*
- * Build the configuration stack, in the following order (where later
- * entries override earlier entries):
+ * Build the real configuration stack, in the following order (where
+ * later entries override earlier entries):
*
* 1. all possible wiredtiger_open configurations
* 2. the WiredTiger compilation version (expected to be overridden by
@@ -1993,7 +2025,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* Clear the entries we added to the stack, we're going to build it in
* order.
*/
- WT_ERR(__wt_scr_alloc(session, 0, &i1));
WT_ERR(__wt_scr_alloc(session, 0, &i2));
WT_ERR(__wt_scr_alloc(session, 0, &i3));
cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all);
@@ -2016,11 +2047,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* Merge the full configuration stack and save it for reconfiguration.
*/
WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg));
+
/*
- * The read-only setting may have been set in a configuration file.
- * Get it again so that we can override other configuration settings
- * before they are processed by the subsystems.
+ * Read-only and in-memory settings may have been set in a configuration
+ * file (not optimal, but we can handle it). Get those settings again so
+ * we can override other configuration settings as they are processed.
*/
+ WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_IN_MEMORY);
WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
if (cval.val)
F_SET(conn, WT_CONN_READONLY);
@@ -2054,6 +2089,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* The error message configuration might have changed (if set in a
* configuration file, and not in the application's configuration
* string), get it again. Do it first, make error messages correct.
+ * Ditto verbose configuration so we dump everything the application
+ * wants to see.
*/
WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
if (cval.len != 0) {
@@ -2061,6 +2098,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__wt_strndup(
session, cval.str, cval.len, &conn->error_prefix));
}
+ WT_ERR(__wt_verbose_config(session, cfg));
WT_ERR(__wt_config_gets(session, cfg, "hazard_max", &cval));
conn->hazard_max = (uint32_t)cval.val;
@@ -2071,10 +2109,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__wt_config_gets(session, cfg, "session_scratch_max", &cval));
conn->session_scratch_max = (size_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val != 0)
- F_SET(conn, WT_CONN_IN_MEMORY);
-
WT_ERR(__wt_config_gets(session, cfg, "checkpoint_sync", &cval));
if (cval.val)
F_SET(conn, WT_CONN_CKPT_SYNC);
@@ -2137,7 +2171,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__conn_statistics_config(session, cfg));
WT_ERR(__wt_lsm_manager_config(session, cfg));
WT_ERR(__wt_sweep_config(session, cfg));
- WT_ERR(__wt_verbose_config(session, cfg));
/* Initialize the OS page size for mmap */
conn->page_size = __wt_get_vm_pagesize();
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 16717597f4d..5f4c38e7361 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -41,6 +41,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
TAILQ_INIT(&conn->lsm_manager.appqh);
TAILQ_INIT(&conn->lsm_manager.managerqh);
+ /* Random numbers. */
+ __wt_random_init(&session->rnd);
+
/* Configuration. */
WT_RET(__wt_conn_config_init(session));
@@ -119,14 +122,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
session = conn->default_session;
- /*
- * Close remaining open files (before discarding the mutex, the
- * underlying file-close code uses the mutex to guard lists of
- * open files.
- */
- if (conn->lock_fh)
- WT_TRET(__wt_close(session, &conn->lock_fh));
-
/* Remove from the list of connections. */
__wt_spin_lock(session, &__wt_process.spinlock);
TAILQ_REMOVE(&__wt_process.connqh, conn, q);
@@ -160,6 +155,9 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_free(session, conn->error_prefix);
__wt_free(session, conn->sessions);
+ /* Destroy the OS configuration. */
+ WT_TRET(__wt_os_cleanup(session));
+
__wt_free(NULL, conn);
return (ret);
}
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 757d69bf240..6cb8ba3d0f9 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -260,8 +260,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session)
* files that may not have been used yet.
*/
WT_ERR(__wt_dirlist(session, conn->log_path,
- WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE,
- &recfiles, &reccount));
+ WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, &recfiles, &reccount));
__wt_log_files_free(session, recfiles, reccount);
recfiles = NULL;
/*
@@ -399,7 +398,7 @@ __log_file_server(void *arg)
* to move the sync_lsn into the next file for
* later syncs.
*/
- WT_ERR(__wt_fsync(session, close_fh));
+ WT_ERR(__wt_fsync(session, close_fh, true));
/*
* We want to make sure the file size reflects
* actual data and has minimal pre-allocated
@@ -451,7 +450,7 @@ __log_file_server(void *arg)
log->bg_sync_lsn.l.file) ||
(log->sync_lsn.l.file < min_lsn.l.file))
continue;
- WT_ERR(__wt_fsync(session, log->log_fh));
+ WT_ERR(__wt_fsync(session, log->log_fh, true));
__wt_spin_lock(session, &log->log_sync_lock);
locked = true;
/*
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index aff422654d7..38c3288209e 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -76,7 +76,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_CONNECTION *wt_conn;
WT_DECL_RET;
WT_DLH *dlh;
- WT_FH *fh;
WT_SESSION_IMPL *s, *session;
WT_TXN_GLOBAL *txn_global;
u_int i;
@@ -150,20 +149,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_TRET(__wt_conn_remove_encryptor(session));
WT_TRET(__wt_conn_remove_extractor(session));
- /*
- * Complain if files weren't closed, ignoring the lock file, we'll
- * close it in a minute.
- */
- TAILQ_FOREACH(fh, &conn->fhqh, q) {
- if (fh == conn->lock_fh)
- continue;
-
- __wt_errx(session,
- "Connection has open file handles: %s", fh->name);
- WT_TRET(__wt_close(session, &fh));
- fh = TAILQ_FIRST(&conn->fhqh);
- }
-
/* Disconnect from shared cache - must be before cache destroy. */
WT_TRET(__wt_conn_cache_pool_destroy(session));
@@ -182,6 +167,13 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_TRET(__wt_dlclose(session, dlh));
}
+ /* Close the lock file, opening up the database to other connections. */
+ if (conn->lock_fh != NULL)
+ WT_TRET(__wt_close(session, &conn->lock_fh));
+
+ /* Close any file handles left open. */
+ WT_TRET(__wt_close_connection_close(session));
+
/*
* Close the internal (default) session, and switch back to the dummy
* session in case of any error messages from the remaining operations
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index d6e59a50da5..fccc4786402 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -209,10 +209,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
}
if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
- WT_ERR(__wt_fprintf(conn->stat_fp,
+ WT_ERR(__wt_fprintf(session, conn->stat_fh,
"{\"version\":\"%s\",\"localTime\":\"%s\"",
WIREDTIGER_VERSION_STRING, conn->stat_stamp));
- WT_ERR(__wt_fprintf(conn->stat_fp, ",\"wiredTiger\":{"));
+ WT_ERR(__wt_fprintf(
+ session, conn->stat_fh, ",\"wiredTiger\":{"));
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
/* Check if we are starting a new section. */
@@ -224,23 +225,23 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
strncmp(desc, tmp->data, tmp->size) != 0) {
WT_ERR(__wt_buf_set(
session, tmp, desc, prefixlen));
- WT_ERR(__wt_fprintf(conn->stat_fp,
+ WT_ERR(__wt_fprintf(session, conn->stat_fh,
"%s\"%.*s\":{", first ? "" : "},",
(int)prefixlen, desc));
first = false;
groupfirst = true;
}
- WT_ERR(__wt_fprintf(conn->stat_fp,
+ WT_ERR(__wt_fprintf(session, conn->stat_fh,
"%s\"%s\":%" PRId64,
groupfirst ? "" : ",", endprefix + 2, val));
groupfirst = false;
}
WT_ERR_NOTFOUND_OK(ret);
- WT_ERR(__wt_fprintf(conn->stat_fp, "}}}\n"));
+ WT_ERR(__wt_fprintf(session, conn->stat_fh, "}}}\n"));
} else {
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
- WT_ERR(__wt_fprintf(conn->stat_fp,
+ WT_ERR(__wt_fprintf(session, conn->stat_fh,
"%s %" PRId64 " %s %s\n",
conn->stat_stamp, val, name, desc));
}
@@ -349,11 +350,11 @@ err: if (locked)
static int
__statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
{
- FILE *log_file;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
struct timespec ts;
struct tm *tm, _tm;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *log_file;
conn = S2C(session);
@@ -366,16 +367,18 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
WT_RET_MSG(session, ENOMEM, "strftime path conversion");
/* If the path has changed, cycle the log file. */
- if ((log_file = conn->stat_fp) == NULL ||
+ if ((log_file = conn->stat_fh) == NULL ||
path == NULL || strcmp(tmp->mem, path->mem) != 0) {
- conn->stat_fp = NULL;
- WT_RET(__wt_fclose(&log_file, WT_FHANDLE_APPEND));
+ conn->stat_fh = NULL;
+ WT_RET(__wt_close(session, &log_file));
if (path != NULL)
(void)strcpy(path->mem, tmp->mem);
- WT_RET(__wt_fopen(session,
- tmp->mem, WT_FHANDLE_APPEND, WT_FOPEN_FIXED, &log_file));
+ WT_RET(__wt_open(session, tmp->mem,
+ WT_FILE_TYPE_REGULAR,
+ WT_OPEN_CREATE | WT_OPEN_FIXED | WT_STREAM_APPEND,
+ &log_file));
}
- conn->stat_fp = log_file;
+ conn->stat_fh = log_file;
/* Create the entry prefix for this time of day. */
if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0)
@@ -408,7 +411,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
WT_RET(__statlog_lsm_apply(session));
/* Flush. */
- return (__wt_fflush(conn->stat_fp));
+ return (__wt_fsync(session, conn->stat_fh, true));
}
/*
@@ -594,7 +597,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
conn->stat_session = NULL;
conn->stat_tid_set = false;
conn->stat_format = NULL;
- WT_TRET(__wt_fclose(&conn->stat_fp, WT_FHANDLE_APPEND));
+ WT_TRET(__wt_close(session, &conn->stat_fh));
conn->stat_path = NULL;
conn->stat_sources = NULL;
conn->stat_stamp = NULL;
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index cc0aa5a1322..5d24ea61607 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -369,8 +369,9 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
*
* Don't tap the sweep thread for eviction.
*/
- session_flags = WT_SESSION_CAN_WAIT |
- WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
+ session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION;
+ if (F_ISSET(conn, WT_CONN_LAS_OPEN))
+ session_flags |= WT_SESSION_LOOKASIDE_CURSOR;
WT_RET(__wt_open_internal_session(
conn, "sweep-server", true, session_flags, &conn->sweep_session));
session = conn->sweep_session;
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 2fb0c464a76..2ba73eb86c9 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -251,7 +251,7 @@ __backup_start(
* Close any hot backup file.
* We're about to open the incremental backup file.
*/
- WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE));
+ WT_TRET(__wt_close(session, &cb->bfh));
WT_ERR(__backup_file_create(session, cb, log_only));
WT_ERR(__backup_list_append(
session, cb, WT_INCREMENTAL_BACKUP));
@@ -269,7 +269,7 @@ __backup_start(
}
err: /* Close the hot backup file. */
- WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE));
+ WT_TRET(__wt_close(session, &cb->bfh));
if (ret != 0) {
WT_TRET(__backup_cleanup_handles(session, cb));
WT_TRET(__backup_stop(session));
@@ -411,9 +411,9 @@ static int
__backup_file_create(
WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool incremental)
{
- return (__wt_fopen(session,
+ return (__wt_open(session,
incremental ? WT_INCREMENTAL_BACKUP : WT_METADATA_BACKUP,
- WT_FHANDLE_WRITE, 0, &cb->bfp));
+ WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &cb->bfh));
}
/*
@@ -472,7 +472,7 @@ __backup_list_uri_append(
/* Add the metadata entry to the backup file. */
WT_RET(__wt_metadata_search(session, name, &value));
- WT_RET(__wt_fprintf(cb->bfp, "%s\n%s\n", name, value));
+ WT_RET(__wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value));
__wt_free(session, value);
/* Add file type objects to the list of files to be copied. */
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 50a00787f35..d3e32d7fc23 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1688,9 +1688,9 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session)
int
__wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
{
- FILE *fp;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle, *saved_dhandle;
+ WT_FH *fh;
WT_PAGE *page;
WT_REF *next_walk;
uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages;
@@ -1702,12 +1702,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
total_bytes = 0;
if (ofile == NULL)
- fp = stderr;
+ fh = WT_STDERR(session);
else
- WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &fp));
+ WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR,
+ WT_OPEN_CREATE | WT_STREAM_WRITE, &fh));
/* Note: odd string concatenation avoids spelling errors. */
- (void)__wt_fprintf(fp, "==========\n" "cache dump\n");
+ (void)__wt_fprintf(session, fh, "==========\n" "cache dump\n");
saved_dhandle = session->dhandle;
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
@@ -1746,23 +1747,25 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
session->dhandle = NULL;
if (dhandle->checkpoint == NULL)
- (void)__wt_fprintf(fp, "%s(<live>): \n", dhandle->name);
+ (void)__wt_fprintf(session, fh,
+ "%s(<live>): \n", dhandle->name);
else
- (void)__wt_fprintf(fp, "%s(checkpoint=%s): \n",
+ (void)__wt_fprintf(session, fh,
+ "%s(checkpoint=%s): \n",
dhandle->name, dhandle->checkpoint);
if (intl_pages != 0)
- (void)__wt_fprintf(fp, "\t" "internal pages: "
- "%" PRIu64 " pages, %" PRIu64
+ (void)__wt_fprintf(session, fh,
+ "\t" "internal pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
intl_pages, max_intl_bytes, intl_bytes >> 20);
if (leaf_pages != 0)
- (void)__wt_fprintf(fp, "\t" "leaf pages: "
- "%" PRIu64 " pages, %" PRIu64
+ (void)__wt_fprintf(session, fh,
+ "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
leaf_pages, max_leaf_bytes, leaf_bytes >> 20);
if (dirty_pages != 0)
- (void)__wt_fprintf(fp, "\t" "dirty pages: "
- "%" PRIu64 " pages, %" PRIu64
+ (void)__wt_fprintf(session, fh,
+ "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64
" max, %" PRIu64 "MB total\n",
dirty_pages, max_dirty_bytes, dirty_bytes >> 20);
@@ -1777,12 +1780,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
if (conn->cache->overhead_pct != 0)
total_bytes +=
(total_bytes * (uint64_t)conn->cache->overhead_pct) / 100;
- (void)__wt_fprintf(fp, "cache dump: total found = %" PRIu64 "MB"
- " vs tracked inuse %" PRIu64 "MB\n",
+ (void)__wt_fprintf(session, fh,
+ "cache dump: total found = %" PRIu64
+ "MB vs tracked inuse %" PRIu64 "MB\n",
total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20);
- (void)__wt_fprintf(fp, "==========\n");
- if (fp != stderr)
- WT_RET(__wt_fclose(&fp, WT_FHANDLE_WRITE));
+ (void)__wt_fprintf(session, fh, "==========\n");
+ if (ofile != NULL)
+ WT_RET(__wt_close(session, &fh));
return (0);
}
#endif
diff --git a/src/include/block.h b/src/include/block.h
index 10efd35086c..e964fb4e8c2 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -217,10 +217,16 @@ struct __wt_block {
/* A list of block manager handles, sharing a file descriptor. */
uint32_t ref; /* References */
- WT_FH *fh; /* Backing file handle */
TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */
TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */
+ WT_FH *fh; /* Backing file handle */
+ wt_off_t size; /* File size */
+ wt_off_t extend_size; /* File extended size */
+ wt_off_t extend_len; /* File extend chunk size */
+ bool nowait_sync_available; /* File can flush asynchronously */
+ bool preload_available; /* File pages can be preloaded */
+
/* Configuration information, set when the file is opened. */
uint32_t allocfirst; /* Allocation is first-fit */
uint32_t allocsize; /* Allocation size */
@@ -399,3 +405,15 @@ __wt_block_header_byteswap(WT_BLOCK_HEADER *blk)
*/
#define WT_BLOCK_COMPRESS_SKIP 64
#define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE
+
+/*
+ * __wt_block_header --
+ * Return the size of the block-specific header.
+ */
+static inline u_int
+__wt_block_header(WT_BLOCK *block)
+{
+ WT_UNUSED(block);
+
+ return ((u_int)WT_BLOCK_HEADER_SIZE);
+}
diff --git a/src/include/connection.h b/src/include/connection.h
index 2255056fcf6..c2b1dd68c18 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -333,7 +333,7 @@ struct __wt_connection_impl {
bool stat_tid_set; /* Statistics log thread set */
WT_CONDVAR *stat_cond; /* Statistics log wait mutex */
const char *stat_format; /* Statistics log timestamp format */
- FILE *stat_fp; /* Statistics log file handle */
+ WT_FH *stat_fh; /* Statistics log file handle */
char *stat_path; /* Statistics log path format */
char **stat_sources; /* Statistics log list of objects */
const char *stat_stamp; /* Statistics log entry timestamp */
@@ -421,5 +421,25 @@ struct __wt_connection_impl {
int page_size; /* OS page size for mmap alignment */
uint32_t verbose;
+ void *inmemory; /* In-memory configuration cookie */
+
+#define WT_STDERR(s) (&S2C(s)->wt_stderr)
+#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
+ WT_FH wt_stderr, wt_stdout;
+
+ /*
+ * OS library/system call jump table, to support in-memory and readonly
+ * configurations as well as special devices with other non-POSIX APIs.
+ */
+ int (*file_directory_list)(WT_SESSION_IMPL *,
+ const char *, const char *, uint32_t, char ***, u_int *);
+ int (*file_directory_sync)(WT_SESSION_IMPL *, const char *);
+ int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *);
+ int (*file_remove)(WT_SESSION_IMPL *, const char *);
+ int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *);
+ int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *);
+ int (*handle_open)(WT_SESSION_IMPL *,
+ WT_FH *, const char *, uint32_t, uint32_t);
+
uint32_t flags;
};
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 4b35daf106e..1d2ce1bfd82 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -67,7 +67,7 @@ struct __wt_cursor_backup {
WT_CURSOR iface;
size_t next; /* Cursor position */
- FILE *bfp; /* Backup file */
+ WT_FH *bfh; /* Backup file */
uint32_t maxid; /* Maximum log file ID seen */
WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */
diff --git a/src/include/extern.h b/src/include/extern.h
index 48c52d4a109..d4e67b2f313 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -49,7 +49,7 @@ extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *file
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp);
extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block);
-extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize);
+extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize);
extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats);
extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep);
extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep);
@@ -73,8 +73,8 @@ extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block);
extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci);
extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block);
extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size);
-extern u_int __wt_block_header(WT_BLOCK *block);
-extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len);
+extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len);
+extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size);
extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep);
extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum);
extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked);
@@ -375,23 +375,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const
extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep);
extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value);
extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep);
-extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno);
extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop);
-extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop);
extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp);
-extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value);
extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep);
-extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key);
extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp);
-extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode);
extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep);
-extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
-extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags);
+extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
+extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags);
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced);
extern int __wt_log_slot_new(WT_SESSION_IMPL *session);
@@ -484,74 +484,6 @@ extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value);
-extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
-extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp);
-extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
-extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp);
-extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg);
-extern int __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp);
-extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
-extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
-extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
-extern int __wt_errno(void);
-extern int __wt_map_error_rdonly(int error);
-extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
-extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp);
-extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh);
-extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
-extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep);
-extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep);
-extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock);
-extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh);
-extern int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path);
-extern int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh);
-extern int __wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh);
-extern int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len);
-extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
-extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp);
-extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr);
-extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie);
-extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size);
-extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size);
-extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie);
-extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp);
-extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled);
-extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
-extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name);
-extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
-extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp);
-extern int __wt_once(void (*init_routine)(void));
-extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp);
-extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
-extern int __wt_get_vm_pagesize(void);
-extern bool __wt_absolute_path(const char *path);
-extern const char *__wt_path_separator(void);
-extern bool __wt_has_priv(void);
-extern int __wt_remove(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to);
-extern int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf);
-extern int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf);
-extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
-extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp);
-extern int __wt_vfprintf(FILE *fp, const char *fmt, va_list ap);
-extern int __wt_fprintf(FILE *fp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
-extern int __wt_fflush(FILE *fp);
-extern int __wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag);
-extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base);
-extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
-extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
-extern void __wt_thread_id(char *buf, size_t buflen);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
-extern void __wt_yield(void);
extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...);
extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...);
extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...);
@@ -672,12 +604,6 @@ extern int __wt_panic(WT_SESSION_IMPL *session);
extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name);
extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri);
extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri);
-extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
-extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
-extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
-extern int __wt_fh_sync_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to);
-extern int __wt_sync_fp_and_rename( WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to);
extern int __wt_library_init(void);
extern int __wt_breakpoint(void);
extern void __wt_attach(WT_SESSION_IMPL *session);
@@ -703,6 +629,14 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg);
extern int __wt_print_huffman_code(void *huffman_arg, uint16_t symbol);
extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf);
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf);
+extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name);
+extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
+extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp);
extern uint32_t __wt_nlpo2_round(uint32_t v);
extern uint32_t __wt_nlpo2(uint32_t v);
extern uint32_t __wt_log2_int(uint32_t n);
@@ -771,10 +705,78 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t
extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp);
extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop);
extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session);
-extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags);
+extern int __wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags);
extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval);
extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops);
extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_recover(WT_SESSION_IMPL *session);
+extern bool __wt_absolute_path(const char *path);
+extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp);
+extern bool __wt_has_priv(void);
+extern const char *__wt_path_separator(void);
+extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
+extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
+extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
+extern int __wt_close_connection_close(WT_SESSION_IMPL *session);
+extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp);
+extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
+extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
+extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled);
+extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to);
+extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
+extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
+extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
+extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
+extern int __wt_errno(void);
+extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
+extern int __wt_get_vm_pagesize(void);
+extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
+extern int __wt_getlasterror(void);
+extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh);
+extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr);
+extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp);
+extern int __wt_map_error_rdonly(int error);
+extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
+extern int __wt_once(void (*init_routine)(void));
+extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp);
+extern int __wt_os_cleanup(WT_SESSION_IMPL *session);
+extern int __wt_os_init(WT_SESSION_IMPL *session);
+extern int __wt_os_inmemory(WT_SESSION_IMPL *session);
+extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session);
+extern int __wt_os_posix(WT_SESSION_IMPL *session);
+extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session);
+extern int __wt_os_stdio(WT_SESSION_IMPL *session);
+extern int __wt_os_win(WT_SESSION_IMPL *session);
+extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session);
+extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp);
+extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
+extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie);
+extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size);
+extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size);
+extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie);
+extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
+extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
+extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
+extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp);
+extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to);
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
+extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
+extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp);
+extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie);
+extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size);
+extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size);
+extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie);
+extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base);
+extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg);
+extern void __wt_handle_search_unlock(WT_SESSION_IMPL *session);
+extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh);
+extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
+extern void __wt_stream_set_line_buffer(FILE *fp);
+extern void __wt_stream_set_no_buffer(FILE *fp);
+extern void __wt_thread_id(char *buf, size_t buflen);
+extern void __wt_yield(void);
diff --git a/src/include/flags.h b/src/include/flags.h
index a6f42a9938f..3d9b0ed716b 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -28,7 +28,7 @@
#define WT_FILE_TYPE_DATA 0x00000002
#define WT_FILE_TYPE_DIRECTORY 0x00000004
#define WT_FILE_TYPE_LOG 0x00000008
-#define WT_FILE_TYPE_TURTLE 0x00000010
+#define WT_FILE_TYPE_REGULAR 0x00000010
#define WT_LOGSCAN_FIRST 0x00000001
#define WT_LOGSCAN_FROM_CKP 0x00000002
#define WT_LOGSCAN_ONE 0x00000004
@@ -83,24 +83,25 @@
#define WT_VERB_EVICT 0x00000010
#define WT_VERB_EVICTSERVER 0x00000020
#define WT_VERB_FILEOPS 0x00000040
-#define WT_VERB_LOG 0x00000080
-#define WT_VERB_LSM 0x00000100
-#define WT_VERB_LSM_MANAGER 0x00000200
-#define WT_VERB_METADATA 0x00000400
-#define WT_VERB_MUTEX 0x00000800
-#define WT_VERB_OVERFLOW 0x00001000
-#define WT_VERB_READ 0x00002000
-#define WT_VERB_REBALANCE 0x00004000
-#define WT_VERB_RECONCILE 0x00008000
-#define WT_VERB_RECOVERY 0x00010000
-#define WT_VERB_SALVAGE 0x00020000
-#define WT_VERB_SHARED_CACHE 0x00040000
-#define WT_VERB_SPLIT 0x00080000
-#define WT_VERB_TEMPORARY 0x00100000
-#define WT_VERB_TRANSACTION 0x00200000
-#define WT_VERB_VERIFY 0x00400000
-#define WT_VERB_VERSION 0x00800000
-#define WT_VERB_WRITE 0x01000000
+#define WT_VERB_HANDLEOPS 0x00000080
+#define WT_VERB_LOG 0x00000100
+#define WT_VERB_LSM 0x00000200
+#define WT_VERB_LSM_MANAGER 0x00000400
+#define WT_VERB_METADATA 0x00000800
+#define WT_VERB_MUTEX 0x00001000
+#define WT_VERB_OVERFLOW 0x00002000
+#define WT_VERB_READ 0x00004000
+#define WT_VERB_REBALANCE 0x00008000
+#define WT_VERB_RECONCILE 0x00010000
+#define WT_VERB_RECOVERY 0x00020000
+#define WT_VERB_SALVAGE 0x00040000
+#define WT_VERB_SHARED_CACHE 0x00080000
+#define WT_VERB_SPLIT 0x00100000
+#define WT_VERB_TEMPORARY 0x00200000
+#define WT_VERB_TRANSACTION 0x00400000
+#define WT_VERB_VERIFY 0x00800000
+#define WT_VERB_VERSION 0x01000000
+#define WT_VERB_WRITE 0x02000000
#define WT_VISIBILITY_ERR 0x00000010
/*
* flags section: END
diff --git a/src/include/misc.i b/src/include/misc.i
index 04376441340..a96ce405c89 100644
--- a/src/include/misc.i
+++ b/src/include/misc.i
@@ -65,8 +65,255 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...)
return (ret);
#else
WT_UNUSED(session);
- WT_UNUSED(fmt);
WT_UNUSED(flag);
+ WT_UNUSED(fmt);
return (0);
#endif
}
+
+/*
+ * __wt_dirlist --
+ * List a directory's files via the connection's file_directory_list method; asserts the connection is not in-memory.
+ */
+static inline int
+__wt_dirlist(WT_SESSION_IMPL *session, const char *dir,
+ const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
+ "%s: directory-list: %s prefix %s",
+ dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
+ prefix == NULL ? "all" : prefix));
+
+ return (S2C(session)->file_directory_list(
+ session, dir, prefix, flags, dirlist, countp));
+}
+
+/*
+ * __wt_directory_sync --
+ * Flush a directory, by name, to ensure file creation is durable; asserts the connection is not read-only.
+ */
+static inline int
+__wt_directory_sync(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_FILEOPS, "%s: directory-sync", name));
+
+ return (S2C(session)->file_directory_sync(session, name));
+}
+
+/*
+ * __wt_exist --
+ * Check whether the named file exists, handing existp to the connection's file_exist method.
+ */
+static inline int
+__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name));
+
+ return (S2C(session)->file_exist(session, name, existp));
+}
+
+/*
+ * __wt_remove --
+ * Remove a file by name via the connection's file_remove method; asserts the connection is not read-only.
+ */
+static inline int
+__wt_remove(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name));
+
+ return (S2C(session)->file_remove(session, name));
+}
+
+/*
+ * __wt_rename --
+ * Rename a file via the connection's file_rename method; asserts the connection is not read-only.
+ */
+static inline int
+__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to));
+
+ return (S2C(session)->file_rename(session, from, to));
+}
+
+/*
+ * __wt_filesize_name --
+ * Get the size of a file in bytes, by file name, via the connection's file_size method.
+ */
+static inline int
+__wt_filesize_name(
+ WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+ WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name));
+
+ return (S2C(session)->file_size(session, name, silent, sizep));
+}
+
+/*
+ * __wt_directory_sync_fh --
+ * Flush a directory file handle to ensure file creation is durable; asserts the connection is not read-only.
+ *
+ * We call the handle's fh_sync method directly (third argument true) rather
+ * than the normal sync path: many file systems don't need this step, don't penalize them.
+ */
+static inline int
+__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ return (fh->fh_sync(session, fh, true));
+}
+
+/*
+ * __wt_fallocate --
+ * Extend a file: allocate len at offset via the handle's fh_allocate method; asserts not read-only and not in-memory.
+ */
+static inline int
+__wt_fallocate(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX,
+ fh->name, (uintmax_t)len, (uintmax_t)offset));
+
+ return (fh->fh_allocate(session, fh, offset, len));
+}
+
+/*
+ * __wt_file_lock --
+ * Lock (lock is true) or unlock (lock is false) a file via the handle's fh_lock method.
+ */
+static inline int
+__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock)
+{
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-lock: %s", fh->name, lock ? "lock" : "unlock"));
+
+ return (fh->fh_lock(session, fh, lock));
+}
+
+/*
+ * __wt_vfprintf --
+ * vfprintf-style formatted output to a file handle, via the handle's fh_printf method.
+ */
+static inline int
+__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-printf", fh->name));
+
+ return (fh->fh_printf(session, fh, fmt, ap));
+}
+
+/*
+ * __wt_fprintf --
+ * fprintf-style formatted output to a file handle; a variadic wrapper around __wt_vfprintf.
+ */
+static inline int
+__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+{
+ WT_DECL_RET;
+ va_list ap;
+
+ va_start(ap, fmt);
+ ret = __wt_vfprintf(session, fh, fmt, ap);
+ va_end(ap);
+
+ return (ret);
+}
+
+/*
+ * __wt_read --
+ * pread-style read of len bytes at offset into buf via the handle's fh_read method; bumps the read_io statistic.
+ */
+static inline int
+__wt_read(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->name, len, (uintmax_t)offset));
+
+ WT_STAT_FAST_CONN_INCR(session, read_io);
+
+ return (fh->fh_read(session, fh, offset, len, buf));
+}
+
+/*
+ * __wt_filesize --
+ * Get the size of a file in bytes, by file handle, via the handle's fh_size method.
+ */
+static inline int
+__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->name));
+
+ return (fh->fh_size(session, fh, sizep));
+}
+
+/*
+ * __wt_fsync --
+ * Flush a file handle via its fh_sync method, passing block through; asserts the connection is not read-only.
+ */
+static inline int
+__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name));
+
+ return (fh->fh_sync(session, fh, block));
+}
+
+/*
+ * __wt_ftruncate --
+ * ftruncate-style truncate of a file handle to len via its fh_truncate method; asserts not read-only.
+ */
+static inline int
+__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-truncate: %" PRIuMAX,
+ fh->name, (uintmax_t)len));
+
+ return (fh->fh_truncate(session, fh, len));
+}
+
+/*
+ * __wt_write --
+ * pwrite-style write of len bytes from buf at offset; a read-only connection may only write the WT_SINGLETHREAD file.
+ */
+static inline int
+__wt_write(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name,
+ WT_SINGLETHREAD, strlen(WT_SINGLETHREAD)));
+
+ WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->name, len, (uintmax_t)offset));
+
+ WT_STAT_FAST_CONN_INCR(session, write_io);
+
+ return (fh->fh_write(session, fh, offset, len, buf));
+}
diff --git a/src/include/msvc.h b/src/include/msvc.h
index d5be5bd8c60..222c24c3bc6 100644
--- a/src/include/msvc.h
+++ b/src/include/msvc.h
@@ -17,6 +17,17 @@
#define WT_SIZET_FMT "Iu" /* size_t format string */
/*
+ * The Windows fadvise calls will return ENOTSUP, but the WiredTiger code
+ * currently uses POSIX flags in the API.
+ */
+#ifndef POSIX_FADV_DONTNEED
+#define POSIX_FADV_DONTNEED 0
+#endif
+#ifndef POSIX_FADV_WILLNEED
+#define POSIX_FADV_WILLNEED 0
+#endif
+
+/*
* Add MSVC-specific attributes and pragmas to types and function declarations.
*/
#define WT_COMPILER_TYPE_ALIGN(x) __declspec(align(x))
diff --git a/src/include/os.h b/src/include/os.h
index fbba7f05f88..5034b17511d 100644
--- a/src/include/os.h
+++ b/src/include/os.h
@@ -7,29 +7,6 @@
*/
/*
- * FILE handle close/open configuration.
- */
-typedef enum {
- WT_FHANDLE_APPEND, WT_FHANDLE_READ, WT_FHANDLE_WRITE
-} WT_FHANDLE_MODE;
-
-#ifdef _WIN32
-/*
- * Open in binary (untranslated) mode; translations involving carriage-return
- * and linefeed characters are suppressed.
- */
-#define WT_FOPEN_APPEND "ab"
-#define WT_FOPEN_READ "rb"
-#define WT_FOPEN_WRITE "wb"
-#else
-#define WT_FOPEN_APPEND "a"
-#define WT_FOPEN_READ "r"
-#define WT_FOPEN_WRITE "w"
-#endif
-
-#define WT_FOPEN_FIXED 0x1 /* Path isn't relative to home */
-
-/*
* Number of directory entries can grow dynamically.
*/
#define WT_DIR_ENTRY 32
@@ -81,24 +58,40 @@ typedef enum {
(t1).tv_nsec < (t2).tv_nsec ? -1 : \
(t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1)
+#define WT_OPEN_CREATE 0x001 /* Create is OK */
+#define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */
+#define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */
+#define WT_OPEN_READONLY 0x008 /* Readonly open */
+#define WT_STREAM_APPEND 0x010 /* Open a stream: append */
+#define WT_STREAM_LINE_BUFFER 0x020 /* Line buffer the stream (own bit, must not alias APPEND) */
+#define WT_STREAM_READ 0x040 /* Open a stream: read */
+#define WT_STREAM_WRITE 0x080 /* Open a stream: write */
+
struct __wt_fh {
- char *name; /* File name */
+ const char *name; /* File name */
uint64_t name_hash; /* Hash of name */
TAILQ_ENTRY(__wt_fh) q; /* List of open handles */
TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */
u_int ref; /* Reference count */
-#ifndef _WIN32
- int fd; /* POSIX file handle */
-#else
+ /*
+ * Underlying file system handle support.
+ */
+#ifdef _WIN32
HANDLE filehandle; /* Windows file handle */
HANDLE filehandle_secondary; /* Windows file handle
for file size changes */
+#else
+ int fd; /* POSIX file handle */
#endif
- wt_off_t size; /* File size */
- wt_off_t extend_size; /* File extended size */
- wt_off_t extend_len; /* File extend chunk size */
+ FILE *fp; /* ANSI C stdio handle */
+
+ /*
+ * Underlying in-memory handle support.
+ */
+ size_t off; /* Read/write offset */
+ WT_ITEM buf; /* Data */
bool direct_io; /* O_DIRECT configured */
@@ -109,4 +102,26 @@ struct __wt_fh {
WT_FALLOCATE_STD,
WT_FALLOCATE_SYS } fallocate_available;
bool fallocate_requires_locking;
+
+#define WT_FH_FLUSH_ON_CLOSE 0x01 /* Flush when closing */
+#define WT_FH_IN_MEMORY 0x02 /* In-memory, don't remove */
+ uint32_t flags;
+
+ int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int);
+ int (*fh_allocate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t);
+ int (*fh_close)(WT_SESSION_IMPL *, WT_FH *);
+ int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *);
+ int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool);
+ int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *, void **);
+ int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t);
+ int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t);
+ int (*fh_map_unmap)(
+ WT_SESSION_IMPL *, WT_FH *, void *, size_t, void **);
+ int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list);
+ int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *);
+ int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *);
+ int (*fh_sync)(WT_SESSION_IMPL *, WT_FH *, bool);
+ int (*fh_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t);
+ int (*fh_write)(
+ WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *);
};
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 1e263f22880..279858a808e 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1922,8 +1922,8 @@ struct __wt_connection {
* as a list\, such as <code>"verbose=[evictserver\,read]"</code>., a
* list\, with values chosen from the following options: \c "api"\, \c
* "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c
- * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c
- * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
+ * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\,
+ * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
* "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
* "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c
* "transaction"\, \c "verify"\, \c "version"\, \c "write"; default
@@ -2417,10 +2417,10 @@ struct __wt_connection {
* list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with
* values chosen from the following options: \c "api"\, \c "block"\, \c
* "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\,
- * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c
- * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
- * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c
- * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
+ * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
+ * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
+ * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\,
+ * \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
* @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to
* files. Ignored on non-Windows systems. Options are given as a list\, such
* as <code>"write_through=[data]"</code>. Configuring \c write_through requires
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 4533c8cbca0..9e5007b38ed 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -349,15 +349,15 @@ union __wt_rand_state;
#include "extern.h"
#include "verify_build.h"
-#include "buf.i"
-#include "misc.i"
#include "intpack.i" /* required by cell.i, packing.i */
-#include "packing.i"
+
+#include "buf.i"
#include "cache.i" /* required by txn.i */
#include "cell.i" /* required by btree.i */
-
#include "log.i"
+#include "misc.i"
#include "mutex.i" /* required by btree.i */
+#include "packing.i"
#include "txn.i" /* required by btree.i */
#include "btree.i" /* required by cursor.i */
diff --git a/src/log/log.c b/src/log/log.c
index e41073299a8..1132b54f335 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -132,7 +132,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log->log_fh->name, min_lsn->l.file, min_lsn->l.offset));
- WT_ERR(__wt_fsync(session, log->log_fh));
+ WT_ERR(__wt_fsync(session, log->log_fh, true));
log->sync_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync);
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
@@ -641,7 +641,7 @@ __log_file_header(
/*
* Make sure the header gets to disk.
*/
- WT_ERR(__wt_fsync(session, tmp.slot_fh));
+ WT_ERR(__wt_fsync(session, tmp.slot_fh, true));
if (end_lsn != NULL)
*end_lsn = tmp.slot_end_lsn;
@@ -655,7 +655,7 @@ err: __wt_scr_free(session, &buf);
*/
static int
__log_openfile(WT_SESSION_IMPL *session,
- bool ok_create, WT_FH **fh, const char *file_prefix, uint32_t id)
+ bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id)
{
WT_DECL_ITEM(buf);
WT_DECL_RET;
@@ -673,8 +673,8 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, id, file_prefix, buf));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"opening log %s", (const char *)buf->data));
- WT_ERR(__wt_open(
- session, buf->data, ok_create, false, WT_FILE_TYPE_LOG, fh));
+ WT_ERR(__wt_open(session, buf->data,
+ WT_FILE_TYPE_LOG, ok_create ? WT_OPEN_CREATE : 0, fhp));
/*
* If we are not creating the log file but opening it for reading,
* check that the magic number and versions are correct.
@@ -682,7 +682,7 @@ __log_openfile(WT_SESSION_IMPL *session,
if (!ok_create) {
WT_ERR(__wt_buf_grow(session, buf, allocsize));
memset(buf->mem, 0, allocsize);
- WT_ERR(__wt_read(session, *fh, 0, allocsize, buf->mem));
+ WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem));
logrec = (WT_LOG_RECORD *)buf->mem;
__wt_log_record_byteswap(logrec);
desc = (WT_LOG_DESC *)logrec->record;
@@ -690,7 +690,7 @@ __log_openfile(WT_SESSION_IMPL *session,
if (desc->log_magic != WT_LOG_MAGIC)
WT_PANIC_RET(session, WT_ERROR,
"log file %s corrupted: Bad magic number %" PRIu32,
- (*fh)->name, desc->log_magic);
+ (*fhp)->name, desc->log_magic);
if (desc->majorv > WT_LOG_MAJOR_VERSION ||
(desc->majorv == WT_LOG_MAJOR_VERSION &&
desc->minorv > WT_LOG_MINOR_VERSION))
@@ -724,8 +724,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num)
* If there are no pre-allocated files, return WT_NOTFOUND.
*/
logfiles = NULL;
- WT_ERR(__log_get_files(session,
- WT_LOG_PREPNAME, &logfiles, &logcount));
+ WT_ERR(__log_get_files(session, WT_LOG_PREPNAME, &logfiles, &logcount));
if (logcount == 0)
return (WT_NOTFOUND);
@@ -850,7 +849,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* the LSNs since we're the only write in progress.
*/
if (conn_open) {
- WT_RET(__wt_fsync(session, log->log_fh));
+ WT_RET(__wt_fsync(session, log->log_fh, true));
log->sync_lsn = end_lsn;
log->write_lsn = end_lsn;
log->write_start_lsn = end_lsn;
@@ -946,7 +945,7 @@ __log_truncate(WT_SESSION_IMPL *session,
WT_ERR(__log_openfile(session,
false, &log_fh, file_prefix, lsn->l.file));
WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
- WT_ERR(__wt_fsync(session, log_fh));
+ WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
/*
@@ -955,8 +954,7 @@ __log_truncate(WT_SESSION_IMPL *session,
*/
if (this_log)
goto err;
- WT_ERR(__log_get_files(session,
- WT_LOG_FILENAME, &logfiles, &logcount));
+ WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
for (i = 0; i < logcount; i++) {
WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
if (lognum > lsn->l.file &&
@@ -969,7 +967,7 @@ __log_truncate(WT_SESSION_IMPL *session,
*/
WT_ERR(__wt_ftruncate(session,
log_fh, WT_LOG_FIRST_RECORD));
- WT_ERR(__wt_fsync(session, log_fh));
+ WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
}
}
@@ -1019,7 +1017,7 @@ __wt_log_allocfile(
WT_ERR(__log_file_header(session, log_fh, NULL, true));
WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD));
WT_ERR(__log_prealloc(session, log_fh));
- WT_ERR(__wt_fsync(session, log_fh));
+ WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_prealloc: rename %s to %s",
@@ -1086,7 +1084,7 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_open: open fh to directory %s", conn->log_path));
WT_RET(__wt_open(session, conn->log_path,
- false, false, WT_FILE_TYPE_DIRECTORY, &log->log_dir_fh));
+ WT_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
}
if (!F_ISSET(conn, WT_CONN_READONLY)) {
@@ -1175,14 +1173,14 @@ __wt_log_close(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing old log %s", log->log_close_fh->name));
if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(__wt_fsync(session, log->log_close_fh));
+ WT_RET(__wt_fsync(session, log->log_close_fh, true));
WT_RET(__wt_close(session, &log->log_close_fh));
}
if (log->log_fh != NULL) {
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"closing log %s", log->log_fh->name));
if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(__wt_fsync(session, log->log_fh));
+ WT_RET(__wt_fsync(session, log->log_fh, true));
WT_RET(__wt_close(session, &log->log_fh));
log->log_fh = NULL;
}
@@ -1206,18 +1204,18 @@ __wt_log_close(WT_SESSION_IMPL *session)
* file is zeroes.
*/
static int
-__log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, bool *hole)
+__log_has_hole(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t log_size, wt_off_t offset, bool *hole)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
- wt_off_t log_size, off, remainder;
+ wt_off_t off, remainder;
size_t bufsz, rdlen;
char *buf, *zerobuf;
conn = S2C(session);
log = conn->log;
- log_size = fh->size;
remainder = log_size - offset;
*hole = false;
@@ -1415,7 +1413,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset));
WT_STAT_FAST_CONN_INCR(session, log_sync);
- WT_ERR(__wt_fsync(session, log->log_fh));
+ WT_ERR(__wt_fsync(session, log->log_fh, true));
log->sync_lsn = sync_lsn;
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
}
@@ -1561,7 +1559,8 @@ advance:
* See if there is anything non-zero at the
* end of this log file.
*/
- WT_ERR(__log_has_hole(session, log_fh,
+ WT_ERR(__log_has_hole(
+ session, log_fh, log_size,
rd_lsn.l.offset, &partial_record));
/*
* If we read the last record, go to the next file.
@@ -1625,7 +1624,7 @@ advance:
*/
if (reclen == 0) {
WT_ERR(__log_has_hole(
- session, log_fh, rd_lsn.l.offset, &eol));
+ session, log_fh, log_size, rd_lsn.l.offset, &eol));
if (eol)
/* Found a hole. This LSN is the end. */
break;
diff --git a/src/log/log_auto.c b/src/log/log_auto.c
index 54df01d01ab..d4dab4e1a33 100644
--- a/src/log/log_auto.c
+++ b/src/log/log_auto.c
@@ -131,9 +131,8 @@ __wt_logop_col_put_unpack(
}
int
-__wt_logop_col_put_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_col_put_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
WT_DECL_RET;
uint32_t fileid;
@@ -145,17 +144,18 @@ __wt_logop_col_put_print(
WT_RET(__wt_logop_col_put_unpack(
session, pp, end, &fileid, &recno, &value));
- WT_RET(__wt_fprintf(out, " \"optype\": \"col_put\",\n"));
- WT_ERR(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"col_put\",\n"));
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"recno\": \"%" PRIu64 "\",\n", recno));
WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"value\": \"%s\"", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
",\n \"value-hex\": \"%s\"", escaped));
}
@@ -204,9 +204,8 @@ __wt_logop_col_remove_unpack(
}
int
-__wt_logop_col_remove_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_col_remove_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
uint32_t fileid;
uint64_t recno;
@@ -215,10 +214,11 @@ __wt_logop_col_remove_print(
WT_RET(__wt_logop_col_remove_unpack(
session, pp, end, &fileid, &recno));
- WT_RET(__wt_fprintf(out, " \"optype\": \"col_remove\",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"col_remove\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"recno\": \"%" PRIu64 "\"", recno));
return (0);
}
@@ -264,9 +264,8 @@ __wt_logop_col_truncate_unpack(
}
int
-__wt_logop_col_truncate_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_col_truncate_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
uint32_t fileid;
uint64_t start;
@@ -276,12 +275,13 @@ __wt_logop_col_truncate_print(
WT_RET(__wt_logop_col_truncate_unpack(
session, pp, end, &fileid, &start, &stop));
- WT_RET(__wt_fprintf(out, " \"optype\": \"col_truncate\",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"col_truncate\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"start\": \"%" PRIu64 "\",\n", start));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"stop\": \"%" PRIu64 "\"", stop));
return (0);
}
@@ -327,9 +327,8 @@ __wt_logop_row_put_unpack(
}
int
-__wt_logop_row_put_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_row_put_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
WT_DECL_RET;
uint32_t fileid;
@@ -341,23 +340,24 @@ __wt_logop_row_put_print(
WT_RET(__wt_logop_row_put_unpack(
session, pp, end, &fileid, &key, &value));
- WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n"));
- WT_ERR(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"row_put\",\n"));
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"key\": \"%s\",\n", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"key-hex\": \"%s\",\n", escaped));
}
WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"value\": \"%s\"", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
",\n \"value-hex\": \"%s\"", escaped));
}
@@ -406,9 +406,8 @@ __wt_logop_row_remove_unpack(
}
int
-__wt_logop_row_remove_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_row_remove_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
WT_DECL_RET;
uint32_t fileid;
@@ -419,15 +418,16 @@ __wt_logop_row_remove_print(
WT_RET(__wt_logop_row_remove_unpack(
session, pp, end, &fileid, &key));
- WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n"));
- WT_ERR(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"row_remove\",\n"));
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"key\": \"%s\"", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
",\n \"key-hex\": \"%s\"", escaped));
}
@@ -476,9 +476,8 @@ __wt_logop_row_truncate_unpack(
}
int
-__wt_logop_row_truncate_print(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_logop_row_truncate_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
WT_DECL_RET;
uint32_t fileid;
@@ -491,26 +490,27 @@ __wt_logop_row_truncate_print(
WT_RET(__wt_logop_row_truncate_unpack(
session, pp, end, &fileid, &start, &stop, &mode));
- WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n"));
- WT_ERR(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"row_truncate\",\n"));
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\": \"%" PRIu32 "\",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &start));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"start\": \"%s\",\n", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &start));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"start-hex\": \"%s\",\n", escaped));
}
WT_ERR(__logrec_make_json_str(session, &escaped, &stop));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"stop\": \"%s\",\n", escaped));
if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {
WT_ERR(__logrec_make_hex_str(session, &escaped, &stop));
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"stop-hex\": \"%s\",\n", escaped));
}
- WT_ERR(__wt_fprintf(out,
+ WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"mode\": \"%" PRIu32 "\"", mode));
err: __wt_free(session, escaped);
@@ -518,9 +518,8 @@ err: __wt_free(session, escaped);
}
int
-__wt_txn_op_printlog(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- FILE *out, uint32_t flags)
+__wt_txn_op_printlog(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
uint32_t optype, opsize;
@@ -530,33 +529,27 @@ __wt_txn_op_printlog(
switch (optype) {
case WT_LOGOP_COL_PUT:
- WT_RET(__wt_logop_col_put_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_col_put_print(session, pp, end, flags));
break;
case WT_LOGOP_COL_REMOVE:
- WT_RET(__wt_logop_col_remove_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_col_remove_print(session, pp, end, flags));
break;
case WT_LOGOP_COL_TRUNCATE:
- WT_RET(__wt_logop_col_truncate_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_col_truncate_print(session, pp, end, flags));
break;
case WT_LOGOP_ROW_PUT:
- WT_RET(__wt_logop_row_put_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_row_put_print(session, pp, end, flags));
break;
case WT_LOGOP_ROW_REMOVE:
- WT_RET(__wt_logop_row_remove_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_row_remove_print(session, pp, end, flags));
break;
case WT_LOGOP_ROW_TRUNCATE:
- WT_RET(__wt_logop_row_truncate_print(session, pp, end, out,
- flags));
+ WT_RET(__wt_logop_row_truncate_print(session, pp, end, flags));
break;
WT_ILLEGAL_VALUE(session);
diff --git a/src/log/log_slot.c b/src/log/log_slot.c
index bcca212603a..b7efb1d9018 100644
--- a/src/log/log_slot.c
+++ b/src/log/log_slot.c
@@ -97,14 +97,12 @@ retry:
end_offset =
WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered;
slot->slot_end_lsn.l.offset += (uint32_t)end_offset;
- WT_STAT_FAST_CONN_INCRV(session,
- log_slot_consolidated, end_offset);
+ WT_STAT_FAST_CONN_INCRV(session, log_slot_consolidated, end_offset);
/*
* XXX Would like to change so one piece of code advances the LSN.
*/
log->alloc_lsn = slot->slot_end_lsn;
- WT_ASSERT(session,
- log->alloc_lsn.l.file >= log->write_lsn.l.file);
+ WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file);
return (0);
}
diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c
index 471bb65cac0..d958e733cbe 100644
--- a/src/meta/meta_turtle.c
+++ b/src/meta/meta_turtle.c
@@ -68,27 +68,27 @@ __metadata_init(WT_SESSION_IMPL *session)
static int
__metadata_load_hot_backup(WT_SESSION_IMPL *session)
{
- FILE *fp;
WT_DECL_ITEM(key);
WT_DECL_ITEM(value);
WT_DECL_RET;
+ WT_FH *fh;
bool exist;
/* Look for a hot backup file: if we find it, load it. */
WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist));
if (!exist)
return (0);
- WT_RET(__wt_fopen(session,
- WT_METADATA_BACKUP, WT_FHANDLE_READ, 0, &fp));
+ WT_RET(__wt_open(session, WT_METADATA_BACKUP,
+ WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh));
/* Read line pairs and load them into the metadata file. */
WT_ERR(__wt_scr_alloc(session, 512, &key));
WT_ERR(__wt_scr_alloc(session, 512, &value));
for (;;) {
- WT_ERR(__wt_getline(session, key, fp));
+ WT_ERR(__wt_getline(session, key, fh));
if (key->size == 0)
break;
- WT_ERR(__wt_getline(session, value, fp));
+ WT_ERR(__wt_getline(session, value, fh));
if (value->size == 0)
WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP));
WT_ERR(__wt_metadata_update(session, key->data, value->data));
@@ -96,7 +96,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session)
F_SET(S2C(session), WT_CONN_WAS_BACKUP);
-err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ));
+err: WT_TRET(__wt_close(session, &fh));
__wt_scr_free(session, &key);
__wt_scr_free(session, &value);
return (ret);
@@ -240,9 +240,9 @@ err: __wt_free(session, metaconf);
int
__wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
- FILE *fp;
WT_DECL_ITEM(buf);
WT_DECL_RET;
+ WT_FH *fh;
bool exist, match;
*valuep = NULL;
@@ -257,20 +257,20 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
if (!exist)
return (strcmp(key, WT_METAFILE_URI) == 0 ?
__metadata_config(session, valuep) : WT_NOTFOUND);
- WT_RET(__wt_fopen(session,
- WT_METADATA_TURTLE, WT_FHANDLE_READ, 0, &fp));
+ WT_RET(__wt_open(session, WT_METADATA_TURTLE,
+ WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh));
/* Search for the key. */
WT_ERR(__wt_scr_alloc(session, 512, &buf));
for (match = false;;) {
- WT_ERR(__wt_getline(session, buf, fp));
+ WT_ERR(__wt_getline(session, buf, fh));
if (buf->size == 0)
WT_ERR(WT_NOTFOUND);
if (strcmp(key, buf->data) == 0)
match = true;
/* Key matched: read the subsequent line for the value. */
- WT_ERR(__wt_getline(session, buf, fp));
+ WT_ERR(__wt_getline(session, buf, fh));
if (buf->size == 0)
WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE));
if (match)
@@ -280,7 +280,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
/* Copy the value for the caller. */
WT_ERR(__wt_strdup(session, buf->data, valuep));
-err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ));
+err: WT_TRET(__wt_close(session, &fh));
__wt_scr_free(session, &buf);
return (ret);
}
@@ -304,8 +304,8 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
* Create the turtle setup file: we currently re-write it from scratch
* every time.
*/
- WT_RET(__wt_open(session,
- WT_METADATA_TURTLE_SET, true, true, WT_FILE_TYPE_TURTLE, &fh));
+ WT_RET(__wt_open(session, WT_METADATA_TURTLE_SET,
+ WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh));
version = wiredtiger_version(&vmajor, &vminor, &vpatch);
WT_ERR(__wt_scr_alloc(session, 2 * 1024, &buf));
@@ -317,7 +317,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
WT_ERR(__wt_write(session, fh, 0, buf->size, buf->data));
/* Flush the handle and rename the file into place. */
- ret = __wt_fh_sync_and_rename(
+ ret = __wt_sync_handle_and_rename(
session, &fh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);
/* Close any file handle left open, remove any temporary file. */
diff --git a/src/support/filename.c b/src/os_common/filename.c
index 215f5b47997..83a1a985378 100644
--- a/src/support/filename.c
+++ b/src/os_common/filename.c
@@ -24,8 +24,6 @@ __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path)
* Build a file name in a scratch buffer. If the name is already an
* absolute path duplicate it, otherwise generate a path relative to the
* connection home directory.
- * Needs to work with a NULL session handle - since this is called via
- * the exists API which is used by the test utilities.
*/
int
__wt_nfilename(
@@ -36,6 +34,10 @@ __wt_nfilename(
*path = NULL;
+ /*
+ * Needs to work with a NULL session handle - since this is called via
+ * the exists API which is used by the test utilities.
+ */
if (session == NULL || __wt_absolute_path(name))
WT_RET(__wt_strndup(session, name, namelen, path));
else {
@@ -103,11 +105,11 @@ __wt_rename_and_sync_directory(
}
/*
- * __wt_fh_sync_and_rename --
- * Sync and close a file, and swap it into place.
+ * __wt_sync_handle_and_rename --
+ * Sync and close a handle, and swap it into place.
*/
int
-__wt_fh_sync_and_rename(
+__wt_sync_handle_and_rename(
WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to)
{
WT_DECL_RET;
@@ -117,7 +119,7 @@ __wt_fh_sync_and_rename(
*fhp = NULL;
/* Flush to disk and close the handle. */
- ret = __wt_fsync(session, fh);
+ ret = __wt_fsync(session, fh, true);
WT_TRET(__wt_close(session, &fh));
WT_RET(ret);
@@ -125,20 +127,68 @@ __wt_fh_sync_and_rename(
}
/*
- * __wt_sync_fp_and_rename --
- * Sync and close a file, and swap it into place.
+ * __wt_copy_and_sync --
+ * Copy a file safely; here to support the wt utility.
*/
int
-__wt_sync_fp_and_rename(
- WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to)
+__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
{
- FILE *fp;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FH *ffh, *tfh;
+ WT_SESSION_IMPL *session;
+ size_t n;
+ wt_off_t offset, size;
+ char *buf;
- fp = *fpp;
- *fpp = NULL;
+ session = (WT_SESSION_IMPL *)wt_session;
+ ffh = tfh = NULL;
+ buf = NULL;
- /* Flush to disk and close the handle. */
- WT_RET(__wt_fclose(&fp, WT_FHANDLE_WRITE));
+ /*
+ * Remove the target file if it exists, then create a temporary file,
+ * copy the original into it and rename it into place. I don't think
+ * it's necessary to remove the file, or create a copy and do a rename,
+ * it's likely safe to overwrite the backup file directly. I'm doing
+ * the remove and rename to insulate us from errors in other programs
+ * that might not detect a corrupted backup file; it's cheap insurance
+ * in a path where undetected failure is very bad.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to));
- return (__wt_rename_and_sync_directory(session, from, to));
+ WT_ERR(__wt_remove_if_exists(session, to));
+ WT_ERR(__wt_remove_if_exists(session, tmp->data));
+
+ /* Open the from and temporary file handles. */
+ WT_ERR(__wt_open(session, from,
+ WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh));
+ WT_ERR(__wt_open(session, tmp->data,
+ WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh));
+
+ /*
+ * Allocate a copy buffer. Don't use a scratch buffer, this thing is
+ * big, and we don't want it hanging around.
+ */
+#define WT_BACKUP_COPY_SIZE (128 * 1024)
+ WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf));
+
+ /* Get the file's size, then copy the bytes. */
+ WT_ERR(__wt_filesize(session, ffh, &size));
+ for (offset = 0; size > 0; size -= n, offset += n) {
+ n = (size_t)WT_MIN(size, WT_BACKUP_COPY_SIZE);
+ WT_ERR(__wt_read(session, ffh, offset, n, buf));
+ WT_ERR(__wt_write(session, tfh, offset, n, buf));
+ }
+
+ /* Close the from handle, then swap the temporary file into place. */
+ WT_ERR(__wt_close(session, &ffh));
+ ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to);
+
+err: WT_TRET(__wt_close(session, &ffh));
+ WT_TRET(__wt_close(session, &tfh));
+
+ __wt_free(session, buf);
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
diff --git a/src/os_posix/os_abort.c b/src/os_common/os_abort.c
index 034eedcfbf8..034eedcfbf8 100644
--- a/src/os_posix/os_abort.c
+++ b/src/os_common/os_abort.c
diff --git a/src/os_posix/os_alloc.c b/src/os_common/os_alloc.c
index cfc7b80450e..cfc7b80450e 100644
--- a/src/os_posix/os_alloc.c
+++ b/src/os_common/os_alloc.c
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
new file mode 100644
index 00000000000..749617b928a
--- /dev/null
+++ b/src/os_common/os_fhandle.c
@@ -0,0 +1,321 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_handle_search --
+ * Search the connection's handle hash table for a handle with a matching
+ * name, returning true if one is found. If increment_ref is set, the
+ * reference count of the matched (or newly inserted) handle is bumped.
+ * If there's no match and newfh isn't NULL, newfh is inserted. If fhp
+ * isn't NULL, it's set to the matched or inserted handle, or to NULL if
+ * there is neither. If unlock is false, the handle list lock is still
+ * held on return, whether or not a match was found.
+ */
+bool
+__wt_handle_search(WT_SESSION_IMPL *session, const char *name,
+ bool increment_ref, bool unlock, WT_FH *newfh, WT_FH **fhp)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ uint64_t bucket, hash;
+ bool found;
+
+ if (fhp != NULL)
+ *fhp = NULL;
+
+ conn = S2C(session);
+ found = false;
+
+ /* The bucket is chosen by hashing the handle name. */
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+
+ __wt_spin_lock(session, &conn->fh_lock);
+
+ /*
+ * If we already have the file open, optionally increment the reference
+ * count and return a pointer.
+ */
+ TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+ if (strcmp(name, fh->name) == 0) {
+ if (increment_ref)
+ ++fh->ref;
+ if (fhp != NULL)
+ *fhp = fh;
+ found = true;
+ break;
+ }
+
+ /* If we don't find a match, optionally add a new entry. */
+ if (!found && newfh != NULL) {
+ /* Cache the hash in the handle; close uses it to find the bucket. */
+ newfh->name_hash = hash;
+ WT_CONN_FILE_INSERT(conn, newfh, bucket);
+ (void)__wt_atomic_add32(&conn->open_file_count, 1);
+
+ if (increment_ref)
+ ++newfh->ref;
+ if (fhp != NULL)
+ *fhp = newfh;
+ }
+
+ /*
+ * Our caller may be operating on the handle itself, optionally leave
+ * the list locked.
+ */
+ if (unlock)
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ return (found);
+}
+
+/*
+ * __wt_handle_search_unlock --
+ *	Drop the connection's handle list lock, for callers that asked
+ *	__wt_handle_search to return with the lock still held.
+ */
+void
+__wt_handle_search_unlock(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+
+	conn = S2C(session);
+	__wt_spin_unlock(session, &conn->fh_lock);
+}
+
+/*
+ * __open_verbose --
+ * Optionally output a verbose message on handle open. When built with
+ * HAVE_VERBOSE and WT_VERB_FILEOPS is set, the message carries the file
+ * name, a tag for the file type and a parenthesized list of the open
+ * flags that are set. Without HAVE_VERBOSE, does nothing and returns 0.
+ */
+static inline int
+__open_verbose(WT_SESSION_IMPL *session,
+ const char *name, uint32_t file_type, uint32_t flags)
+{
+#ifdef HAVE_VERBOSE
+ WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
+ const char *file_type_tag, *sep;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
+ return (0);
+
+ /*
+ * It's useful to track file opens when debugging platforms, take some
+ * effort to output good tracking information.
+ */
+
+ /* Translate the file type into a printable tag. */
+ switch (file_type) {
+ case WT_FILE_TYPE_CHECKPOINT:
+ file_type_tag = "checkpoint";
+ break;
+ case WT_FILE_TYPE_DATA:
+ file_type_tag = "data";
+ break;
+ case WT_FILE_TYPE_DIRECTORY:
+ file_type_tag = "directory";
+ break;
+ case WT_FILE_TYPE_LOG:
+ file_type_tag = "log";
+ break;
+ case WT_FILE_TYPE_REGULAR:
+ file_type_tag = "regular";
+ break;
+ default:
+ file_type_tag = "unknown open type";
+ break;
+ }
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ /*
+ * Append the name of each set flag to the scratch buffer: the first
+ * separator opens the parenthesis, later ones are commas. LF_ISSET
+ * presumably tests the local "flags" argument -- confirm against the
+ * macro definition.
+ */
+ sep = " (";
+#define WT_OPEN_VERBOSE_FLAG(f, name) \
+ if (LF_ISSET(f)) { \
+ WT_ERR(__wt_buf_catfmt( \
+ session, tmp, "%s%s", sep, name)); \
+ sep = ", "; \
+ }
+
+ WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create");
+ WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive");
+ WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed");
+ WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly");
+ WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append");
+ WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read");
+ WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write");
+
+ /* Close the parenthesis only if at least one flag was listed. */
+ if (tmp->size != 0)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
+
+ ret = __wt_verbose(session, WT_VERB_FILEOPS,
+ "%s: handle-open: type %s%s",
+ name, file_type_tag, tmp->size == 0 ? "" : (char *)tmp->data);
+
+err: __wt_scr_free(session, &tmp);
+ return (ret);
+#else
+ WT_UNUSED(session);
+ WT_UNUSED(name);
+ WT_UNUSED(file_type);
+ WT_UNUSED(flags);
+ return (0);
+#endif
+}
+
+/*
+ * __wt_open --
+ * Open a file handle. If a handle with this name is already on the
+ * connection's list, its reference count is incremented and it is
+ * returned; otherwise a new handle is allocated, opened through the
+ * connection's handle_open function and linked onto the list.
+ */
+int
+__wt_open(WT_SESSION_IMPL *session,
+ const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *fh;
+ bool lock_file, open_called;
+ char *path;
+
+ WT_ASSERT(session, file_type != 0); /* A file type is required. */
+
+ conn = S2C(session);
+ fh = NULL;
+ open_called = false;
+ path = NULL;
+
+ WT_RET(__open_verbose(session, name, file_type, flags));
+
+ /* Check if the handle is already open. */
+ if (__wt_handle_search(session, name, true, true, NULL, &fh)) {
+ /*
+ * XXX
+ * The in-memory implementation has to reset the file offset
+ * when a file is re-opened (which obviously also depends on
+ * in-memory configurations never opening a file in more than
+ * one thread at a time). This needs to be fixed.
+ */
+ if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1)
+ fh->off = 0;
+ *fhp = fh;
+ return (0);
+ }
+
+ /* Allocate a structure and set the name. */
+ WT_ERR(__wt_calloc_one(session, &fh));
+ WT_ERR(__wt_strdup(session, name, &fh->name));
+
+ /*
+ * If this is a read-only connection, open all files read-only except
+ * the lock file.
+ *
+ * The only file created in read-only mode is the lock file.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
+ if (!lock_file)
+ LF_SET(WT_OPEN_READONLY);
+ WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE));
+ }
+
+ /* Create the path to the file. */
+ if (!LF_ISSET(WT_OPEN_FIXED))
+ WT_ERR(__wt_filename(session, name, &path));
+
+ /* Call the underlying open function. */
+ WT_ERR(conn->handle_open(
+ session, fh, path == NULL ? name : path, file_type, flags));
+ open_called = true;
+
+ /*
+ * Repeat the check for a match: if there's no match, link our newly
+ * created handle onto the database's list of files.
+ */
+ if (__wt_handle_search(session, name, true, true, fh, fhp)) {
+ /*
+ * This cleanup is reached two ways: the search found a handle
+ * with the same name already on the list (in which case the
+ * search has set *fhp to that handle and ours is redundant), or
+ * an earlier step failed and jumped to the error label. Either
+ * way, discard the handle we built; only call the close method
+ * if the underlying open succeeded.
+ */
+err: if (open_called)
+ WT_TRET(fh->fh_close(session, fh));
+ if (fh != NULL) {
+ __wt_free(session, fh->name);
+ __wt_free(session, fh);
+ }
+ }
+
+ __wt_free(session, path);
+ return (ret);
+}
+
+/*
+ * __wt_close --
+ * Close a file handle: clear the caller's pointer, drop one reference
+ * and, once no references remain and the handle isn't flagged as
+ * in-memory, unlink it from the connection's list, call its close
+ * method and free it. A NULL handle is ignored.
+ */
+int
+__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *fh;
+ uint64_t bucket;
+
+ conn = S2C(session);
+
+ if (*fhp == NULL)
+ return (0);
+ fh = *fhp;
+ *fhp = NULL;
+
+ /*
+ * Track handle-close as a file operation, so open and close match.
+ *
+ * NOTE(review): the caller's pointer has already been cleared, so if
+ * the verbose call fails we return without dropping the reference --
+ * confirm that's acceptable.
+ */
+ WT_RET(__wt_verbose(
+ session, WT_VERB_FILEOPS, "%s: handle-close", fh->name));
+
+ /*
+ * If the reference count hasn't gone to 0, or if it's an in-memory
+ * object, we're done.
+ *
+ * Assert the reference count is correct, but don't let it wrap.
+ */
+ __wt_spin_lock(session, &conn->fh_lock);
+ WT_ASSERT(session, fh->ref > 0);
+ if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) {
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (0);
+ }
+
+ /* Remove from the list; the bucket comes from the cached name hash. */
+ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_CONN_FILE_REMOVE(conn, fh, bucket);
+ (void)__wt_atomic_sub32(&conn->open_file_count, 1);
+
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ /* Discard underlying resources. */
+ ret = fh->fh_close(session, fh);
+
+ __wt_free(session, fh->name);
+ __wt_free(session, fh);
+
+ return (ret);
+}
+
+/*
+ * __wt_close_connection_close --
+ *	Walk the connection's list of file handles at connection close and
+ *	force each one closed, complaining about any that shouldn't still be
+ *	open.
+ */
+int
+__wt_close_connection_close(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *handle;
+
+	conn = S2C(session);
+
+	for (;;) {
+		handle = TAILQ_FIRST(&conn->fhqh);
+		if (handle == NULL)
+			break;
+
+		/*
+		 * The only handles expected here are those of an in-memory
+		 * configuration with no remaining references; anything else
+		 * is reported and turns the return value into EBUSY.
+		 */
+		if (!(F_ISSET(conn, WT_CONN_IN_MEMORY) && handle->ref == 0)) {
+			ret = EBUSY;
+			__wt_errx(session,
+			    "Connection has open file handles: %s",
+			    handle->name);
+		}
+
+		/*
+		 * Set a single reference and clear the in-memory flag so the
+		 * close below really discards the handle.
+		 */
+		handle->ref = 1;
+		F_CLR(handle, WT_FH_IN_MEMORY);
+
+		WT_TRET(__wt_close(session, &handle));
+	}
+	return (ret);
+}
diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c
new file mode 100644
index 00000000000..e79054e56ed
--- /dev/null
+++ b/src/os_common/os_fs_inmemory.c
@@ -0,0 +1,466 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * In-memory information: per-connection state for the in-memory file
+ * implementation, stored in the connection's "inmemory" field.
+ */
+typedef struct {
+ /* Taken by the in-memory handle methods around buffer/offset access. */
+ WT_SPINLOCK lock;
+} WT_IM;
+
+/*
+ * __im_directory_list --
+ *	Directory listing for the in-memory configuration: not supported,
+ *	always fails with ENOTSUP.
+ */
+static int
+__im_directory_list(WT_SESSION_IMPL *session, const char *dir,
+    const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+{
+	/* Only the session is used, for the error message. */
+	WT_UNUSED(countp);
+	WT_UNUSED(dirlist);
+	WT_UNUSED(flags);
+	WT_UNUSED(prefix);
+	WT_UNUSED(dir);
+	WT_UNUSED(session);
+
+	WT_RET_MSG(session, ENOTSUP, "directory-list");
+}
+
+/*
+ * __im_directory_sync --
+ *	Directory flush for the in-memory configuration: nothing is done,
+ *	success is always returned.
+ */
+static int
+__im_directory_sync(WT_SESSION_IMPL *session, const char *path)
+{
+	/* Neither argument is used. */
+	WT_UNUSED(path);
+	WT_UNUSED(session);
+
+	return (0);
+}
+
+/*
+ * __im_file_exist --
+ *	Report whether a file exists: in the in-memory configuration that
+ *	means a handle with the name is on the connection's handle list.
+ */
+static int
+__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+	bool found;
+
+	/* Look only: no reference is taken and the list lock is released. */
+	found = __wt_handle_search(session, name, false, true, NULL, NULL);
+
+	*existp = found;
+	return (0);
+}
+
+/*
+ * __im_file_remove --
+ *	POSIX remove: discard the in-memory handle with the given name, if
+ *	there is one.
+ */
+static int
+__im_file_remove(WT_SESSION_IMPL *session, const char *name)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+
+	/* No handle with this name: nothing to discard. */
+	if (!__wt_handle_search(session, name, true, true, NULL, &fh))
+		return (ret);
+
+	/* The search took a reference, and it should be the only one. */
+	WT_ASSERT(session, fh->ref == 1);
+
+	/* Clear the in-memory flag so the close discards the handle. */
+	F_CLR(fh, WT_FH_IN_MEMORY);
+	ret = __wt_close(session, &fh);
+
+	return (ret);
+}
+
+/*
+ * __im_file_rename --
+ *	POSIX rename: give the in-memory handle named "from" the name "to"
+ *	and move it to the hash bucket for its new name. Fails with EPERM
+ *	if a handle named "to" already exists and with ENOENT if there is no
+ *	handle named "from".
+ */
+static int
+__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *fh;
+	uint64_t bucket, hash;
+	char *to_name;
+
+	conn = S2C(session);
+
+	/* We'll need a copy of the target name. */
+	WT_RET(__wt_strdup(session, to, &to_name));
+
+	__wt_spin_lock(session, &conn->fh_lock);
+
+	/* Make sure the target name isn't active. */
+	hash = __wt_hash_city64(to, strlen(to));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(to, fh->name) == 0)
+			WT_ERR(EPERM);
+
+	/* Find the source name. */
+	hash = __wt_hash_city64(from, strlen(from));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(from, fh->name) == 0)
+			break;
+	if (fh == NULL)
+		WT_ERR(ENOENT);
+
+	/* Remove source from the list. */
+	WT_CONN_FILE_REMOVE(conn, fh, bucket);
+
+	/* Swap the names. */
+	__wt_free(session, fh->name);
+	fh->name = to_name;
+	to_name = NULL;
+
+	/*
+	 * Put source back on the list under its new name. The handle caches
+	 * its name hash and __wt_close uses the cached value to choose the
+	 * bucket the handle is removed from, so the cached hash has to
+	 * follow the rename or a later close unlinks from the wrong bucket.
+	 */
+	hash = __wt_hash_city64(to, strlen(to));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	fh->name_hash = hash;
+	WT_CONN_FILE_INSERT(conn, fh, bucket);
+
+	if (0) {
+err:		__wt_free(session, to_name);
+	}
+	__wt_spin_unlock(session, &conn->fh_lock);
+
+	return (ret);
+}
+
+/*
+ * __im_file_size --
+ *	Get the size of a file in bytes, by file name. Returns ENOENT if no
+ *	handle with the name exists; the "silent" argument is ignored.
+ */
+static int
+__im_file_size(
+    WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+	WT_IM *im;
+
+	WT_UNUSED(silent);
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * Search without taking a reference and keep the handle list locked
+	 * while the handle is examined. The search leaves the list locked
+	 * whether or not it finds a match, so the lock must be released on
+	 * both paths.
+	 */
+	if (__wt_handle_search(session, name, false, false, NULL, &fh))
+		*sizep = (wt_off_t)fh->buf.size;
+	else
+		ret = ENOENT;
+	__wt_handle_search_unlock(session);
+
+	__wt_spin_unlock(session, &im->lock);
+	return (ret);
+}
+
+/*
+ * __im_handle_advise --
+ *	POSIX fadvise: not supported for in-memory handles, ENOTSUP is
+ *	returned without an error message.
+ */
+static int
+__im_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+	/* None of the arguments are used. */
+	WT_UNUSED(advice);
+	WT_UNUSED(len);
+	WT_UNUSED(offset);
+	WT_UNUSED(fh);
+	WT_UNUSED(session);
+
+	return (ENOTSUP);
+}
+
+/*
+ * __im_handle_close --
+ * ANSI C close/fclose: release the buffer holding the in-memory file's
+ * contents. Always returns 0.
+ */
+static int
+__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+ __wt_buf_free(session, &fh->buf);
+
+ return (0);
+}
+
+/*
+ * __im_handle_getc --
+ *	ANSI C fgetc: return the byte at the handle's current offset and
+ *	advance the offset, or EOF once the offset reaches the end of the
+ *	data. Always returns 0; the character or EOF is stored in *chp.
+ */
+static int
+__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	WT_IM *im;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * Read the byte as unsigned, as fgetc does: read through a plain
+	 * char pointer, bytes with the high bit set would be negative where
+	 * char is signed, and a 0xff byte would be mistaken for EOF.
+	 */
+	if (fh->off >= fh->buf.size)
+		*chp = EOF;
+	else
+		*chp = ((uint8_t *)fh->buf.data)[fh->off++];
+
+	__wt_spin_unlock(session, &im->lock);
+	return (0);
+}
+
+/*
+ * __im_handle_lock --
+ *	Lock/unlock a file: a no-op for in-memory handles, success is always
+ *	returned.
+ */
+static int
+__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	/* None of the arguments are used. */
+	WT_UNUSED(lock);
+	WT_UNUSED(fh);
+	WT_UNUSED(session);
+
+	return (0);
+}
+
+/*
+ * __im_handle_printf --
+ *	ANSI C vfprintf: format the arguments into a scratch buffer, then
+ *	copy the result into the handle's buffer at the handle's current
+ *	offset and advance the offset. Returns EIO if the formatting itself
+ *	fails, or the error from growing either buffer.
+ */
+static int
+__im_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	va_list ap_copy;
+	WT_DECL_ITEM(tmp);
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t len;
+	int nbytes;
+
+	im = S2C(session)->inmemory;
+
+	/*
+	 * Build the string we're writing. The in-memory lock isn't held
+	 * yet, so failures in this loop must not go through the unlock.
+	 */
+	WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp));
+	for (;;) {
+		/* Every formatting pass needs its own argument list copy. */
+		va_copy(ap_copy, ap);
+		nbytes = vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy);
+		va_end(ap_copy);
+
+		/* A negative return is a formatting error, not a length. */
+		if (nbytes < 0) {
+			ret = EIO;
+			goto done;
+		}
+
+		len = (size_t)nbytes;
+		if (len < tmp->memsize) {
+			tmp->data = tmp->mem;
+			tmp->size = len;
+			break;
+		}
+
+		/* The output was truncated: make room and format again. */
+		if ((ret = __wt_buf_extend(session, tmp, len + 1)) != 0)
+			goto done;
+	}
+
+	__wt_spin_lock(session, &im->lock);
+
+	/* Grow the handle's buffer as necessary. */
+	WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len));
+
+	/*
+	 * Copy the data into place and update the offset.
+	 *
+	 * NOTE(review): unlike __im_handle_write, the buffer's data size
+	 * isn't updated here -- confirm whether the size and getc methods
+	 * are expected to see printf output.
+	 */
+	memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len);
+	fh->off += len;
+
+err:	__wt_spin_unlock(session, &im->lock);
+
+done:	__wt_scr_free(session, &tmp);
+	return (ret);
+}
+
+/*
+ * __im_handle_read --
+ * POSIX pread: copy bytes from the in-memory file, starting at the given
+ * offset, into the caller's buffer. A read starting inside the data is
+ * cut down to the bytes available and still succeeds; a read starting at
+ * or past the end of the data fails with WT_ERROR and a message.
+ */
+static int
+__im_handle_read(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ WT_DECL_RET;
+ WT_IM *im;
+ size_t off;
+
+ im = S2C(session)->inmemory;
+ __wt_spin_lock(session, &im->lock);
+
+ off = (size_t)offset;
+ if (off < fh->buf.size) {
+ /* Never copy past the end of the data. */
+ len = WT_MIN(len, fh->buf.size - off);
+ memcpy(buf, (uint8_t *)fh->buf.mem + off, len);
+ /* Leave the handle's offset just past the bytes read. */
+ fh->off = off + len;
+ } else
+ ret = WT_ERROR;
+
+ /* Report the failure after releasing the lock. */
+ __wt_spin_unlock(session, &im->lock);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, WT_ERROR,
+ "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
+ "offset %" WT_SIZET_FMT,
+ fh->name, len, off);
+}
+
+/*
+ * __im_handle_size --
+ *	Get the size of a file in bytes, by file handle: the size is the
+ *	amount of data in the handle's buffer.
+ */
+static int
+__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	wt_off_t size;
+
+	WT_UNUSED(session);
+
+	size = (wt_off_t)fh->buf.size;
+	*sizep = size;
+
+	return (0);
+}
+
+/*
+ * __im_handle_sync --
+ *	POSIX fflush/fsync: there's nothing to flush for an in-memory file,
+ *	so a blocking request succeeds immediately and a non-blocking one
+ *	is refused with ENOTSUP.
+ */
+static int
+__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	WT_UNUSED(fh);
+	WT_UNUSED(session);
+
+	if (block)
+		return (0);
+
+	/*
+	 * Callers attempting asynchronous flush handle ENOTSUP returns, and
+	 * won't make further attempts.
+	 */
+	return (ENOTSUP);
+}
+
+/*
+ * __im_handle_truncate --
+ * POSIX ftruncate: make sure the handle's buffer can hold len bytes and
+ * zero the allocated memory that lies beyond the current data.
+ *
+ * NOTE(review): the buffer's data size isn't changed here, so the size
+ * reported for the file appears to be unaffected by a truncate, whether
+ * extending or shrinking -- confirm that's intended.
+ */
+static int
+__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+ WT_DECL_RET;
+ WT_IM *im;
+
+ im = S2C(session)->inmemory;
+ __wt_spin_lock(session, &im->lock);
+
+ WT_ERR(__wt_buf_grow(session, &fh->buf, (size_t)len));
+ /* Clear everything from the end of the data to the end of the memory. */
+ memset((uint8_t *)
+ fh->buf.mem + fh->buf.size, 0, fh->buf.memsize - fh->buf.size);
+
+err: __wt_spin_unlock(session, &im->lock);
+ return (ret);
+}
+
+/*
+ * __im_handle_write --
+ * POSIX pwrite: copy the caller's bytes into the in-memory file at the
+ * given offset, growing the buffer as needed. The file's data size is
+ * extended if the write goes past its current end, and the handle's
+ * offset is left just past the bytes written. On failure, an error
+ * message is reported.
+ */
+static int
+__im_handle_write(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+ WT_DECL_RET;
+ WT_IM *im;
+ size_t off;
+
+ im = S2C(session)->inmemory;
+ __wt_spin_lock(session, &im->lock);
+
+ off = (size_t)offset;
+ /* Ask for 1KB more than this write needs. */
+ WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024));
+
+ memcpy((uint8_t *)fh->buf.data + off, buf, len);
+ /* Only writes past the current end change the data size. */
+ if (off + len > fh->buf.size)
+ fh->buf.size = off + len;
+ fh->off = off + len;
+
+ /* Report any failure after releasing the lock. */
+err: __wt_spin_unlock(session, &im->lock);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret,
+ "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at "
+ "offset %" WT_SIZET_FMT,
+ fh->name, len, off);
+}
+
+/*
+ * __im_handle_open --
+ *	POSIX fopen/open: set up a handle as an in-memory file by installing
+ *	the in-memory handle methods, flagging the handle as in-memory and
+ *	starting its offset at zero. Nothing is opened, so this can't fail.
+ */
+static int
+__im_handle_open(WT_SESSION_IMPL *session,
+    WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags)
+{
+	/* Only the handle is used. */
+	WT_UNUSED(flags);
+	WT_UNUSED(file_type);
+	WT_UNUSED(path);
+	WT_UNUSED(session);
+
+	/* Install the in-memory handle methods. */
+	fh->fh_write = __im_handle_write;
+	fh->fh_truncate = __im_handle_truncate;
+	fh->fh_sync = __im_handle_sync;
+	fh->fh_size = __im_handle_size;
+	fh->fh_read = __im_handle_read;
+	fh->fh_printf = __im_handle_printf;
+	fh->fh_lock = __im_handle_lock;
+	fh->fh_getc = __im_handle_getc;
+	fh->fh_close = __im_handle_close;
+	fh->fh_advise = __im_handle_advise;
+
+	/* Mark the handle as in-memory, positioned at the start. */
+	F_SET(fh, WT_FH_IN_MEMORY);
+	fh->off = 0;
+
+	return (0);
+}
+
+/*
+ * __wt_os_inmemory --
+ * Initialize an in-memory configuration: point the connection's file
+ * functions at the in-memory implementations, then allocate the
+ * in-memory state and its lock and attach it to the connection.
+ */
+int
+__wt_os_inmemory(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_IM *im;
+
+ conn = S2C(session);
+ im = NULL;
+
+ /*
+ * Initialize the in-memory jump table.
+ *
+ * NOTE(review): the table is installed before the allocation below, so
+ * it stays installed if the allocation or lock initialization fails,
+ * with no in-memory state attached -- confirm callers treat that
+ * failure as fatal.
+ */
+ conn->file_directory_list = __im_directory_list;
+ conn->file_directory_sync = __im_directory_sync;
+ conn->file_exist = __im_file_exist;
+ conn->file_remove = __im_file_remove;
+ conn->file_rename = __im_file_rename;
+ conn->file_size = __im_file_size;
+ conn->handle_open = __im_handle_open;
+
+ /* Allocate an in-memory structure. */
+ WT_RET(__wt_calloc_one(session, &im));
+ WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O"));
+ conn->inmemory = im;
+
+ return (0);
+
+ /* Lock initialization failed: release the structure. */
+err: __wt_free(session, im);
+ return (ret);
+}
+
+/*
+ * __wt_os_inmemory_cleanup --
+ *	Discard an in-memory configuration: detach the in-memory state from
+ *	the connection, then destroy its lock and free it. Does nothing if
+ *	no in-memory state is attached.
+ */
+int
+__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_IM *im;
+
+	conn = S2C(session);
+
+	im = conn->inmemory;
+	if (im == NULL)
+		return (0);
+
+	/* Detach the state before tearing it down. */
+	conn->inmemory = NULL;
+
+	__wt_spin_destroy(session, &im->lock);
+	__wt_free(session, im);
+
+	return (ret);
+}
diff --git a/src/os_common/os_fs_stdio.c b/src/os_common/os_fs_stdio.c
new file mode 100644
index 00000000000..9baba9b6945
--- /dev/null
+++ b/src/os_common/os_fs_stdio.c
@@ -0,0 +1,239 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __stdio_handle_advise --
+ * POSIX fadvise. Not supported for stdio handles: the offset, length
+ * and advice are ignored and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_advise(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+ WT_UNUSED(offset);
+ WT_UNUSED(len);
+ WT_UNUSED(advice);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name);
+}
+
+/*
+ * __stdio_handle_allocate --
+ * POSIX fallocate. Not supported for stdio handles: the offset and
+ * length are ignored and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_allocate(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+{
+ WT_UNUSED(offset);
+ WT_UNUSED(len);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name);
+}
+
+/*
+ * __stdio_handle_close --
+ * ANSI C close/fclose. Not supported for stdio handles: the stream is
+ * left open and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name);
+}
+
+/*
+ * __stdio_handle_getc --
+ * ANSI C fgetc. Not supported for stdio handles: *chp is not written
+ * and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+ WT_UNUSED(chp);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name);
+}
+
+/*
+ * __stdio_handle_lock --
+ * Lock/unlock a file. Not supported for stdio handles: the lock
+ * argument is ignored and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+ WT_UNUSED(lock);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name);
+}
+
+/*
+ * __stdio_handle_map --
+ * Map a file. Not supported for stdio handles: none of the output
+ * arguments are written and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_map(WT_SESSION_IMPL *session,
+ WT_FH *fh, void *p, size_t *lenp, void **mappingcookie)
+{
+ WT_UNUSED(p);
+ WT_UNUSED(lenp);
+ WT_UNUSED(mappingcookie);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name);
+}
+
+/*
+ * __stdio_handle_map_discard --
+ * Discard a section of a mapped region. Not supported for stdio
+ * handles: the arguments are ignored and the call always fails with
+ * ENOTSUP.
+ */
+static int
+__stdio_handle_map_discard(
+ WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len)
+{
+ WT_UNUSED(p);
+ WT_UNUSED(len);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name);
+}
+
+/*
+ * __stdio_handle_map_preload --
+ * Preload a section of a mapped region. Not supported for stdio
+ * handles: the arguments are ignored and the call always fails with
+ * ENOTSUP.
+ */
+static int
+__stdio_handle_map_preload(
+ WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len)
+{
+ WT_UNUSED(p);
+ WT_UNUSED(len);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name);
+}
+
+/*
+ * __stdio_handle_map_unmap --
+ * Unmap a file. Not supported for stdio handles: the arguments are
+ * ignored and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_map_unmap(WT_SESSION_IMPL *session,
+ WT_FH *fh, void *p, size_t len, void **mappingcookie)
+{
+ WT_UNUSED(p);
+ WT_UNUSED(len);
+ WT_UNUSED(mappingcookie);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name);
+}
+
+/*
+ * __stdio_handle_printf --
+ * ANSI C vfprintf. Formats to the handle's stdio stream (fh->fp).
+ * Returns 0 when vfprintf reports a non-negative result; any negative
+ * result fails with EIO.
+ */
+static int
+__stdio_handle_printf(
+ WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+ if (vfprintf(fh->fp, fmt, ap) >= 0)
+ return (0);
+ WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+}
+
+/*
+ * __stdio_handle_read --
+ * POSIX pread. Not supported for stdio handles: the buffer is not
+ * written and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_read(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ WT_UNUSED(offset);
+ WT_UNUSED(len);
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name);
+}
+
+/*
+ * __stdio_handle_size --
+ * Get the size of a file in bytes, by file handle. Not supported for
+ * stdio handles: *sizep is not written and the call always fails with
+ * ENOTSUP.
+ */
+static int
+__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+ WT_UNUSED(sizep);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name);
+}
+
+/*
+ * __stdio_handle_sync --
+ * POSIX fflush/fsync. For stdio handles only fflush is called on the
+ * handle's stream; the block argument is ignored. Returns 0 when
+ * fflush succeeds, otherwise fails with the value of __wt_errno().
+ */
+static int
+__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+ WT_UNUSED(block);
+
+ if (fflush(fh->fp) == 0)
+ return (0);
+ WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+}
+
+/*
+ * __stdio_handle_truncate --
+ * POSIX ftruncate. Not supported for stdio handles: the length is
+ * ignored and the call always fails with ENOTSUP.
+ */
+static int
+__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+ WT_UNUSED(len);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name);
+}
+
+/*
+ * __stdio_handle_write --
+ * POSIX pwrite. Not supported for stdio handles: nothing is written
+ * and the call always fails with ENOTSUP (output goes through the
+ * handle's printf method instead).
+ */
+static int
+__stdio_handle_write(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+ WT_UNUSED(offset);
+ WT_UNUSED(len);
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name);
+}
+
+/*
+ * __stdio_func_init --
+ * Initialize stdio functions.
+ *
+ * Records the name and stdio stream in the handle (the name pointer is
+ * stored, not copied) and installs the stdio method table. Of the
+ * methods installed, only fh_printf and fh_sync operate on the stream;
+ * the others in this file fail with ENOTSUP.
+ */
+static void
+__stdio_func_init(WT_FH *fh, const char *name, FILE *fp)
+{
+ fh->name = name;
+ fh->fp = fp;
+
+ fh->fh_advise = __stdio_handle_advise;
+ fh->fh_allocate = __stdio_handle_allocate;
+ fh->fh_close = __stdio_handle_close;
+ fh->fh_getc = __stdio_handle_getc;
+ fh->fh_lock = __stdio_handle_lock;
+ fh->fh_map = __stdio_handle_map;
+ fh->fh_map_discard = __stdio_handle_map_discard;
+ fh->fh_map_preload = __stdio_handle_map_preload;
+ fh->fh_map_unmap = __stdio_handle_map_unmap;
+ fh->fh_printf = __stdio_handle_printf;
+ fh->fh_read = __stdio_handle_read;
+ fh->fh_size = __stdio_handle_size;
+ fh->fh_sync = __stdio_handle_sync;
+ fh->fh_truncate = __stdio_handle_truncate;
+ fh->fh_write = __stdio_handle_write;
+}
+
+/*
+ * __wt_os_stdio --
+ * Initialize the stdio configuration.
+ *
+ * Sets up the handles returned by WT_STDERR and WT_STDOUT for the
+ * session to wrap the C stderr and stdout streams. Always returns 0.
+ */
+int
+__wt_os_stdio(WT_SESSION_IMPL *session)
+{
+ __stdio_func_init(WT_STDERR(session), "stderr", stderr);
+ __stdio_func_init(WT_STDOUT(session), "stdout", stdout);
+
+ return (0);
+}
diff --git a/src/os_posix/os_getline.c b/src/os_common/os_getline.c
index c0ca96852de..01e11581edf 100644
--- a/src/os_posix/os_getline.c
+++ b/src/os_common/os_getline.c
@@ -20,7 +20,7 @@
* (so the caller's EOF marker is a returned line length of 0).
*/
int
-__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp)
+__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh)
{
int c;
@@ -30,7 +30,11 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp)
*/
WT_RET(__wt_buf_init(session, buf, 100));
- while ((c = fgetc(fp)) != EOF) {
+ for (;;) {
+ WT_RET(fh->fh_getc(session, fh, &c));
+ if (c == EOF)
+ break;
+
/* Leave space for a trailing NUL. */
WT_RET(__wt_buf_extend(session, buf, buf->size + 2));
if (c == '\n') {
@@ -40,8 +44,6 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp)
}
((char *)buf->mem)[buf->size++] = (char)c;
}
- if (c == EOF && ferror(fp))
- WT_RET_MSG(session, __wt_errno(), "file read");
((char *)buf->mem)[buf->size] = '\0';
diff --git a/src/os_posix/os_getopt.c b/src/os_common/os_getopt.c
index 0306ad1d79d..0306ad1d79d 100644
--- a/src/os_posix/os_getopt.c
+++ b/src/os_common/os_getopt.c
diff --git a/src/os_common/os_init.c b/src/os_common/os_init.c
new file mode 100644
index 00000000000..512216c52a5
--- /dev/null
+++ b/src/os_common/os_init.c
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_os_init --
+ *	Initialize the OS layer: in-memory connections use the in-memory
+ *	implementation, all others use the platform's native one (Windows
+ *	when built with MSVC, POSIX otherwise).
+ */
+int
+__wt_os_init(WT_SESSION_IMPL *session)
+{
+	/* In-memory configurations never touch the native file system. */
+	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+		return (__wt_os_inmemory(session));
+
+#if defined(_MSC_VER)
+	return (__wt_os_win(session));
+#else
+	return (__wt_os_posix(session));
+#endif
+}
+
+/*
+ * __wt_os_cleanup --
+ *	Clean up the OS layer, using the cleanup routine that matches the
+ *	implementation selected by __wt_os_init.
+ */
+int
+__wt_os_cleanup(WT_SESSION_IMPL *session)
+{
+	/* In-memory configurations have their own state to discard. */
+	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+		return (__wt_os_inmemory_cleanup(session));
+
+#if defined(_MSC_VER)
+	return (__wt_os_win_cleanup(session));
+#else
+	return (__wt_os_posix_cleanup(session));
+#endif
+}
diff --git a/src/os_posix/os_strtouq.c b/src/os_common/os_strtouq.c
index 0ae604fc761..0ae604fc761 100644
--- a/src/os_posix/os_strtouq.c
+++ b/src/os_common/os_strtouq.c
diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c
index 83e77aa5312..78ae5f8edd4 100644
--- a/src/os_posix/os_dir.c
+++ b/src/os_posix/os_dir.c
@@ -11,13 +11,12 @@
#include <dirent.h>
/*
- * __wt_dirlist --
- * Get a list of files from a directory, optionally filtered by
- * a given prefix.
+ * __wt_posix_directory_list --
+ * Get a list of files from a directory, POSIX version.
*/
int
-__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
- uint32_t flags, char ***dirlist, u_int *countp)
+__wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir,
+ const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
{
struct dirent *dp;
DIR *dirp;
@@ -36,24 +35,20 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
dirallocsz = 0;
dirsz = 0;
entries = NULL;
- if (flags == 0)
- LF_SET(WT_DIRLIST_INCLUDE);
-
- WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS,
- "wt_dirlist of %s %s prefix %s",
- path, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
- prefix == NULL ? "all" : prefix));
WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret);
if (ret != 0)
- WT_ERR_MSG(session, ret, "%s: opendir", path);
- for (dirsz = 0, count = 0; (dp = readdir(dirp)) != NULL;) {
+ WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path);
+
+ for (count = 0; (dp = readdir(dirp)) != NULL;) {
/*
* Skip . and ..
*/
if (strcmp(dp->d_name, ".") == 0 ||
strcmp(dp->d_name, "..") == 0)
continue;
+
+ /* The list of files is optionally filtered by a prefix. */
match = false;
if (prefix != NULL &&
((LF_ISSET(WT_DIRLIST_INCLUDE) &&
@@ -78,8 +73,8 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
if (count > 0)
*dirlist = entries;
*countp = count;
-err:
- if (dirp != NULL)
+
+err: if (dirp != NULL)
(void)closedir(dirp);
__wt_free(session, path);
@@ -91,5 +86,7 @@ err:
__wt_free(session, entries[count]);
__wt_free(session, entries);
}
- WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix);
+ WT_RET_MSG(session, ret,
+ "%s: directory-list, prefix \"%s\"",
+ dir, prefix == NULL ? "" : prefix);
}
diff --git a/src/os_posix/os_exist.c b/src/os_posix/os_exist.c
deleted file mode 100644
index 87f0e219d2e..00000000000
--- a/src/os_posix/os_exist.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_exist --
- * Return if the file exists.
- */
-int
-__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp)
-{
- struct stat sb;
- WT_DECL_RET;
- char *path;
-
- *existp = false;
-
- WT_RET(__wt_filename(session, filename, &path));
-
- WT_SYSCALL_RETRY(stat(path, &sb), ret);
-
- __wt_free(session, path);
-
- if (ret == 0) {
- *existp = true;
- return (0);
- }
- if (ret == ENOENT)
- return (0);
-
- WT_RET_MSG(session, ret, "%s: fstat", filename);
-}
diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c
index bf20a99bdef..22879d36182 100644
--- a/src/os_posix/os_fallocate.c
+++ b/src/os_posix/os_fallocate.c
@@ -13,11 +13,11 @@
#include <sys/syscall.h>
#endif
/*
- * __wt_fallocate_config --
- * Configure file-extension behavior for a file handle.
+ * __wt_posix_handle_allocate_configure --
+ * Configure POSIX file-extension behavior for a file handle.
*/
void
-__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh)
+__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh)
{
WT_UNUSED(session);
@@ -40,11 +40,11 @@ __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh)
}
/*
- * __wt_std_fallocate --
+ * __posix_std_fallocate --
* Linux fallocate call.
*/
static int
-__wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
{
#if defined(HAVE_FALLOCATE)
WT_DECL_RET;
@@ -60,11 +60,11 @@ __wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
}
/*
- * __wt_sys_fallocate --
+ * __posix_sys_fallocate --
* Linux fallocate call (system call version).
*/
static int
-__wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
{
#if defined(__linux__) && defined(SYS_fallocate)
WT_DECL_RET;
@@ -86,11 +86,11 @@ __wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
}
/*
- * __wt_posix_fallocate --
+ * __posix_posix_fallocate --
* POSIX fallocate call.
*/
static int
-__wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
+__posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
{
#if defined(HAVE_POSIX_FALLOCATE)
WT_DECL_RET;
@@ -106,36 +106,29 @@ __wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len)
}
/*
- * __wt_fallocate --
- * Extend a file.
+ * __wt_posix_handle_allocate --
+ * POSIX fallocate.
*/
int
-__wt_fallocate(
+__wt_posix_handle_allocate(
WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
{
WT_DECL_RET;
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
switch (fh->fallocate_available) {
/*
* Check for already configured handles and make the configured call.
*/
case WT_FALLOCATE_POSIX:
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: posix_fallocate", fh->name));
- if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0)
+ if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0)
return (0);
WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name);
case WT_FALLOCATE_STD:
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: fallocate", fh->name));
- if ((ret = __wt_std_fallocate(fh, offset, len)) == 0)
+ if ((ret = __posix_std_fallocate(fh, offset, len)) == 0)
return (0);
WT_RET_MSG(session, ret, "%s: fallocate", fh->name);
case WT_FALLOCATE_SYS:
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: sys_fallocate", fh->name));
- if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0)
+ if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0)
return (0);
WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name);
@@ -152,17 +145,17 @@ __wt_fallocate(
* fallocate (and the system call version of fallocate) first to
* avoid locking on Linux if at all possible.
*/
- if ((ret = __wt_std_fallocate(fh, offset, len)) == 0) {
+ if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) {
fh->fallocate_available = WT_FALLOCATE_STD;
fh->fallocate_requires_locking = false;
return (0);
}
- if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0) {
+ if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) {
fh->fallocate_available = WT_FALLOCATE_SYS;
fh->fallocate_requires_locking = false;
return (0);
}
- if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0) {
+ if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) {
fh->fallocate_available = WT_FALLOCATE_POSIX;
#if !defined(__linux__)
fh->fallocate_requires_locking = false;
diff --git a/src/os_posix/os_filesize.c b/src/os_posix/os_filesize.c
deleted file mode 100644
index 72242e351bf..00000000000
--- a/src/os_posix/os_filesize.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_filesize --
- * Get the size of a file in bytes.
- */
-int
-__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
-{
- struct stat sb;
- WT_DECL_RET;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fstat", fh->name));
-
- WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret);
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
-
- WT_RET_MSG(session, ret, "%s: fstat", fh->name);
-}
-
-/*
- * __wt_filesize_name --
- * Return the size of a file in bytes, given a file name.
- */
-int
-__wt_filesize_name(WT_SESSION_IMPL *session,
- const char *filename, bool silent, wt_off_t *sizep)
-{
- struct stat sb;
- WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, filename, &path));
-
- WT_SYSCALL_RETRY(stat(path, &sb), ret);
-
- __wt_free(session, path);
-
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
-
- /*
- * Some callers of this function expect failure if the file doesn't
- * exist, and don't want an error message logged.
- */
- if (!silent)
- WT_RET_MSG(session, ret, "%s: fstat", filename);
- return (ret);
-}
diff --git a/src/os_posix/os_flock.c b/src/os_posix/os_flock.c
deleted file mode 100644
index e2056f7636c..00000000000
--- a/src/os_posix/os_flock.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_bytelock --
- * Lock/unlock a byte in a file.
- */
-int
-__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock)
-{
- struct flock fl;
- WT_DECL_RET;
-
- /*
- * WiredTiger requires this function be able to acquire locks past
- * the end of file.
- *
- * Note we're using fcntl(2) locking: all fcntl locks associated with a
- * file for a given process are removed when any file descriptor for the
- * file is closed by the process, even if a lock was never requested for
- * that file descriptor.
- */
- fl.l_start = byte;
- fl.l_len = 1;
- fl.l_type = lock ? F_WRLCK : F_UNLCK;
- fl.l_whence = SEEK_SET;
-
- WT_SYSCALL_RETRY(fcntl(fhp->fd, F_SETLK, &fl), ret);
-
- return (ret);
-}
diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c
new file mode 100644
index 00000000000..68d70594582
--- /dev/null
+++ b/src/os_posix/os_fs.c
@@ -0,0 +1,734 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __posix_sync --
+ * Underlying support function to flush a file handle.
+ *
+ * fd is the descriptor to flush; name and func are used only to build
+ * error messages. When block is false and sync_file_range is
+ * available, a SYNC_FILE_RANGE_WRITE request over the whole file is
+ * issued and the function returns; without sync_file_range a
+ * non-blocking request quietly returns ENOTSUP. When block is true,
+ * F_FULLFSYNC is tried first where defined, then fdatasync (if
+ * available) or fsync. Returns 0 on success or the failing call's
+ * error.
+ */
+static int
+__posix_sync(WT_SESSION_IMPL *session,
+ int fd, const char *name, const char *func, bool block)
+{
+ WT_DECL_RET;
+
+#ifdef HAVE_SYNC_FILE_RANGE
+ if (!block) {
+ WT_SYSCALL_RETRY(sync_file_range(fd,
+ (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func);
+ }
+#else
+ /*
+ * Callers attempting asynchronous flush handle ENOTSUP returns, and
+ * won't make further attempts.
+ */
+ if (!block)
+ return (ENOTSUP);
+#endif
+
+#if defined(F_FULLFSYNC)
+ /*
+ * OS X fsync documentation:
+ * "Note that while fsync() will flush all data from the host to the
+ * drive (i.e. the "permanent storage device"), the drive itself may
+ * not physically write the data to the platters for quite some time
+ * and it may be written in an out-of-order sequence. For applications
+ * that require tighter guarantees about the integrity of their data,
+ * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
+ * the drive to flush all buffered data to permanent storage."
+ *
+ * OS X F_FULLFSYNC fcntl documentation:
+ * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
+ * Disk Format (UDF) file systems."
+ */
+ WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret);
+ if (ret == 0)
+ return (0);
+ /*
+ * Assume F_FULLFSYNC failed because the file system doesn't support it
+ * and fallback to fsync.
+ */
+#endif
+#if defined(HAVE_FDATASYNC)
+ WT_SYSCALL_RETRY(fdatasync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func);
+#else
+ WT_SYSCALL_RETRY(fsync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: %s: fsync", name, func);
+#endif
+}
+
+/*
+ * __posix_directory_sync --
+ * Flush a directory to ensure file creation is durable.
+ *
+ * Only does work on Linux; elsewhere it is a no-op returning 0. The
+ * directory flushed is the portion of path up to and including its
+ * last '/', or the connection's home directory when path is NULL or
+ * contains no '/'. The directory is opened read-only, flushed with a
+ * blocking __posix_sync and closed. A close failure is logged and
+ * returned only if the flush itself succeeded.
+ */
+static int
+__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
+{
+#ifdef __linux__
+ WT_DECL_RET;
+ int fd, tret;
+ const char *dir;
+ char *copy;
+
+ tret = 0;
+ /*
+ * POSIX 1003.1 does not require that fsync of a file handle ensures the
+ * entry in the directory containing the file has also reached disk (and
+ * there are historic Linux filesystems requiring this), do an explicit
+ * fsync on a file descriptor for the directory to be sure.
+ */
+ copy = NULL;
+ if (path == NULL || (dir = strrchr(path, '/')) == NULL)
+ path = S2C(session)->home;
+ else {
+ /*
+ * Copy the directory name, leaving the trailing slash in place,
+ * so a path of "/foo" doesn't result in an empty string.
+ */
+ WT_RET(__wt_strndup(
+ session, path, (size_t)(dir - path) + 1, &copy));
+ path = copy;
+ }
+
+ WT_SYSCALL_RETRY((
+ (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: directory-sync: open", path);
+
+ ret = __posix_sync(session, fd, path, "directory-sync", true);
+
+ WT_SYSCALL_RETRY(close(fd), tret);
+ if (tret != 0) {
+ __wt_err(session, tret, "%s: directory-sync: close", path);
+ if (ret == 0)
+ ret = tret;
+ }
+err: __wt_free(session, copy);
+ return (ret);
+#else
+ WT_UNUSED(session);
+ WT_UNUSED(path);
+ return (0);
+#endif
+}
+
+/*
+ * __posix_file_exist --
+ *	Return if the file exists.
+ *
+ *	The name is expanded with __wt_filename and checked with stat. On
+ *	return *existp is true if stat succeeded and false otherwise,
+ *	including on every error path, so the caller never reads an
+ *	unset value. A missing file (ENOENT) is not an error; any other
+ *	stat failure is logged and returned.
+ */
+static int
+__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+	struct stat sb;
+	WT_DECL_RET;
+	char *path;
+
+	/* Give the caller a defined answer even if we fail below. */
+	*existp = false;
+
+	WT_RET(__wt_filename(session, name, &path));
+	name = path;
+
+	WT_SYSCALL_RETRY(stat(name, &sb), ret);
+	if (ret == 0)
+		*existp = true;
+	else if (ret == ENOENT)
+		ret = 0;
+	else
+		__wt_err(session, ret, "%s: file-exist: stat", name);
+
+	__wt_free(session, path);
+	return (ret);
+}
+
+/*
+ * __posix_file_remove --
+ * Remove a file.
+ *
+ * In diagnostic builds the call is refused with EINVAL when
+ * __wt_handle_search reports a match for the name (per the message,
+ * an open handle on the file). Otherwise the name is expanded with
+ * __wt_filename and removed with remove(); a failure is logged and
+ * its error returned.
+ */
+static int
+__posix_file_remove(WT_SESSION_IMPL *session, const char *name)
+{
+ WT_DECL_RET;
+ char *path;
+
+#ifdef HAVE_DIAGNOSTIC
+ if (__wt_handle_search(session, name, false, true, NULL, NULL))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-remove: file has open handles", name);
+#endif
+
+ WT_RET(__wt_filename(session, name, &path));
+ name = path;
+
+ WT_SYSCALL_RETRY(remove(name), ret);
+ if (ret != 0)
+ __wt_err(session, ret, "%s: file-remove: remove", name);
+
+ __wt_free(session, path);
+ return (ret);
+}
+
+/*
+ * __posix_file_rename --
+ * Rename a file.
+ *
+ * In diagnostic builds the call is refused with EINVAL when
+ * __wt_handle_search reports a match for either name (per the
+ * message, an open handle on the file). Both names are expanded with
+ * __wt_filename before calling rename(); a failure is logged and its
+ * error returned. Both expanded paths are freed on every path through
+ * the err label.
+ */
+static int
+__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+ WT_DECL_RET;
+ char *from_path, *to_path;
+
+#ifdef HAVE_DIAGNOSTIC
+ if (__wt_handle_search(session, from, false, true, NULL, NULL))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-rename: file has open handles", from);
+ if (__wt_handle_search(session, to, false, true, NULL, NULL))
+ WT_RET_MSG(session, EINVAL,
+ "%s: file-rename: file has open handles", to);
+#endif
+
+ from_path = to_path = NULL;
+ WT_ERR(__wt_filename(session, from, &from_path));
+ from = from_path;
+ WT_ERR(__wt_filename(session, to, &to_path));
+ to = to_path;
+
+ WT_SYSCALL_RETRY(rename(from, to), ret);
+ if (ret != 0)
+ __wt_err(session, ret,
+ "%s to %s: file-rename: rename", from, to);
+
+err: __wt_free(session, from_path);
+ __wt_free(session, to_path);
+ return (ret);
+}
+
+/*
+ * __posix_file_size --
+ * Get the size of a file in bytes, by file name.
+ *
+ * The name is expanded with __wt_filename and checked with stat. On
+ * success *sizep is set to the file's st_size; on failure *sizep is
+ * left untouched and the stat error is returned. The error message is
+ * suppressed only when the error is ENOENT and silent is true.
+ */
+static int
+__posix_file_size(
+ WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+ struct stat sb;
+ WT_DECL_RET;
+ char *path;
+
+ WT_RET(__wt_filename(session, name, &path));
+ name = path;
+
+ /*
+ * Optionally don't log errors on ENOENT; some callers of this function
+ * expect failure in that case and don't want an error message logged.
+ */
+ WT_SYSCALL_RETRY(stat(name, &sb), ret);
+ if (ret == 0)
+ *sizep = sb.st_size;
+ else if (ret != ENOENT || !silent)
+ __wt_err(session, ret, "%s: file-size: stat", name);
+
+ __wt_free(session, path);
+
+ return (ret);
+}
+
+/*
+ * __posix_handle_advise --
+ *	POSIX fadvise.
+ *
+ *	Passes the advice for the given byte range to posix_fadvise. The
+ *	call quietly returns ENOTSUP, without logging, when posix_fadvise
+ *	is not available, when pre-load is requested on a direct I/O
+ *	handle, or when posix_fadvise rejects the request with EINVAL.
+ *	Any other failure is logged and returned.
+ */
+static int
+__posix_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+#if defined(HAVE_POSIX_FADVISE)
+	WT_DECL_RET;
+
+	/*
+	 * Refuse pre-load when direct I/O is configured for the file, the
+	 * kernel cache isn't interesting. The advice value is handed to
+	 * posix_fadvise below, so it's compared against the POSIX_FADV_*
+	 * constant, not the posix_madvise one.
+	 */
+	if (advice == POSIX_FADV_WILLNEED && fh->direct_io)
+		return (ENOTSUP);
+
+	WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret);
+	if (ret == 0)
+		return (0);
+
+	/*
+	 * Treat EINVAL as not-supported, some systems don't support some flags.
+	 * Quietly fail, callers expect not-supported failures.
+	 */
+	if (ret == EINVAL)
+		return (ENOTSUP);
+
+	WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name);
+#else
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(advice);
+
+	/* Quietly fail, callers expect not-supported failures. */
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * __posix_handle_close --
+ * ANSI C close/fclose.
+ *
+ * A handle with no stdio stream (fh->fp is NULL) has its descriptor
+ * closed with close(). A handle with a stream is flushed first when
+ * WT_FH_FLUSH_ON_CLOSE is set, then closed with fclose; a flush failure
+ * is logged but fclose is still attempted. The value returned is the
+ * error of the most recent failing call, or 0.
+ */
+static int
+__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+ WT_DECL_RET;
+
+ if (fh->fp == NULL) {
+ WT_SYSCALL_RETRY(close(fh->fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name);
+ }
+
+ /* If the stream was opened for writing, flush the file. */
+ if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) {
+ ret = __wt_errno();
+ __wt_err(session, ret, "%s: handle-close: fflush", fh->name);
+ }
+
+ /* Close the file. */
+ if (fclose(fh->fp) != 0) {
+ ret = __wt_errno();
+ __wt_err(session, ret, "%s: handle-close: fclose", fh->name);
+ }
+ return (ret);
+}
+
+/*
+ * __posix_handle_getc --
+ * ANSI C fgetc.
+ *
+ * Stores the next character from the handle's stream, or EOF, in
+ * *chp. Reaching end-of-file is not an error: EOF is stored and 0
+ * returned unless the stream's error indicator is set. Fails with
+ * ENOTSUP when the handle has no stdio stream.
+ */
+static int
+__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+ if (fh->fp == NULL)
+ WT_RET_MSG(session,
+ ENOTSUP, "%s: handle-getc: no stream configured", fh->name);
+
+ *chp = fgetc(fh->fp);
+ if (*chp != EOF || !ferror(fh->fp))
+ return (0);
+ WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name);
+}
+
+/*
+ * __posix_handle_lock --
+ * Lock/unlock a file.
+ *
+ * Uses fcntl F_SETLK on a one-byte range at offset 0 of the file: a
+ * write lock when lock is true, an unlock otherwise. Returns 0 on
+ * success; a failure is logged and its error returned.
+ */
+static int
+__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+ struct flock fl;
+ WT_DECL_RET;
+
+ /*
+ * WiredTiger requires this function be able to acquire locks past
+ * the end of file.
+ *
+ * Note we're using fcntl(2) locking: all fcntl locks associated with a
+ * file for a given process are removed when any file descriptor for the
+ * file is closed by the process, even if a lock was never requested for
+ * that file descriptor.
+ */
+ fl.l_start = 0;
+ fl.l_len = 1;
+ fl.l_type = lock ? F_WRLCK : F_UNLCK;
+ fl.l_whence = SEEK_SET;
+
+ WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name);
+}
+
+/*
+ * __posix_handle_printf --
+ *	ANSI C vfprintf.
+ *
+ *	Formats to the handle's stdio stream. Fails with ENOTSUP when the
+ *	handle has no stream configured and with EIO when vfprintf reports
+ *	a negative result; returns 0 otherwise.
+ */
+static int
+__posix_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	/*
+	 * Messages in this file are prefixed with the handle operation's
+	 * name, keep this one consistent with the handle-getc equivalent.
+	 */
+	if (fh->fp == NULL)
+		WT_RET_MSG(session, ENOTSUP,
+		    "%s: handle-printf: no stream configured", fh->name);
+
+	if (vfprintf(fh->fp, fmt, ap) >= 0)
+		return (0);
+	WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+}
+
+/*
+ * __posix_handle_read --
+ * POSIX pread.
+ *
+ * Reads exactly len bytes starting at offset into buf, looping over
+ * short reads. A pread that returns 0 before len bytes have been read
+ * fails with WT_ERROR; a negative return fails with the value of
+ * __wt_errno(). Returns 0 once all bytes are read.
+ */
+static int
+__posix_handle_read(
+ WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ size_t chunk;
+ ssize_t nr;
+ uint8_t *addr;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(session,
+ !fh->direct_io ||
+ S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf &
+ (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment &&
+ len % S2C(session)->buffer_alignment == 0));
+
+ /* Break reads larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0)
+ WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
+ "%s: handle-read: pread: failed to read %"
+ WT_SIZET_FMT " bytes at offset %" PRIuMAX,
+ fh->name, chunk, (uintmax_t)offset);
+ }
+ return (0);
+}
+
+/*
+ * __posix_handle_size --
+ * Get the size of a file in bytes, by file handle.
+ *
+ * Uses fstat on the handle's descriptor. On success *sizep is set to
+ * st_size and 0 returned; on failure *sizep is left untouched and the
+ * error is logged and returned.
+ */
+static int
+__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+ struct stat sb;
+ WT_DECL_RET;
+
+ WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name);
+}
+
+/*
+ * __posix_handle_sync --
+ * POSIX fflush/fsync.
+ *
+ * A handle with no stdio stream has its descriptor flushed through
+ * __posix_sync, passing block through. A handle with a stream is only
+ * flushed with fflush, and block is not consulted in that case.
+ */
+static int
+__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+ if (fh->fp == NULL)
+ return (__posix_sync(
+ session, fh->fd, fh->name, "handle-sync", block));
+
+ if (fflush(fh->fp) == 0)
+ return (0);
+ WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+}
+
+/*
+ * __posix_handle_truncate --
+ * POSIX ftruncate.
+ *
+ * Sets the length of the file behind the handle's descriptor to len.
+ * Returns 0 on success; a failure is logged and its error returned.
+ */
+static int
+__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+ WT_DECL_RET;
+
+ WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name);
+}
+
+/*
+ * __posix_handle_write --
+ * POSIX pwrite.
+ *
+ * Writes exactly len bytes from buf starting at offset, looping over
+ * short writes. A negative pwrite return fails with the value of
+ * __wt_errno(). Returns 0 once all bytes are written.
+ */
+static int
+__posix_handle_write(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+ size_t chunk;
+ ssize_t nw;
+ const uint8_t *addr;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(session,
+ !fh->direct_io ||
+ S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf &
+ (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment &&
+ len % S2C(session)->buffer_alignment == 0));
+
+ /*
+ * Break writes larger than 1GB into 1GB chunks.
+ *
+ * NOTE(review): unlike the read path, a pwrite return of 0 is not
+ * treated as an error; the loop makes no progress in that case and
+ * would retry the same chunk. Confirm that can't happen here.
+ */
+ for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0)
+ WT_RET_MSG(session, __wt_errno(),
+ "%s: handle-write: pwrite: failed to write %"
+ WT_SIZET_FMT " bytes at offset %" PRIuMAX,
+ fh->name, chunk, (uintmax_t)offset);
+ }
+ return (0);
+}
+
+/*
+ * __posix_handle_open_cloexec --
+ * Prevent child access to file handles.
+ *
+ * Sets FD_CLOEXEC on the descriptor with fcntl, but only in builds
+ * where fcntl and FD_CLOEXEC are available and O_CLOEXEC is not; in
+ * all other builds this is a no-op returning 0. The name is used only
+ * in the error message.
+ */
+static inline int
+__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
+{
+#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
+ int f;
+
+ /*
+ * Security:
+ * The application may spawn a new process, and we don't want another
+ * process to have access to our file handles. There's an obvious race
+ * between the open and this call, prefer the flag to open if available.
+ */
+ if ((f = fcntl(fd, F_GETFD)) == -1 ||
+ fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
+ WT_RET_MSG(session, __wt_errno(),
+ "%s: handle-open: fcntl", name);
+ return (0);
+#else
+ WT_UNUSED(session);
+ WT_UNUSED(fd);
+ WT_UNUSED(name);
+ return (0);
+#endif
+}
+
+/*
+ * __posix_handle_open --
+ * Open a file handle.
+ *
+ * Opens name with open(2) and fills in the handle. Directories
+ * (WT_FILE_TYPE_DIRECTORY) are opened read-only and skip the rest of
+ * the configuration. For other file types the open flags are built
+ * from the WT_OPEN_* flags, the connection's direct I/O and log-sync
+ * settings and the file type; data files additionally get
+ * POSIX_FADV_RANDOM advice where posix_fadvise is available. When one
+ * of the WT_STREAM_* flags is set, a stdio stream is attached to the
+ * descriptor with fdopen. On success the descriptor is stored in
+ * fh->fd and the POSIX handle methods are installed. On failure any
+ * descriptor opened here is closed and the error returned; fh->fd is
+ * left at -1.
+ */
+static int
+__posix_handle_open(WT_SESSION_IMPL *session,
+ WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ mode_t mode;
+ int f, fd, tret;
+ bool direct_io;
+ const char *stream_mode;
+
+ conn = S2C(session);
+ direct_io = false;
+
+ /* Set up error handling. */
+ fh->fd = fd = -1;
+ fh->fp = NULL;
+
+ if (file_type == WT_FILE_TYPE_DIRECTORY) {
+ f = O_RDONLY;
+#ifdef O_CLOEXEC
+ /*
+ * Security:
+ * The application may spawn a new process, and we don't want
+ * another process to have access to our file handles.
+ */
+ f |= O_CLOEXEC;
+#endif
+ WT_SYSCALL_RETRY((
+ (fd = open(name, f, 0444)) == -1 ? 1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
+ WT_ERR(__posix_handle_open_cloexec(session, fd, name));
+ goto directory_open;
+ }
+
+ f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR;
+ if (LF_ISSET(WT_OPEN_CREATE)) {
+ f |= O_CREAT;
+ if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ f |= O_EXCL;
+ mode = 0666;
+ } else
+ mode = 0;
+
+#ifdef O_BINARY
+ /* Windows clones: we always want to treat the file as a binary. */
+ f |= O_BINARY;
+#endif
+#ifdef O_CLOEXEC
+ /*
+ * Security:
+ * The application may spawn a new process, and we don't want another
+ * process to have access to our file handles.
+ */
+ f |= O_CLOEXEC;
+#endif
+#ifdef O_DIRECT
+ /*
+ * Direct I/O: file-type is a flag from the set of possible flags stored
+ * in the connection handle during configuration, check for a match.
+ * Also, "direct_io=checkpoint" configures direct I/O for readonly data
+ * files.
+ */
+ if (FLD_ISSET(conn->direct_io, file_type) ||
+ (LF_ISSET(WT_OPEN_READONLY) &&
+ file_type == WT_FILE_TYPE_DATA &&
+ FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) {
+ f |= O_DIRECT;
+ direct_io = true;
+ }
+#endif
+ fh->direct_io = direct_io;
+#ifdef O_NOATIME
+ /* Avoid updating metadata for read-only workloads. */
+ if (file_type == WT_FILE_TYPE_DATA)
+ f |= O_NOATIME;
+#endif
+
+ if (file_type == WT_FILE_TYPE_LOG &&
+ FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
+#ifdef O_DSYNC
+ f |= O_DSYNC;
+#elif defined(O_SYNC)
+ f |= O_SYNC;
+#else
+ WT_ERR_MSG(session, ENOTSUP,
+ "unsupported log sync mode configured");
+#endif
+ }
+
+ WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret,
+ direct_io ?
+ "%s: handle-open: open: failed with direct I/O configured, "
+ "some filesystem types do not support direct I/O" :
+ "%s: handle-open: open", name);
+ WT_ERR(__posix_handle_open_cloexec(session, fd, name));
+
+ /* Disable read-ahead on trees: it slows down random read workloads. */
+#if defined(HAVE_POSIX_FADVISE)
+ if (file_type == WT_FILE_TYPE_DATA) {
+ WT_SYSCALL_RETRY(
+ posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret,
+ "%s: handle-open: posix_fadvise", name);
+ }
+#endif
+
+ /* Optionally configure a stdio stream API. */
+ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) {
+ case WT_STREAM_APPEND:
+ stream_mode = "a";
+ F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+ break;
+ case WT_STREAM_READ:
+ stream_mode = "r";
+ break;
+ case WT_STREAM_WRITE:
+ stream_mode = "w";
+ F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+ break;
+ case 0:
+ default:
+ stream_mode = NULL;
+ break;
+ }
+ if (stream_mode != NULL) {
+ if ((fh->fp = fdopen(fd, stream_mode)) == NULL)
+ WT_ERR_MSG(session, __wt_errno(),
+ "%s: handle-open: fdopen", name);
+ if (LF_ISSET(WT_STREAM_LINE_BUFFER))
+ __wt_stream_set_line_buffer(fh->fp);
+ }
+
+directory_open:
+ fh->fd = fd;
+
+ /* Configure fallocate calls. */
+ __wt_posix_handle_allocate_configure(session, fh);
+
+ fh->fh_advise = __posix_handle_advise;
+ fh->fh_allocate = __wt_posix_handle_allocate;
+ fh->fh_close = __posix_handle_close;
+ fh->fh_getc = __posix_handle_getc;
+ fh->fh_lock = __posix_handle_lock;
+ fh->fh_map = __wt_posix_map;
+ fh->fh_map_discard = __wt_posix_map_discard;
+ fh->fh_map_preload = __wt_posix_map_preload;
+ fh->fh_map_unmap = __wt_posix_map_unmap;
+ fh->fh_printf = __posix_handle_printf;
+ fh->fh_read = __posix_handle_read;
+ fh->fh_size = __posix_handle_size;
+ fh->fh_sync = __posix_handle_sync;
+ fh->fh_truncate = __posix_handle_truncate;
+ fh->fh_write = __posix_handle_write;
+
+ return (0);
+
+err: if (fd != -1) {
+ WT_SYSCALL_RETRY(close(fd), tret);
+ if (tret != 0)
+ __wt_err(session, tret, "%s: handle-open: close", name);
+ }
+ return (ret);
+}
+
+/*
+ * __wt_os_posix --
+ * Initialize a POSIX configuration.
+ *
+ * Points the connection's file and handle-open jump table at the POSIX
+ * implementations. Nothing is allocated; always returns 0.
+ */
+int
+__wt_os_posix(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /* Initialize the POSIX jump table. */
+ conn->file_directory_list = __wt_posix_directory_list;
+ conn->file_directory_sync = __posix_directory_sync;
+ conn->file_exist = __posix_file_exist;
+ conn->file_remove = __posix_file_remove;
+ conn->file_rename = __posix_file_rename;
+ conn->file_size = __posix_file_size;
+ conn->handle_open = __posix_handle_open;
+
+ return (0);
+}
+
+/*
+ * __wt_os_posix_cleanup --
+ * Discard a POSIX configuration.
+ *
+ * There is nothing to release for the POSIX configuration: the session
+ * is unused and the call always returns 0.
+ */
+int
+__wt_os_posix_cleanup(WT_SESSION_IMPL *session)
+{
+ WT_UNUSED(session);
+
+ return (0);
+}
diff --git a/src/os_posix/os_fsync.c b/src/os_posix/os_fsync.c
deleted file mode 100644
index 0bd0359338b..00000000000
--- a/src/os_posix/os_fsync.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_handle_sync --
- * Flush a file handle.
- */
-static int
-__wt_handle_sync(int fd)
-{
- WT_DECL_RET;
-
-#if defined(F_FULLFSYNC)
- /*
- * OS X fsync documentation:
- * "Note that while fsync() will flush all data from the host to the
- * drive (i.e. the "permanent storage device"), the drive itself may
- * not physically write the data to the platters for quite some time
- * and it may be written in an out-of-order sequence. For applications
- * that require tighter guarantees about the integrity of their data,
- * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
- * the drive to flush all buffered data to permanent storage."
- *
- * OS X F_FULLFSYNC fcntl documentation:
- * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
- * Disk Format (UDF) file systems."
- */
- WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret);
- if (ret == 0)
- return (0);
- /*
- * Assume F_FULLFSYNC failed because the file system doesn't support it
- * and fallback to fsync.
- */
-#endif
-#if defined(HAVE_FDATASYNC)
- WT_SYSCALL_RETRY(fdatasync(fd), ret);
-#else
- WT_SYSCALL_RETRY(fsync(fd), ret);
-#endif
- return (ret);
-}
-
-/*
- * __wt_directory_sync_fh --
- * Flush a directory file handle. We don't use __wt_fsync because
- * most file systems don't require this step and we don't want to
- * penalize them by calling fsync.
- */
-int
-__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
-{
-#ifdef __linux__
- WT_DECL_RET;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- if ((ret = __wt_handle_sync(fh->fd)) == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: fsync", fh->name);
-#else
- WT_UNUSED(session);
- WT_UNUSED(fh);
- return (0);
-#endif
-}
-
-/*
- * __wt_directory_sync --
- * Flush a directory to ensure a file creation is durable.
- */
-int
-__wt_directory_sync(WT_SESSION_IMPL *session, const char *path)
-{
-#ifdef __linux__
- WT_DECL_RET;
- int fd, tret;
- const char *dir;
- char *copy;
-
- /*
- * POSIX 1003.1 does not require that fsync of a file handle ensures the
- * entry in the directory containing the file has also reached disk (and
- * there are historic Linux filesystems requiring this), do an explicit
- * fsync on a file descriptor for the directory to be sure.
- */
- copy = NULL;
- if (path == NULL || (dir = strrchr(path, '/')) == NULL)
- path = S2C(session)->home;
- else {
- /*
- * Copy the directory name, leaving the trailing slash in place,
- * so a path of "/foo" doesn't result in an empty string.
- */
- WT_RET(__wt_strndup(
- session, path, (size_t)(dir - path) + 1, &copy));
- path = copy;
- }
-
- WT_SYSCALL_RETRY(((fd =
- open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret);
- __wt_free(session, copy);
- if (ret != 0)
- WT_RET_MSG(session, ret, "%s: open", path);
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- if ((ret = __wt_handle_sync(fd)) != 0)
- WT_ERR_MSG(session, ret, "%s: fsync", path);
-
-err: WT_SYSCALL_RETRY(close(fd), tret);
- if (tret != 0)
- __wt_err(session, tret, "%s: close", path);
- WT_TRET(tret);
- return (ret);
-#else
- WT_UNUSED(session);
- WT_UNUSED(path);
- return (0);
-#endif
-}
-
-/*
- * __wt_fsync --
- * Flush a file handle.
- */
-int
-__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_DECL_RET;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fsync", fh->name));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- if ((ret = __wt_handle_sync(fh->fd)) == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s fsync error", fh->name);
-}
-
-/*
- * __wt_fsync_async --
- * Flush a file handle and don't wait for the result.
- */
-int
-__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh)
-{
-#ifdef HAVE_SYNC_FILE_RANGE
- WT_DECL_RET;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: sync_file_range", fh->name));
-
- WT_SYSCALL_RETRY(sync_file_range(fh->fd,
- (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: sync_file_range", fh->name);
-#else
- WT_UNUSED(session);
- WT_UNUSED(fh);
- return (0);
-#endif
-}
diff --git a/src/os_posix/os_ftruncate.c b/src/os_posix/os_ftruncate.c
deleted file mode 100644
index 94d6cba3bf5..00000000000
--- a/src/os_posix/os_ftruncate.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_ftruncate --
- * Truncate a file.
- */
-int
-__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
-{
- WT_DECL_RET;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret);
- if (ret == 0)
- return (0);
-
- WT_RET_MSG(session, ret, "%s ftruncate error", fh->name);
-}
diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c
index 42aeeac4a5e..de28891ffd1 100644
--- a/src/os_posix/os_map.c
+++ b/src/os_posix/os_map.c
@@ -9,58 +9,74 @@
#include "wt_internal.h"
/*
- * __wt_mmap --
+ * __wt_posix_map --
* Map a file into memory.
*/
int
-__wt_mmap(WT_SESSION_IMPL *session,
+__wt_posix_map(WT_SESSION_IMPL *session,
WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie)
{
+ size_t len;
+ wt_off_t file_size;
void *map;
- size_t orig_size;
WT_UNUSED(mappingcookie);
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ /*
+ * Mapping isn't possible if direct I/O is configured for the file: the
+ * Linux open(2) documentation says applications should avoid mixing
+ * mmap(2) of files with direct I/O to the same files.
+ */
+ if (fh->direct_io)
+ return (ENOTSUP);
+
/*
- * Record the current size and only map and set that as the length, it
- * could change between the map call and when we set the return length.
- * For the same reason we could actually map past the end of the file;
- * we don't read bytes past the end of the file though, so as long as
- * the map call succeeds, it's all OK.
+ * There's no locking here to prevent the underlying file from changing
+ * underneath us; our caller needs to ensure consistency of the mapped
+ * region vs. any other file activity.
*/
- orig_size = (size_t)fh->size;
- if ((map = mmap(NULL, orig_size,
+ WT_RET(__wt_filesize(session, fh, &file_size));
+ len = (size_t)file_size;
+
+ (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
+
+ if ((map = mmap(NULL, len,
PROT_READ,
#ifdef MAP_NOCORE
MAP_NOCORE |
#endif
MAP_PRIVATE,
- fh->fd, (wt_off_t)0)) == MAP_FAILED) {
- WT_RET_MSG(session, __wt_errno(),
- "%s map error: failed to map %" WT_SIZET_FMT " bytes",
- fh->name, orig_size);
- }
- (void)__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: map %p: %" WT_SIZET_FMT " bytes", fh->name, map, orig_size);
+ fh->fd, (wt_off_t)0)) == MAP_FAILED)
+ WT_RET_MSG(session,
+ __wt_errno(), "%s: memory-map: mmap", fh->name);
*(void **)mapp = map;
- *lenp = orig_size;
+ *lenp = len;
return (0);
}
+#ifdef HAVE_POSIX_MADVISE
/*
- * __wt_mmap_preload --
+ * __posix_map_preload_madvise --
* Cause a section of a memory map to be faulted in.
*/
-int
-__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size)
+static int
+__posix_map_preload_madvise(
+ WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
{
-#ifdef HAVE_POSIX_MADVISE
- /* Linux requires the address be aligned to a 4KB boundary. */
- WT_CONNECTION_IMPL *conn = S2C(session);
- WT_BM *bm = S2BT(session)->bm;
+ WT_BM *bm;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
+ void *blk;
+
+ conn = S2C(session);
+ bm = S2BT(session)->bm;
+
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
size += WT_PTRDIFF(p, blk);
/* XXX proxy for "am I doing a scan?" -- manual read-ahead */
@@ -79,59 +95,99 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size)
*/
size &= ~(size_t)(conn->page_size - 1);
- if (size > (size_t)conn->page_size &&
- (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0)
- WT_RET_MSG(session, ret, "posix_madvise will need");
+ if (size <= (size_t)conn->page_size ||
+ (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0)
+ return (0);
+ WT_RET_MSG(session, ret,
+ "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED",
+ fh->name);
+}
+#endif
+
+/*
+ * __wt_posix_map_preload --
+ * Cause a section of a memory map to be faulted in.
+ */
+int
+__wt_posix_map_preload(
+ WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+#ifdef HAVE_POSIX_MADVISE
+ return (__posix_map_preload_madvise(session, fh, p, size));
#else
- WT_UNUSED(session);
+ WT_UNUSED(fh);
WT_UNUSED(p);
WT_UNUSED(size);
+ return (ENOTSUP);
#endif
-
- return (0);
}
+#ifdef HAVE_POSIX_MADVISE
/*
- * __wt_mmap_discard --
+ * __posix_map_discard_madvise --
* Discard a chunk of the memory map.
*/
-int
-__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size)
+static int
+__posix_map_discard_madvise(
+ WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
{
-#ifdef HAVE_POSIX_MADVISE
- /* Linux requires the address be aligned to a 4KB boundary. */
- WT_CONNECTION_IMPL *conn = S2C(session);
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
+ void *blk;
+
+ conn = S2C(session);
+
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1));
size += WT_PTRDIFF(p, blk);
- if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0)
- WT_RET_MSG(session, ret, "posix_madvise don't need");
+ if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0)
+ return (0);
+ WT_RET_MSG(session, ret,
+ "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED",
+ fh->name);
+}
+#endif
+
+/*
+ * __wt_posix_map_discard --
+ * Discard a chunk of the memory map.
+ */
+int
+__wt_posix_map_discard(
+ WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
+{
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+#ifdef HAVE_POSIX_MADVISE
+ return (__posix_map_discard_madvise(session, fh, p, size));
#else
- WT_UNUSED(session);
+ WT_UNUSED(fh);
WT_UNUSED(p);
WT_UNUSED(size);
+ return (ENOTSUP);
#endif
- return (0);
}
/*
- * __wt_munmap --
+ * __wt_posix_map_unmap --
* Remove a memory mapping.
*/
int
-__wt_munmap(WT_SESSION_IMPL *session,
+__wt_posix_map_unmap(WT_SESSION_IMPL *session,
WT_FH *fh, void *map, size_t len, void **mappingcookie)
{
WT_UNUSED(mappingcookie);
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: unmap %p: %" WT_SIZET_FMT " bytes", fh->name, map, len));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
if (munmap(map, len) == 0)
return (0);
- WT_RET_MSG(session, __wt_errno(),
- "%s unmap error: failed to unmap %" WT_SIZET_FMT " bytes",
- fh->name, len);
+ WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
}
diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c
deleted file mode 100644
index 219b26c2fa1..00000000000
--- a/src/os_posix/os_open.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __open_directory --
- * Open up a file handle to a directory.
- */
-static int
-__open_directory(WT_SESSION_IMPL *session, char *path, int *fd)
-{
- WT_DECL_RET;
-
- WT_SYSCALL_RETRY(((*fd =
- open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret);
- if (ret != 0)
- WT_RET_MSG(session, ret, "%s: open_directory", path);
- return (ret);
-}
-
-/*
- * __wt_open --
- * Open a file handle.
- */
-int
-__wt_open(WT_SESSION_IMPL *session,
- const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh, *tfh;
- mode_t mode;
- uint64_t bucket, hash;
- int f, fd;
- bool direct_io, matched;
- char *path;
-
- conn = S2C(session);
- direct_io = false;
- fh = NULL;
- fd = -1;
- path = NULL;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name));
-
- /* Increment the reference count if we already have the file open. */
- matched = false;
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) {
- if (strcmp(name, tfh->name) == 0) {
- ++tfh->ref;
- *fhp = tfh;
- matched = true;
- break;
- }
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- if (matched)
- return (0);
-
- WT_RET(__wt_filename(session, name, &path));
-
- if (dio_type == WT_FILE_TYPE_DIRECTORY) {
- WT_ERR(__open_directory(session, path, &fd));
- goto setupfh;
- }
-
- /*
- * If this is a read-only connection, open all files read-only
- * except the lock file.
- */
- if (F_ISSET(conn, WT_CONN_READONLY) &&
- !WT_STRING_MATCH(name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)))
- f = O_RDONLY;
- else
- f = O_RDWR;
-#ifdef O_BINARY
- /* Windows clones: we always want to treat the file as a binary. */
- f |= O_BINARY;
-#endif
-#ifdef O_CLOEXEC
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles.
- */
- f |= O_CLOEXEC;
-#endif
-#ifdef O_NOATIME
- /* Avoid updating metadata for read-only workloads. */
- if (dio_type == WT_FILE_TYPE_DATA ||
- dio_type == WT_FILE_TYPE_CHECKPOINT)
- f |= O_NOATIME;
-#endif
-
- if (ok_create) {
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
- WT_STRING_MATCH(name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- f |= O_CREAT;
- if (exclusive)
- f |= O_EXCL;
- mode = 0666;
- } else
- mode = 0;
-
-#ifdef O_DIRECT
- if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
- f |= O_DIRECT;
- direct_io = true;
- }
-#endif
- if (dio_type == WT_FILE_TYPE_LOG &&
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
-#ifdef O_DSYNC
- f |= O_DSYNC;
-#elif defined(O_SYNC)
- f |= O_SYNC;
-#else
- WT_ERR_MSG(session, ENOTSUP,
- "Unsupported log sync mode requested");
-#endif
- WT_SYSCALL_RETRY(((fd = open(path, f, mode)) == -1 ? 1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- direct_io ?
- "%s: open failed with direct I/O configured, some "
- "filesystem types do not support direct I/O" : "%s", path);
-
-setupfh:
-#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles. There's an obvious
- * race here, so we prefer the flag to open if available.
- */
- if ((f = fcntl(fd, F_GETFD)) == -1 ||
- fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
- WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name);
-#endif
-
-#if defined(HAVE_POSIX_FADVISE)
- /* Disable read-ahead on trees: it slows down random read workloads. */
- if (dio_type == WT_FILE_TYPE_DATA ||
- dio_type == WT_FILE_TYPE_CHECKPOINT)
- WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
-#endif
-
- WT_ERR(__wt_calloc_one(session, &fh));
- WT_ERR(__wt_strdup(session, name, &fh->name));
- fh->name_hash = hash;
- fh->fd = fd;
- fh->ref = 1;
- fh->direct_io = direct_io;
-
- /* Set the file's size. */
- WT_ERR(__wt_filesize(session, fh, &fh->size));
-
- /* Configure file extension. */
- if (dio_type == WT_FILE_TYPE_DATA ||
- dio_type == WT_FILE_TYPE_CHECKPOINT)
- fh->extend_len = conn->data_extend_len;
-
- /* Configure fallocate/posix_fallocate calls. */
- __wt_fallocate_config(session, fh);
-
- /*
- * Repeat the check for a match, but then link onto the database's list
- * of files.
- */
- matched = false;
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) {
- if (strcmp(name, tfh->name) == 0) {
- ++tfh->ref;
- *fhp = tfh;
- matched = true;
- break;
- }
- }
- if (!matched) {
- WT_CONN_FILE_INSERT(conn, fh, bucket);
- (void)__wt_atomic_add32(&conn->open_file_count, 1);
- *fhp = fh;
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- if (matched) {
-err: if (fh != NULL) {
- __wt_free(session, fh->name);
- __wt_free(session, fh);
- }
- if (fd != -1)
- (void)close(fd);
- }
-
- __wt_free(session, path);
- return (ret);
-}
-
-/*
- * __wt_close --
- * Close a file handle.
- */
-int
-__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh;
- uint64_t bucket;
-
- conn = S2C(session);
-
- if (*fhp == NULL)
- return (0);
- fh = *fhp;
- *fhp = NULL;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: close", fh->name));
-
- __wt_spin_lock(session, &conn->fh_lock);
- if (fh == NULL || fh->ref == 0 || --fh->ref > 0) {
- __wt_spin_unlock(session, &conn->fh_lock);
- return (0);
- }
-
- /* Remove from the list. */
- bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_FILE_REMOVE(conn, fh, bucket);
- (void)__wt_atomic_sub32(&conn->open_file_count, 1);
-
- __wt_spin_unlock(session, &conn->fh_lock);
-
- /* Discard the memory. */
- if (close(fh->fd) != 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "close: %s", fh->name);
- }
-
- __wt_free(session, fh->name);
- __wt_free(session, fh);
- return (ret);
-}
diff --git a/src/os_posix/os_remove.c b/src/os_posix/os_remove.c
deleted file mode 100644
index eb2e37fdc38..00000000000
--- a/src/os_posix/os_remove.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __remove_file_check --
- * Check if the file is currently open before removing it.
- */
-static void
-__remove_file_check(WT_SESSION_IMPL *session, const char *name)
-{
-#ifdef HAVE_DIAGNOSTIC
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
- uint64_t bucket;
-
- conn = S2C(session);
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
- fh = NULL;
- bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
-
- /*
- * Check if the file is open: it's an error if it is, since a higher
- * level should have closed it before removing.
- */
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, fh->name) == 0)
- break;
- __wt_spin_unlock(session, &conn->fh_lock);
-
- WT_ASSERT(session, fh == NULL);
-#else
- WT_UNUSED(session);
- WT_UNUSED(name);
-#endif
-}
-
-/*
- * __wt_remove --
- * Remove a file.
- */
-int
-__wt_remove(WT_SESSION_IMPL *session, const char *name)
-{
- WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name));
-
- __remove_file_check(session, name);
-
- WT_RET(__wt_filename(session, name, &path));
-
- WT_SYSCALL_RETRY(remove(path), ret);
-
- __wt_free(session, path);
-
- if (ret == 0 || ret == ENOENT)
- return (0);
-
- WT_RET_MSG(session, ret, "%s: remove", name);
-}
diff --git a/src/os_posix/os_rename.c b/src/os_posix/os_rename.c
deleted file mode 100644
index 8ec4ee3aa23..00000000000
--- a/src/os_posix/os_rename.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_rename --
- * Rename a file.
- */
-int
-__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
-{
- WT_DECL_RET;
- char *from_path, *to_path;
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "rename %s to %s", from, to));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- from_path = to_path = NULL;
-
- WT_RET(__wt_filename(session, from, &from_path));
- WT_TRET(__wt_filename(session, to, &to_path));
-
- if (ret == 0)
- WT_SYSCALL_RETRY(rename(from_path, to_path), ret);
-
- __wt_free(session, from_path);
- __wt_free(session, to_path);
-
- if (ret == 0)
- return (0);
-
- WT_RET_MSG(session, ret, "rename %s to %s", from, to);
-}
diff --git a/src/os_posix/os_rw.c b/src/os_posix/os_rw.c
deleted file mode 100644
index 3d49fa7e712..00000000000
--- a/src/os_posix/os_rw.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_read --
- * Read a chunk.
- */
-int
-__wt_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
-{
- size_t chunk;
- ssize_t nr;
- uint8_t *addr;
-
- WT_STAT_FAST_CONN_INCR(session, read_io);
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0)
- WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
- "%s read error: failed to read %" WT_SIZET_FMT
- " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
- }
- return (0);
-}
-
-/*
- * __wt_write --
- * Write a chunk.
- */
-int
-__wt_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
-{
- size_t chunk;
- ssize_t nw;
- const uint8_t *addr;
-
- WT_STAT_FAST_CONN_INCR(session, write_io);
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0)
- WT_RET_MSG(session, __wt_errno(),
- "%s write error: failed to write %" WT_SIZET_FMT
- " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
- }
- return (0);
-}
diff --git a/src/os_posix/os_setvbuf.c b/src/os_posix/os_setvbuf.c
new file mode 100644
index 00000000000..d6107115eb3
--- /dev/null
+++ b/src/os_posix/os_setvbuf.c
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_stream_set_line_buffer --
+ * Set line buffering on a stream; any setvbuf failure is ignored.
+ */
+void
+__wt_stream_set_line_buffer(FILE *fp)
+{
+ /*
+ * MSVC doesn't support a setvbuf buffer size of 0, so pass a non-zero
+ * size here. To avoid re-introducing the bug, we have helper functions
+ * and disallow calling setvbuf directly in WiredTiger code.
+ */
+ (void)setvbuf(fp, NULL, _IOLBF, 1024);
+}
+
+/*
+ * __wt_stream_set_no_buffer --
+ * Turn off buffering on a stream; any setvbuf failure is ignored.
+ */
+void
+__wt_stream_set_no_buffer(FILE *fp)
+{
+ (void)setvbuf(fp, NULL, _IONBF, 0);
+}
diff --git a/src/os_posix/os_stdio.c b/src/os_posix/os_stdio.c
deleted file mode 100644
index 65a0f40a659..00000000000
--- a/src/os_posix/os_stdio.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_fopen --
- * Open a FILE handle.
- */
-int
-__wt_fopen(WT_SESSION_IMPL *session,
- const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp)
-{
- WT_DECL_RET;
- const char *mode, *path;
- char *pathbuf;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fopen", name));
-
- pathbuf = NULL;
- if (LF_ISSET(WT_FOPEN_FIXED))
- path = name;
- else {
- WT_RET(__wt_filename(session, name, &pathbuf));
- path = pathbuf;
- }
-
- mode = NULL;
- switch (mode_flag) {
- case WT_FHANDLE_APPEND:
- mode = WT_FOPEN_APPEND;
- break;
- case WT_FHANDLE_READ:
- mode = WT_FOPEN_READ;
- break;
- case WT_FHANDLE_WRITE:
- mode = WT_FOPEN_WRITE;
- break;
- }
- *fpp = fopen(path, mode);
- if (*fpp == NULL)
- ret = __wt_errno();
-
- __wt_free(session, pathbuf);
-
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: fopen", name);
-}
-
-/*
- * __wt_vfprintf --
- * Vfprintf for a FILE handle.
- */
-int
-__wt_vfprintf(FILE *fp, const char *fmt, va_list ap)
-{
- return (vfprintf(fp, fmt, ap) < 0 ? __wt_errno() : 0);
-}
-
-/*
- * __wt_fprintf --
- * Fprintf for a FILE handle.
- */
-int
-__wt_fprintf(FILE *fp, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
-{
- WT_DECL_RET;
- va_list ap;
-
- va_start(ap, fmt);
- ret = __wt_vfprintf(fp, fmt, ap);
- va_end(ap);
-
- return (ret);
-}
-
-/*
- * __wt_fflush --
- * Flush a FILE handle.
- */
-int
-__wt_fflush(FILE *fp)
-{
- /* Flush the handle. */
- return (fflush(fp) == 0 ? 0 : __wt_errno());
-}
-
-/*
- * __wt_fclose --
- * Close a FILE handle.
- */
-int
-__wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag)
-{
- FILE *fp;
- WT_DECL_RET;
-
- if (*fpp == NULL)
- return (0);
-
- fp = *fpp;
- *fpp = NULL;
-
- /*
- * If the handle was opened for writing, flush the file to the backing
- * OS buffers, then flush the OS buffers to the backing disk.
- */
- if (mode_flag == WT_FHANDLE_APPEND || mode_flag == WT_FHANDLE_WRITE) {
- ret = __wt_fflush(fp);
- if (fsync(fileno(fp)) != 0)
- WT_TRET(__wt_errno());
- }
-
- /* Close the handle. */
- if (fclose(fp) != 0)
- WT_TRET(__wt_errno());
-
- return (ret);
-}
diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c
index 00ec4f252e4..64eae60983c 100644
--- a/src/os_win/os_dir.c
+++ b/src/os_win/os_dir.c
@@ -9,13 +9,12 @@
#include "wt_internal.h"
/*
- * __wt_dirlist --
- * Get a list of files from a directory, optionally filtered by
- * a given prefix.
+ * __wt_win_directory_list --
+ * Get a list of files from a directory, MSVC version.
*/
int
-__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
- uint32_t flags, char ***dirlist, u_int *countp)
+__wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir,
+ const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
{
HANDLE findhandle;
WIN32_FIND_DATA finddata;
@@ -29,72 +28,60 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
*dirlist = NULL;
*countp = 0;
- findhandle = INVALID_HANDLE_VALUE;
- count = 0;
-
WT_RET(__wt_filename(session, dir, &path));
pathlen = strlen(path);
- if (path[pathlen - 1] == '\\') {
+ if (path[pathlen - 1] == '\\')
path[pathlen - 1] = '\0';
- }
-
WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path));
+ findhandle = INVALID_HANDLE_VALUE;
dirallocsz = 0;
dirsz = 0;
entries = NULL;
- if (flags == 0)
- LF_SET(WT_DIRLIST_INCLUDE);
-
- WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS,
- "wt_dirlist of %s %s prefix %s",
- pathbuf->data, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude",
- prefix == NULL ? "all" : prefix));
findhandle = FindFirstFileA(pathbuf->data, &finddata);
+ if (findhandle == INVALID_HANDLE_VALUE)
+ WT_ERR_MSG(session, __wt_getlasterror(),
+ "%s: directory-list: FindFirstFile", pathbuf->data);
- if (INVALID_HANDLE_VALUE == findhandle)
- WT_ERR_MSG(session, __wt_errno(), "%s: FindFirstFile",
- pathbuf->data);
- else {
- do {
+ count = 0;
+ do {
+ /*
+ * Skip . and ..
+ */
+ if (strcmp(finddata.cFileName, ".") == 0 ||
+ strcmp(finddata.cFileName, "..") == 0)
+ continue;
+
+ /* The list of files is optionally filtered by a prefix. */
+ match = false;
+ if (prefix != NULL &&
+ ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
+ WT_PREFIX_MATCH(finddata.cFileName, prefix)) ||
+ (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
+ !WT_PREFIX_MATCH(finddata.cFileName, prefix))))
+ match = true;
+ if (prefix == NULL || match) {
/*
- * Skip . and ..
+ * We have a file name we want to return.
*/
- if (strcmp(finddata.cFileName, ".") == 0 ||
- strcmp(finddata.cFileName, "..") == 0)
- continue;
- match = false;
- if (prefix != NULL &&
- ((LF_ISSET(WT_DIRLIST_INCLUDE) &&
- WT_PREFIX_MATCH(finddata.cFileName, prefix)) ||
- (LF_ISSET(WT_DIRLIST_EXCLUDE) &&
- !WT_PREFIX_MATCH(finddata.cFileName, prefix))))
- match = true;
- if (prefix == NULL || match) {
- /*
- * We have a file name we want to return.
- */
- count++;
- if (count > dirsz) {
- dirsz += WT_DIR_ENTRY;
- WT_ERR(__wt_realloc_def(session,
- &dirallocsz, dirsz, &entries));
- }
- WT_ERR(__wt_strdup(session,
- finddata.cFileName, &entries[count - 1]));
+ count++;
+ if (count > dirsz) {
+ dirsz += WT_DIR_ENTRY;
+ WT_ERR(__wt_realloc_def(session,
+ &dirallocsz, dirsz, &entries));
}
- } while (FindNextFileA(findhandle, &finddata) != 0);
- }
-
+ WT_ERR(__wt_strdup(session,
+ finddata.cFileName, &entries[count - 1]));
+ }
+ } while (FindNextFileA(findhandle, &finddata) != 0);
if (count > 0)
*dirlist = entries;
*countp = count;
-err:
- if (findhandle != INVALID_HANDLE_VALUE)
+err: if (findhandle != INVALID_HANDLE_VALUE)
(void)FindClose(findhandle);
__wt_free(session, path);
__wt_scr_free(session, &pathbuf);
@@ -108,5 +95,7 @@ err:
__wt_free(session, entries);
}
- WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix);
+ WT_RET_MSG(session, ret,
+ "%s: directory-list, prefix \"%s\"",
+ dir, prefix == NULL ? "" : prefix);
}
diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c
index 0bad39d681d..ce949e4ea5f 100644
--- a/src/os_win/os_dlopen.c
+++ b/src/os_win/os_dlopen.c
@@ -23,18 +23,17 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
/* NULL means load from the current binary */
if (path == NULL) {
- ret = GetModuleHandleExA(0, NULL, (HMODULE *)&dlh->handle);
- if (ret == FALSE)
- WT_ERR_MSG(session,
- __wt_errno(), "GetModuleHandleEx(%s): %s", path, 0);
+ if (GetModuleHandleExA(
+ 0, NULL, (HMODULE *)&dlh->handle) == FALSE) {
+ ret = __wt_getlasterror();
+ WT_ERR_MSG(session, ret,
+ "GetModuleHandleEx(%s): %s", path, 0);
+ }
} else {
// TODO: load dll here
DebugBreak();
}
- /* Windows returns 0 on failure, WT expects 0 on success */
- ret = !ret;
-
*dlhp = dlh;
if (0) {
err: __wt_free(session, dlh->name);
@@ -56,10 +55,9 @@ __wt_dlsym(WT_SESSION_IMPL *session,
*(void **)sym_ret = NULL;
sym = GetProcAddress(dlh->handle, name);
- if (sym == NULL && fail) {
- WT_RET_MSG(session, __wt_errno(),
+ if (sym == NULL && fail)
+ WT_RET_MSG(session, __wt_getlasterror(),
"GetProcAddress(%s in %s)", name, dlh->name);
- }
*(void **)sym_ret = sym;
return (0);
@@ -74,13 +72,11 @@ __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
{
WT_DECL_RET;
- if ((ret = FreeLibrary(dlh->handle)) == FALSE) {
- __wt_err(session, __wt_errno(), "FreeLibrary");
+ if (FreeLibrary(dlh->handle) == FALSE) {
+ ret = __wt_getlasterror();
+ __wt_err(session, ret, "FreeLibrary: %s", dlh->name);
}
- /* Windows returns 0 on failure, WT expects 0 on success */
- ret = !ret;
-
__wt_free(session, dlh->name);
__wt_free(session, dlh);
return (ret);
diff --git a/src/os_win/os_errno.c b/src/os_win/os_errno.c
index 590fcdc9d44..f3fffd5ef42 100644
--- a/src/os_win/os_errno.c
+++ b/src/os_win/os_errno.c
@@ -46,13 +46,13 @@ __wt_map_windows_error_to_error(DWORD winerr)
* of failures.
*/
int
-__wt_map_error_rdonly(int winerr)
+__wt_map_error_rdonly(int error)
{
- if (winerr == ERROR_FILE_NOT_FOUND)
+ if (error == ERROR_FILE_NOT_FOUND)
return (WT_NOTFOUND);
- else if (winerr == ERROR_ACCESS_DENIED)
+ else if (error == ERROR_ACCESS_DENIED)
return (WT_PERM_DENIED);
- return (winerr);
+ return (error);
}
/*
@@ -63,14 +63,33 @@ int
__wt_errno(void)
{
/*
+ * Check for 0:
+ * It's easy to introduce a problem by calling the wrong error function,
+ * for example, this function when the MSVC function set the C runtime
+ * error value. Handle gracefully and always return an error.
+ */
+ return (errno == 0 ? WT_ERROR : errno);
+}
+
+/*
+ * __wt_getlasterror --
+ * Return GetLastError, or WT_ERROR if error not set.
+ */
+int
+__wt_getlasterror(void)
+{
+ /*
* Called when we know an error occurred, and we want the system
- * error code, but there's some chance it's not set.
+ * error code.
*/
DWORD err = GetLastError();
- /* GetLastError should only be called if we hit an actual error */
- WT_ASSERT(NULL, err != ERROR_SUCCESS);
-
+ /*
+ * Check for ERROR_SUCCESS:
+ * It's easy to introduce a problem by calling the wrong error function,
+ * for example, this function when the MSVC function set the C runtime
+ * error value. Handle gracefully and always return an error.
+ */
return (err == ERROR_SUCCESS ?
WT_ERROR : __wt_map_windows_error_to_error(err));
}
diff --git a/src/os_win/os_exist.c b/src/os_win/os_exist.c
deleted file mode 100644
index ec1369cc727..00000000000
--- a/src/os_win/os_exist.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_exist --
- * Return if the file exists.
- */
-int
-__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp)
-{
- WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, filename, &path));
-
- ret = GetFileAttributesA(path);
-
- __wt_free(session, path);
-
- if (ret != INVALID_FILE_ATTRIBUTES)
- *existp = true;
- else
- *existp = false;
-
- return (0);
-}
diff --git a/src/os_win/os_fallocate.c b/src/os_win/os_fallocate.c
deleted file mode 100644
index a324687ca73..00000000000
--- a/src/os_win/os_fallocate.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_fallocate_config --
- * Configure fallocate behavior for a file handle.
- */
-void
-__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_UNUSED(session);
-
- /*
- * fallocate on Windows would be implemented using SetEndOfFile, which
- * can also truncate the file. WiredTiger expects fallocate to ignore
- * requests to truncate the file which Windows does not do, so we don't
- * support the call.
- */
- fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
- fh->fallocate_requires_locking = false;
-}
-
-/*
- * __wt_fallocate --
- * Allocate space for a file handle.
- */
-int
-__wt_fallocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_UNUSED(session);
- WT_UNUSED(fh);
- WT_UNUSED(offset);
- WT_UNUSED(len);
-
- return (ENOTSUP);
-}
diff --git a/src/os_win/os_filesize.c b/src/os_win/os_filesize.c
deleted file mode 100644
index c9925fb18a8..00000000000
--- a/src/os_win/os_filesize.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_filesize --
- * Get the size of a file in bytes.
- */
-int
-__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
-{
- LARGE_INTEGER size;
- WT_DECL_RET;
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: GetFileSizeEx", fh->name));
-
- if ((ret = GetFileSizeEx(fh->filehandle, &size)) != 0) {
- *sizep = size.QuadPart;
- return (0);
- }
-
- WT_RET_MSG(session, __wt_errno(), "%s: GetFileSizeEx", fh->name);
-}
-
-/*
- * __wt_filesize_name --
- * Return the size of a file in bytes, given a file name.
- */
-int
-__wt_filesize_name(WT_SESSION_IMPL *session,
- const char *filename, bool silent, wt_off_t *sizep)
-{
- WIN32_FILE_ATTRIBUTE_DATA data;
- WT_DECL_RET;
- char *path;
-
- WT_RET(__wt_filename(session, filename, &path));
-
- ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data);
-
- __wt_free(session, path);
-
- if (ret != 0) {
- *sizep =
- ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow;
- return (0);
- }
-
- /*
- * Some callers of this function expect failure if the file doesn't
- * exist, and don't want an error message logged.
- */
- ret = __wt_errno();
- if (!silent)
- WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", filename);
- return (ret);
-}
diff --git a/src/os_win/os_flock.c b/src/os_win/os_flock.c
deleted file mode 100644
index 60a981499a5..00000000000
--- a/src/os_win/os_flock.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_bytelock --
- * Lock/unlock a byte in a file.
- */
-int
-__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock)
-{
- WT_DECL_RET;
-
- /*
- * WiredTiger requires this function be able to acquire locks past
- * the end of file.
- *
- * Note we're using fcntl(2) locking: all fcntl locks associated with a
- * file for a given process are removed when any file descriptor for the
- * file is closed by the process, even if a lock was never requested for
- * that file descriptor.
- *
- * http://msdn.microsoft.com/
- * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
- *
- * You can lock bytes that are beyond the end of the current file.
- * This is useful to coordinate adding records to the end of a file.
- */
- if (lock) {
- ret = LockFile(fhp->filehandle, UINT32_MAX & byte,
- UINT32_MAX & (byte >> 32), 1, 0);
- } else {
- ret = UnlockFile(fhp->filehandle, UINT32_MAX & byte,
- UINT32_MAX & (byte >> 32), 1, 0);
- }
-
- if (ret == FALSE)
- WT_RET_MSG(NULL, __wt_errno(), "%s: LockFile", fhp->name);
-
- return (0);
-}
diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c
new file mode 100644
index 00000000000..bf8232419e9
--- /dev/null
+++ b/src/os_win/os_fs.c
@@ -0,0 +1,705 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __win_directory_sync --
+ *	Directory-flush hook for the Windows jump table; performs no work
+ *	and always reports success.
+ */
+static int
+__win_directory_sync(WT_SESSION_IMPL *session, const char *path)
+{
+	/* Neither argument is consulted. */
+	WT_UNUSED(path);
+	WT_UNUSED(session);
+
+	return (0);
+}
+
+/*
+ * __win_file_exist --
+ *	Report whether a file exists.
+ *
+ *	The name is expanded with __wt_filename and probed with
+ *	GetFileAttributesA: *existp is set to true when attributes are
+ *	returned and false otherwise. Returns 0 unless the name expansion
+ *	itself fails.
+ */
+static int
+__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+	DWORD attributes;
+	char *path;
+
+	WT_RET(__wt_filename(session, name, &path));
+
+	/*
+	 * Keep the result in a DWORD: the failure sentinel is an unsigned
+	 * value and should not round-trip through a signed int.
+	 */
+	attributes = GetFileAttributesA(path);
+
+	__wt_free(session, path);
+
+	*existp = attributes != INVALID_FILE_ATTRIBUTES;
+	return (0);
+}
+
+/*
+ * __win_file_remove --
+ *	Delete a file by name.
+ *
+ *	The name is expanded with __wt_filename before DeleteFileA is
+ *	called; a failure is logged against the expanded path and its
+ *	mapped error is returned.
+ */
+static int
+__win_file_remove(WT_SESSION_IMPL *session, const char *name)
+{
+	WT_DECL_RET;
+	char *path;
+
+#ifdef HAVE_DIAGNOSTIC
+	/* Diagnostic builds refuse to remove a file with open handles. */
+	if (__wt_handle_search(session, name, false, true, NULL, NULL))
+		WT_RET_MSG(session, EINVAL,
+		    "%s: file-remove: file has open handles", name);
+#endif
+
+	WT_RET(__wt_filename(session, name, &path));
+
+	if (!DeleteFileA(path)) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret, "%s: file-remove: DeleteFileA", path);
+	}
+
+	__wt_free(session, path);
+	return (ret);
+}
+
+/*
+ * __win_file_rename --
+ *	Rename a file, replacing any existing target.
+ *
+ *	Both names are expanded with __wt_filename. If the target already
+ *	exists it is deleted first, then the source is moved into place.
+ *	The first failing call is logged under its own name and its mapped
+ *	error is returned; both expanded paths are freed on every exit.
+ */
+static int
+__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+	WT_DECL_RET;
+	char *from_path, *to_path;
+
+#ifdef HAVE_DIAGNOSTIC
+	/* Diagnostic builds refuse to rename files with open handles. */
+	if (__wt_handle_search(session, from, false, true, NULL, NULL))
+		WT_RET_MSG(session, EINVAL,
+		    "%s: file-rename: file has open handles", from);
+	if (__wt_handle_search(session, to, false, true, NULL, NULL))
+		WT_RET_MSG(session, EINVAL,
+		    "%s: file-rename: file has open handles", to);
+#endif
+
+	from_path = to_path = NULL;
+	WT_ERR(__wt_filename(session, from, &from_path));
+	from = from_path;
+	WT_ERR(__wt_filename(session, to, &to_path));
+	to = to_path;
+
+	/*
+	 * Check if file exists since Windows does not override the file if
+	 * it exists. Report a failed delete as such, so the log shows which
+	 * step of the rename went wrong.
+	 */
+	if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES &&
+	    DeleteFileA(to) == FALSE) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s to %s: file-rename: DeleteFileA", from, to);
+	}
+
+	/* Only attempt the move if the target was cleared successfully. */
+	if (ret == 0 && MoveFileA(from, to) == FALSE) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s to %s: file-rename: rename", from, to);
+	}
+
+err:	__wt_free(session, from_path);
+	__wt_free(session, to_path);
+	return (ret);
+}
+
+/*
+ * __win_file_size --
+ *	Look up a file's size in bytes from its name.
+ *
+ *	On success the 64-bit size is assembled from the high and low
+ *	words returned by GetFileAttributesExA and stored in *sizep. On
+ *	failure the mapped error is returned, and logged unless the caller
+ *	asked for silence.
+ */
+static int
+__win_file_size(
+    WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+	WIN32_FILE_ATTRIBUTE_DATA data;
+	WT_DECL_RET;
+	char *path;
+
+	WT_RET(__wt_filename(session, name, &path));
+
+	ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data);
+
+	__wt_free(session, path);
+
+	if (ret == 0) {
+		/*
+		 * Some callers of this function expect failure if the file
+		 * doesn't exist, and don't want an error message logged.
+		 */
+		ret = __wt_getlasterror();
+		if (silent)
+			return (ret);
+		WT_RET_MSG(session, ret,
+		    "%s: file-size: GetFileAttributesEx", name);
+	}
+
+	*sizep = ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow;
+	return (0);
+}
+
+/*
+ * __win_handle_advise --
+ *	File-access advice hook; not implemented here, so every call
+ *	returns ENOTSUP without logging.
+ */
+static int
+__win_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+	/* No argument is consulted. */
+	WT_UNUSED(advice);
+	WT_UNUSED(len);
+	WT_UNUSED(offset);
+	WT_UNUSED(fh);
+	WT_UNUSED(session);
+
+	/* Quietly fail, callers expect not-supported failures. */
+	return (ENOTSUP);
+}
+
+/*
+ * __win_handle_allocate_configure --
+ *	Record on the file handle that fallocate is unavailable and so
+ *	needs no locking.
+ */
+static void
+__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+	WT_UNUSED(session);
+
+	/*
+	 * fallocate on Windows would be implemented using SetEndOfFile, which
+	 * can also truncate the file. WiredTiger expects fallocate to ignore
+	 * requests to truncate the file which Windows does not do, so we don't
+	 * support the call.
+	 */
+	fh->fallocate_requires_locking = false;
+	fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
+}
+
+/*
+ * __win_handle_allocate --
+ *	Allocate space for a file handle.
+ *
+ *	Not supported in this implementation: the call logs the handle
+ *	name and returns ENOTSUP. The offset and length are ignored.
+ */
+static int
+__win_handle_allocate(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+{
+	/* session and fh are used by the error message below. */
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+
+	/* WT_RET_MSG logs and returns; nothing may follow it. */
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name);
+}
+
+/*
+ * __win_handle_close --
+ *	Release everything a file handle holds.
+ *
+ *	A stream-backed handle is flushed (when flagged) and closed with
+ *	fclose; otherwise the primary Windows handle is closed if one was
+ *	opened. The secondary handle is then closed if present. Every
+ *	failure is logged and the last error seen is returned.
+ */
+static int
+__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+	WT_DECL_RET;
+
+	if (fh->fp != NULL) {
+		/* If the stream was opened for writing, flush the file. */
+		if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) {
+			ret = __wt_errno();
+			__wt_err(session,
+			    ret, "%s: handle-close: fflush", fh->name);
+		}
+
+		/* Close the file, closing all the underlying handles. */
+		if (fclose(fh->fp) != 0) {
+			ret = __wt_errno();
+			__wt_err(session,
+			    ret, "%s: handle-close: fclose", fh->name);
+		}
+	} else if (fh->filehandle != INVALID_HANDLE_VALUE &&
+	    CloseHandle(fh->filehandle) == 0) {
+		/*
+		 * We don't open Windows system handles when opening directories
+		 * for flushing, as it is not necessary (or possible) to flush
+		 * a directory on Windows, which is why the handle is checked
+		 * before the close is attempted.
+		 */
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s: handle-close: CloseHandle", fh->name);
+	}
+
+	/* Close the secondary handle. */
+	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
+	    CloseHandle(fh->filehandle_secondary) == 0) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s: handle-close: secondary: CloseHandle", fh->name);
+	}
+	return (ret);
+}
+
+/*
+ * __win_handle_getc --
+ *	Read one character from the handle's stdio stream.
+ *
+ *	Returns ENOTSUP if the handle has no stream. End-of-file is not an
+ *	error: *chp is set to EOF and 0 is returned. Only an EOF result
+ *	with the stream's error indicator set is reported as a failure.
+ */
+static int
+__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	if (fh->fp == NULL)
+		WT_RET_MSG(session,
+		    ENOTSUP, "%s: handle-getc: no stream configured", fh->name);
+
+	*chp = fgetc(fh->fp);
+	if (*chp == EOF && ferror(fh->fp))
+		WT_RET_MSG(session,
+		    __wt_errno(), "%s: handle-getc: fgetc", fh->name);
+	return (0);
+}
+
+/*
+ * __win_handle_lock --
+ *	Lock or unlock a file.
+ *
+ *	The lock is taken with LockFile, and released with UnlockFile, on
+ *	the single byte at offset zero of the primary handle. A failure is
+ *	logged and its mapped error returned.
+ */
+static int
+__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	WT_DECL_RET;
+
+	/*
+	 * WiredTiger requires this function be able to acquire locks past
+	 * the end of file.
+	 *
+	 * http://msdn.microsoft.com/
+	 *    en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
+	 *
+	 * You can lock bytes that are beyond the end of the current file.
+	 * This is useful to coordinate adding records to the end of a file.
+	 */
+	if (!lock) {
+		if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
+			ret = __wt_getlasterror();
+			__wt_err(session, ret,
+			    "%s: handle-lock: UnlockFile", fh->name);
+		}
+		return (ret);
+	}
+
+	if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s: handle-lock: LockFile", fh->name);
+	}
+	return (ret);
+}
+
+/*
+ * __win_handle_printf --
+ *	Format output to the handle's stdio stream with vfprintf.
+ *
+ *	Returns ENOTSUP if the handle has no stream and EIO if vfprintf
+ *	reports a negative result.
+ */
+static int
+__win_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	if (fh->fp == NULL)
+		WT_RET_MSG(session, ENOTSUP,
+		    "%s: vfprintf: no stream configured", fh->name);
+
+	if (vfprintf(fh->fp, fmt, ap) < 0)
+		WT_RET_MSG(session,
+		    EIO, "%s: handle-printf: vfprintf", fh->name);
+	return (0);
+}
+
+/*
+ * __win_handle_read --
+ *	Read len bytes at the given file offset into buf.
+ *
+ *	The request is issued as a series of ReadFile calls of at most 1GB
+ *	each, with the position supplied through an OVERLAPPED structure.
+ *	A failed call returns the mapped Windows error; a call that
+ *	succeeds without transferring any bytes returns WT_ERROR, because
+ *	the request cannot be completed. Returns 0 once every byte is read.
+ */
+static int
+__win_handle_read(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+	DWORD chunk, nr;
+	WT_DECL_RET;
+	uint8_t *addr;
+	OVERLAPPED overlapped = { 0 };
+
+	nr = 0;
+
+	/* Assert direct I/O is aligned and a multiple of the alignment. */
+	WT_ASSERT(session,
+	    !fh->direct_io ||
+	    S2C(session)->buffer_alignment == 0 ||
+	    (!((uintptr_t)buf &
+	    (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+	    len >= S2C(session)->buffer_alignment &&
+	    len % S2C(session)->buffer_alignment == 0));
+
+	/* Break reads larger than 1GB into 1GB chunks. */
+	for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+		chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+		overlapped.Offset = UINT32_MAX & offset;
+		overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+		/*
+		 * Report the system's own error when the call fails. A call
+		 * that succeeds but transfers nothing makes no progress, so
+		 * fail it explicitly rather than loop on the same offset.
+		 */
+		if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped))
+			ret = __wt_getlasterror();
+		else if (nr == 0)
+			ret = WT_ERROR;
+		if (ret != 0)
+			WT_RET_MSG(session, ret,
+			    "%s: handle-read: ReadFile: failed to read %lu "
+			    "bytes at offset %" PRIuMAX,
+			    fh->name, chunk, (uintmax_t)offset);
+	}
+	return (0);
+}
+
+/*
+ * __win_handle_size --
+ *	Look up a file's size in bytes from its open handle, using
+ *	GetFileSizeEx; a failure is logged and its mapped error returned.
+ */
+static int
+__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	LARGE_INTEGER size;
+
+	if (GetFileSizeEx(fh->filehandle, &size) == 0)
+		WT_RET_MSG(session, __wt_getlasterror(),
+		    "%s: handle-size: GetFileSizeEx", fh->name);
+
+	*sizep = size.QuadPart;
+	return (0);
+}
+
+/*
+ * __win_handle_sync --
+ *	Flush a file handle.
+ *
+ *	A stream-backed handle is flushed with fflush, whatever the value
+ *	of block. A handle with no stream and no Windows handle has nothing
+ *	to flush. Otherwise a blocking request calls FlushFileBuffers and a
+ *	non-blocking request returns ENOTSUP.
+ */
+static int
+__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	if (fh->fp != NULL) {
+		if (fflush(fh->fp) != 0)
+			WT_RET_MSG(session, __wt_errno(),
+			    "%s: handle-sync: fflush", fh->name);
+		return (0);
+	}
+
+	/*
+	 * We don't open Windows system handles when opening directories
+	 * for flushing, as it is not necessary (or possible) to flush
+	 * a directory on Windows. Confirm the file handle is set before
+	 * attempting to sync it.
+	 */
+	if (fh->filehandle == INVALID_HANDLE_VALUE)
+		return (0);
+
+	/*
+	 * Callers attempting asynchronous flush handle ENOTSUP returns,
+	 * and won't make further attempts.
+	 */
+	if (!block)
+		return (ENOTSUP);
+
+	if (FlushFileBuffers(fh->filehandle) == FALSE)
+		WT_RET_MSG(session, __wt_getlasterror(),
+		    "%s handle-sync: FlushFileBuffers error", fh->name);
+	return (0);
+}
+
+/*
+ * __win_handle_truncate --
+ *	Set a file's length.
+ *
+ *	The work is done on the secondary handle: its file pointer is
+ *	moved to len and SetEndOfFile is called. Returns EINVAL if there is
+ *	no secondary handle and EBUSY, without logging, if the file is
+ *	currently user-mapped.
+ */
+static int
+__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+	LARGE_INTEGER largeint;
+
+	largeint.QuadPart = len;
+
+	if (fh->filehandle_secondary == INVALID_HANDLE_VALUE)
+		WT_RET_MSG(session, EINVAL,
+		    "%s: handle-truncate: read-only", fh->name);
+
+	if (SetFilePointerEx(
+	    fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE)
+		WT_RET_MSG(session, __wt_getlasterror(),
+		    "%s: handle-truncate: SetFilePointerEx", fh->name);
+
+	if (SetEndOfFile(fh->filehandle_secondary) != FALSE)
+		return (0);
+
+	/* A mapped file cannot be resized; let the caller retry later. */
+	if (GetLastError() == ERROR_USER_MAPPED_FILE)
+		return (EBUSY);
+	WT_RET_MSG(session, __wt_getlasterror(),
+	    "%s: handle-truncate: SetEndOfFile error", fh->name);
+}
+
+/*
+ * __win_handle_write --
+ *	Write len bytes from buf at the given file offset.
+ *
+ *	The request is issued as a series of WriteFile calls of at most
+ *	1GB each, with the position supplied through an OVERLAPPED
+ *	structure. A failed call is logged and its mapped error returned.
+ */
+static int
+__win_handle_write(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+	DWORD chunk;
+	DWORD nw;
+	const uint8_t *addr;
+	OVERLAPPED overlapped = { 0 };
+
+	nw = 0;
+
+	/* Assert direct I/O is aligned and a multiple of the alignment. */
+	WT_ASSERT(session,
+	    !fh->direct_io ||
+	    S2C(session)->buffer_alignment == 0 ||
+	    (!((uintptr_t)buf &
+	    (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+	    len >= S2C(session)->buffer_alignment &&
+	    len % S2C(session)->buffer_alignment == 0));
+
+	/* Break writes larger than 1GB into 1GB chunks. */
+	addr = buf;
+	while (len > 0) {
+		chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+		overlapped.Offset = UINT32_MAX & offset;
+		overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+		if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped))
+			WT_RET_MSG(session, __wt_getlasterror(),
+			    "%s: handle-write: WriteFile: failed to write %lu "
+			    "bytes at offset %" PRIuMAX,
+			    fh->name, chunk, (uintmax_t)offset);
+
+		/* Step past the bytes this call transferred. */
+		addr += nw;
+		len -= (size_t)nw;
+		offset += nw;
+	}
+	return (0);
+}
+
+/*
+ * __win_handle_open --
+ * Open a file handle.
+ *
+ * Fills in the caller's WT_FH: opens the primary Windows handle, a
+ * secondary handle unless the open is read-only, an optional stdio
+ * stream, and installs the per-handle method table. Directory opens
+ * skip all system calls and leave both handles invalid. On failure any
+ * handle opened here is closed and the error is returned.
+ */
+static int
+__win_handle_open(WT_SESSION_IMPL *session,
+ WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags)
+{
+ DWORD dwCreationDisposition;
+ HANDLE filehandle, filehandle_secondary;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ int desired_access, f, fd;
+ bool direct_io;
+ const char *stream_mode;
+
+ conn = S2C(session);
+ direct_io = false;
+
+ /* Set up error handling. */
+ fh->filehandle = fh->filehandle_secondary =
+ filehandle = filehandle_secondary = INVALID_HANDLE_VALUE;
+ fh->fp = NULL;
+
+ /*
+ * Opening a file handle on a directory is only to support filesystems
+ * that require a directory sync for durability, and Windows doesn't
+ * require that functionality: create an empty WT_FH structure with
+ * invalid handles.
+ */
+ if (file_type == WT_FILE_TYPE_DIRECTORY)
+ goto directory_open;
+
+ desired_access = GENERIC_READ;
+ if (!LF_ISSET(WT_OPEN_READONLY))
+ desired_access |= GENERIC_WRITE;
+
+ /*
+ * Security:
+ * The application may spawn a new process, and we don't want another
+ * process to have access to our file handles.
+ *
+ * TODO: Set tighter file permissions but set bInheritHandle to false
+ * to prevent inheritance
+ */
+ f = FILE_ATTRIBUTE_NORMAL;
+
+ /*
+ * NOTE(review): WT_OPEN_EXCLUSIVE selects CREATE_ALWAYS rather than
+ * CREATE_NEW here; confirm that is the intended disposition for an
+ * exclusive create.
+ */
+ dwCreationDisposition = 0;
+ if (LF_ISSET(WT_OPEN_CREATE)) {
+ dwCreationDisposition = CREATE_NEW;
+ if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ dwCreationDisposition = CREATE_ALWAYS;
+ } else
+ dwCreationDisposition = OPEN_EXISTING;
+
+ /*
+ * direct_io means no OS file caching. This requires aligned buffer
+ * allocations like O_DIRECT.
+ */
+ if (FLD_ISSET(conn->direct_io, file_type) ||
+ (LF_ISSET(WT_OPEN_READONLY) &&
+ file_type == WT_FILE_TYPE_DATA &&
+ FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) {
+ f |= FILE_FLAG_NO_BUFFERING;
+ direct_io = true;
+ }
+ fh->direct_io = direct_io;
+
+ /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
+ if (FLD_ISSET(conn->write_through, file_type))
+ f |= FILE_FLAG_WRITE_THROUGH;
+
+ if (file_type == WT_FILE_TYPE_LOG &&
+ FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
+ f |= FILE_FLAG_WRITE_THROUGH;
+
+ /* Disable read-ahead on trees: it slows down random read workloads. */
+ if (file_type == WT_FILE_TYPE_DATA)
+ f |= FILE_FLAG_RANDOM_ACCESS;
+
+ filehandle = CreateFileA(name, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL, dwCreationDisposition, f, NULL);
+ if (filehandle == INVALID_HANDLE_VALUE) {
+ /*
+ * A create request that failed because the file already exists
+ * is retried as an open of the existing file.
+ */
+ if (LF_ISSET(WT_OPEN_CREATE) &&
+ GetLastError() == ERROR_FILE_EXISTS)
+ filehandle = CreateFileA(name, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL, OPEN_EXISTING, f, NULL);
+ if (filehandle == INVALID_HANDLE_VALUE)
+ WT_ERR_MSG(session, __wt_getlasterror(),
+ direct_io ?
+ "%s: handle-open: CreateFileA: failed with direct "
+ "I/O configured, some filesystem types do not "
+ "support direct I/O" :
+ "%s: handle-open: CreateFileA", name);
+ }
+
+ /*
+ * Open a second handle to file to support allocation/truncation
+ * concurrently with reads on the file. Writes would also move the file
+ * pointer.
+ */
+ if (!LF_ISSET(WT_OPEN_READONLY)) {
+ filehandle_secondary = CreateFileA(name, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL, OPEN_EXISTING, f, NULL);
+ if (filehandle_secondary == INVALID_HANDLE_VALUE)
+ WT_ERR_MSG(session, __wt_getlasterror(),
+ "%s: handle-open: CreateFileA: secondary", name);
+ }
+
+ /*
+ * Optionally configure a stdio stream API. From here on f is reused
+ * for the C runtime _O_* flags passed to _open_osfhandle.
+ */
+ switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) {
+ case WT_STREAM_APPEND:
+ f = _O_APPEND | _O_TEXT;
+ stream_mode = "a";
+ F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+ break;
+ case WT_STREAM_READ:
+ f = _O_RDONLY | _O_TEXT;
+ stream_mode = "r";
+ break;
+ case WT_STREAM_WRITE:
+ f = _O_TEXT;
+ stream_mode = "w";
+ F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+ break;
+ case 0:
+ default:
+ stream_mode = NULL;
+ break;
+ }
+ if (stream_mode != NULL) {
+ /*
+ * NOTE(review): if fdopen fails after _open_osfhandle succeeded,
+ * fd is not closed on the error path, only the raw handle is;
+ * confirm that is sufficient.
+ */
+ if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1)
+ WT_ERR_MSG(session, __wt_errno(),
+ "%s: handle-open: _open_osfhandle", name);
+ if ((fh->fp = fdopen(fd, stream_mode)) == NULL)
+ WT_ERR_MSG(session, __wt_errno(),
+ "%s: handle-open: fdopen", name);
+ if (LF_ISSET(WT_STREAM_LINE_BUFFER))
+ __wt_stream_set_line_buffer(fh->fp);
+ }
+
+ /* Configure fallocate/posix_fallocate calls. */
+ __win_handle_allocate_configure(session, fh);
+
+ /*
+ * Directory opens jump here with both local handles still set to
+ * INVALID_HANDLE_VALUE; they still get the full method table.
+ */
+directory_open:
+ fh->filehandle = filehandle;
+ fh->filehandle_secondary = filehandle_secondary;
+
+ fh->fh_advise = __win_handle_advise;
+ fh->fh_allocate = __win_handle_allocate;
+ fh->fh_close = __win_handle_close;
+ fh->fh_getc = __win_handle_getc;
+ fh->fh_lock = __win_handle_lock;
+ fh->fh_map = __wt_win_map;
+ fh->fh_map_discard = __wt_win_map_discard;
+ fh->fh_map_preload = __wt_win_map_preload;
+ fh->fh_map_unmap = __wt_win_map_unmap;
+ fh->fh_printf = __win_handle_printf;
+ fh->fh_read = __win_handle_read;
+ fh->fh_size = __win_handle_size;
+ fh->fh_sync = __win_handle_sync;
+ fh->fh_truncate = __win_handle_truncate;
+ fh->fh_write = __win_handle_write;
+
+ return (0);
+
+ /* Error exit: close whichever handles were opened by this call. */
+err: if (filehandle != INVALID_HANDLE_VALUE)
+ (void)CloseHandle(filehandle);
+ if (filehandle_secondary != INVALID_HANDLE_VALUE)
+ (void)CloseHandle(filehandle_secondary);
+
+ return (ret);
+}
+
+/*
+ * __wt_os_win --
+ *	Install the Windows file-system functions on the connection.
+ */
+int
+__wt_os_win(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+
+	conn = S2C(session);
+
+	/* Handle creation; each handle gets its own methods when opened. */
+	conn->handle_open = __win_handle_open;
+
+	/* Name-based file operations. */
+	conn->file_exist = __win_file_exist;
+	conn->file_remove = __win_file_remove;
+	conn->file_rename = __win_file_rename;
+	conn->file_size = __win_file_size;
+
+	/* Directory operations. */
+	conn->file_directory_list = __wt_win_directory_list;
+	conn->file_directory_sync = __win_directory_sync;
+
+	return (0);
+}
+
+/*
+ * __wt_os_win_cleanup --
+ *	Discard the Windows configuration; there is nothing to release,
+ *	so this always succeeds.
+ */
+int
+__wt_os_win_cleanup(WT_SESSION_IMPL *session)
+{
+	/* The session is not consulted. */
+	WT_UNUSED(session);
+
+	return (0);
+}
diff --git a/src/os_win/os_fsync.c b/src/os_win/os_fsync.c
deleted file mode 100644
index c196fc6c06a..00000000000
--- a/src/os_win/os_fsync.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_directory_sync_fh --
- * Flush a directory file handle.
- */
-int
-__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_UNUSED(session);
- WT_UNUSED(fh);
- return (0);
-}
-
-/*
- * __wt_directory_sync --
- * Flush a directory to ensure a file creation is durable.
- */
-int
-__wt_directory_sync(WT_SESSION_IMPL *session, const char *path)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_UNUSED(session);
- WT_UNUSED(path);
- return (0);
-}
-
-/*
- * __wt_fsync --
- * Flush a file handle.
- */
-int
-__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_DECL_RET;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: FlushFileBuffers",
- fh->name));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE)
- WT_RET_MSG(session,
- __wt_errno(), "%s FlushFileBuffers error", fh->name);
-
- return (0);
-}
-
-/*
- * __wt_fsync_async --
- * Flush a file handle and don't wait for the result.
- */
-int
-__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh)
-{
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_UNUSED(session);
- WT_UNUSED(fh);
-
- return (0);
-}
diff --git a/src/os_win/os_ftruncate.c b/src/os_win/os_ftruncate.c
deleted file mode 100644
index 88fcf9542c1..00000000000
--- a/src/os_win/os_ftruncate.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_ftruncate --
- * Truncate a file.
- */
-int
-__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
-{
- WT_DECL_RET;
- LARGE_INTEGER largeint;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- largeint.QuadPart = len;
-
- if ((ret = SetFilePointerEx(
- fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE)
- WT_RET_MSG(session, __wt_errno(), "%s SetFilePointerEx error",
- fh->name);
-
- ret = SetEndOfFile(fh->filehandle_secondary);
- if (ret != FALSE)
- return (0);
-
- if (GetLastError() == ERROR_USER_MAPPED_FILE)
- return (EBUSY);
-
- WT_RET_MSG(session, __wt_errno(), "%s SetEndOfFile error", fh->name);
-}
diff --git a/src/os_win/os_getenv.c b/src/os_win/os_getenv.c
index c9084769cd5..9b297ac3a74 100644
--- a/src/os_win/os_getenv.c
+++ b/src/os_win/os_getenv.c
@@ -29,7 +29,7 @@ __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
ret = GetEnvironmentVariableA(variable, *envp, size);
/* We expect the number of bytes not including nul terminator. */
if ((ret + 1) != size)
- WT_RET_MSG(session, __wt_errno(),
+ WT_RET_MSG(session, __wt_getlasterror(),
"GetEnvironmentVariableA failed: %s", variable);
return (0);
diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c
index dc040b4fa54..b043f9c9923 100644
--- a/src/os_win/os_map.c
+++ b/src/os_win/os_map.c
@@ -9,102 +9,110 @@
#include "wt_internal.h"
/*
- * __wt_mmap --
+ * __wt_win_map --
* Map a file into memory.
*/
int
-__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp,
- void** mappingcookie)
+__wt_win_map(WT_SESSION_IMPL *session,
+ WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie)
{
+ WT_DECL_RET;
+ size_t len;
+ wt_off_t file_size;
void *map;
- size_t orig_size;
/*
- * Record the current size and only map and set that as the length, it
- * could change between the map call and when we set the return length.
- * For the same reason we could actually map past the end of the file;
- * we don't read bytes past the end of the file though, so as long as
- * the map call succeeds, it's all OK.
+ * There's no locking here to prevent the underlying file from changing
+ * underneath us, our caller needs to ensure consistency of the mapped
+ * region vs. any other file activity.
*/
- orig_size = (size_t)fh->size;
+ WT_RET(__wt_filesize(session, fh, &file_size));
+ len = (size_t)file_size;
+
+ (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
+
*mappingcookie =
CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL);
if (*mappingcookie == NULL)
- WT_RET_MSG(session, __wt_errno(),
- "%s CreateFileMapping error: failed to map %"
- WT_SIZET_FMT " bytes",
- fh->name, orig_size);
+ WT_RET_MSG(session, __wt_getlasterror(),
+ "%s: memory-map: CreateFileMappingA", fh->name);
- if ((map = MapViewOfFile(
- *mappingcookie, FILE_MAP_READ, 0, 0, orig_size)) == NULL) {
+ if ((map =
+ MapViewOfFile(*mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) {
+ /* Retrieve the error before cleaning up. */
+ ret = __wt_getlasterror();
CloseHandle(*mappingcookie);
*mappingcookie = NULL;
- WT_RET_MSG(session, __wt_errno(),
- "%s map error: failed to map %" WT_SIZET_FMT " bytes",
- fh->name, orig_size);
+ WT_RET_MSG(session, ret,
+ "%s: memory-map: MapViewOfFile", fh->name);
}
- (void)__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: MapViewOfFile %p: %" WT_SIZET_FMT " bytes",
- fh->name, map, orig_size);
*(void **)mapp = map;
- *lenp = orig_size;
+ *lenp = len;
return (0);
}
/*
- * __wt_mmap_preload --
+ * __wt_win_map_preload --
* Cause a section of a memory map to be faulted in.
*/
int
-__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size)
+__wt_win_map_preload(
+ WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size)
{
WT_UNUSED(session);
+ WT_UNUSED(fh);
WT_UNUSED(p);
WT_UNUSED(size);
- return (0);
+ return (ENOTSUP);
}
/*
- * __wt_mmap_discard --
+ * __wt_win_map_discard --
* Discard a chunk of the memory map.
*/
int
-__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size)
+__wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size)
{
WT_UNUSED(session);
+ WT_UNUSED(fh);
WT_UNUSED(p);
WT_UNUSED(size);
- return (0);
+
+ return (ENOTSUP);
}
/*
- * __wt_munmap --
+ * __wt_win_map_unmap --
* Remove a memory mapping.
*/
int
-__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len,
- void** mappingcookie)
+__wt_win_map_unmap(WT_SESSION_IMPL *session,
+ WT_FH *fh, void *map, size_t len, void **mappingcookie)
{
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: UnmapViewOfFile %p: %" WT_SIZET_FMT " bytes",
- fh->name, map, len));
+ WT_DECL_RET;
+
+ (void)__wt_verbose(session, WT_VERB_HANDLEOPS,
+ "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
+
+ WT_ASSERT(session, *mappingcookie != NULL);
if (UnmapViewOfFile(map) == 0) {
- WT_RET_MSG(session, __wt_errno(),
- "%s UnmapViewOfFile error: failed to unmap %" WT_SIZET_FMT
- " bytes",
- fh->name, len);
+ ret = __wt_getlasterror();
+ __wt_err(session, ret,
+ "%s: memory-unmap: UnmapViewOfFile", fh->name);
}
if (CloseHandle(*mappingcookie) == 0) {
- WT_RET_MSG(session, __wt_errno(),
- "CloseHandle: MapViewOfFile: %s", fh->name);
+ ret = __wt_getlasterror();
+ __wt_err(session, ret,
+ "%s: memory-unmap: CloseHandle", fh->name);
}
- *mappingcookie = 0;
+ *mappingcookie = NULL;
- return (0);
+ return (ret);
}
diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c
index 14bac2a99d9..af4a5035076 100644
--- a/src/os_win/os_mtx_cond.c
+++ b/src/os_win/os_mtx_cond.c
@@ -103,7 +103,7 @@ __wt_cond_wait_signal(
if ((err = GetLastError()) == ERROR_TIMEOUT)
*signalled = false;
else
- ret = __wt_errno();
+ ret = __wt_getlasterror();
} else
ret = 0;
diff --git a/src/os_win/os_once.c b/src/os_win/os_once.c
index 9ea3fe044eb..347d1883cca 100644
--- a/src/os_win/os_once.c
+++ b/src/os_win/os_once.c
@@ -32,7 +32,7 @@ BOOL CALLBACK _wt_init_once_callback(
* One-time initialization per process.
*/
int
-__wt_once(void(*init_routine)(void))
+__wt_once(void (*init_routine)(void))
{
INIT_ONCE once_control = INIT_ONCE_STATIC_INIT;
PVOID lpContext = NULL;
diff --git a/src/os_win/os_open.c b/src/os_win/os_open.c
deleted file mode 100644
index f10582c5bd1..00000000000
--- a/src/os_win/os_open.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_open --
- * Open a file handle.
- */
-int
-__wt_open(WT_SESSION_IMPL *session,
- const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp)
-{
- DWORD dwCreationDisposition;
- HANDLE filehandle, filehandle_secondary;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh, *tfh;
- uint64_t bucket, hash;
- int f, share_mode;
- bool direct_io, matched;
- char *path;
-
- conn = S2C(session);
- fh = NULL;
- path = NULL;
- filehandle = INVALID_HANDLE_VALUE;
- filehandle_secondary = INVALID_HANDLE_VALUE;
- direct_io = false;
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name));
-
- /* Increment the reference count if we already have the file open. */
- matched = false;
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, tfh->name) == 0) {
- ++tfh->ref;
- *fhp = tfh;
- matched = true;
- break;
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- if (matched)
- return (0);
-
- /* For directories, create empty file handles with invalid handles */
- if (dio_type == WT_FILE_TYPE_DIRECTORY) {
- goto setupfh;
- }
-
- WT_RET(__wt_filename(session, name, &path));
-
- /*
- * If this is a read-only connection, open all files read-only
- * except the lock file.
- */
- if (F_ISSET(conn, WT_CONN_READONLY) &&
- !WT_STRING_MATCH(name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)))
- share_mode = FILE_SHARE_READ;
- else
- share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
-
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles.
- *
- * TODO: Set tighter file permissions but set bInheritHandle to false
- * to prevent inheritance
- */
-
- f = FILE_ATTRIBUTE_NORMAL;
-
- dwCreationDisposition = 0;
- if (ok_create) {
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
- WT_STRING_MATCH(name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- dwCreationDisposition = CREATE_NEW;
- if (exclusive)
- dwCreationDisposition = CREATE_ALWAYS;
- } else
- dwCreationDisposition = OPEN_EXISTING;
-
- /*
- * direct_io means no OS file caching. This requires aligned buffer
- * allocations like O_DIRECT.
- */
- if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) {
- f |= FILE_FLAG_NO_BUFFERING;
- direct_io = true;
- }
-
- /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
- if (dio_type && FLD_ISSET(conn->write_through, dio_type)) {
- f |= FILE_FLAG_WRITE_THROUGH;
- }
-
- if (dio_type == WT_FILE_TYPE_LOG &&
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
- f |= FILE_FLAG_WRITE_THROUGH;
- }
-
- /* Disable read-ahead on trees: it slows down random read workloads. */
- if (dio_type == WT_FILE_TYPE_DATA ||
- dio_type == WT_FILE_TYPE_CHECKPOINT)
- f |= FILE_FLAG_RANDOM_ACCESS;
-
- filehandle = CreateFileA(path,
- (GENERIC_READ | GENERIC_WRITE),
- share_mode,
- NULL,
- dwCreationDisposition,
- f,
- NULL);
- if (filehandle == INVALID_HANDLE_VALUE) {
- if (GetLastError() == ERROR_FILE_EXISTS && ok_create)
- filehandle = CreateFileA(path,
- (GENERIC_READ | GENERIC_WRITE),
- share_mode,
- NULL,
- OPEN_EXISTING,
- f,
- NULL);
-
- if (filehandle == INVALID_HANDLE_VALUE)
- WT_ERR_MSG(session, __wt_errno(),
- direct_io ?
- "%s: open failed with direct I/O configured, some "
- "filesystem types do not support direct I/O" :
- "%s", path);
- }
-
- /*
- * Open a second handle to file to support allocation/truncation
- * concurrently with reads on the file. Writes would also move the file
- * pointer.
- */
- filehandle_secondary = CreateFileA(path,
- (GENERIC_READ | GENERIC_WRITE),
- share_mode,
- NULL,
- OPEN_EXISTING,
- f,
- NULL);
- if (filehandle == INVALID_HANDLE_VALUE)
- WT_ERR_MSG(session, __wt_errno(),
- "open failed for secondary handle: %s", path);
-
-setupfh:
- WT_ERR(__wt_calloc_one(session, &fh));
- WT_ERR(__wt_strdup(session, name, &fh->name));
- fh->name_hash = hash;
- fh->filehandle = filehandle;
- fh->filehandle_secondary = filehandle_secondary;
- fh->ref = 1;
- fh->direct_io = direct_io;
-
- /* Set the file's size. */
- if (dio_type != WT_FILE_TYPE_DIRECTORY)
- WT_ERR(__wt_filesize(session, fh, &fh->size));
-
- /* Configure file extension. */
- if (dio_type == WT_FILE_TYPE_DATA ||
- dio_type == WT_FILE_TYPE_CHECKPOINT)
- fh->extend_len = conn->data_extend_len;
-
- /* Configure fallocate/posix_fallocate calls. */
- __wt_fallocate_config(session, fh);
-
- /*
- * Repeat the check for a match, but then link onto the database's list
- * of files.
- */
- matched = false;
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, tfh->name) == 0) {
- ++tfh->ref;
- *fhp = tfh;
- matched = true;
- break;
- }
- if (!matched) {
- WT_CONN_FILE_INSERT(conn, fh, bucket);
- (void)__wt_atomic_add32(&conn->open_file_count, 1);
-
- *fhp = fh;
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- if (matched) {
-err: if (fh != NULL) {
- __wt_free(session, fh->name);
- __wt_free(session, fh);
- }
- if (filehandle != INVALID_HANDLE_VALUE)
- (void)CloseHandle(filehandle);
- if (filehandle_secondary != INVALID_HANDLE_VALUE)
- (void)CloseHandle(filehandle_secondary);
- }
-
- __wt_free(session, path);
- return (ret);
-}
-
-/*
- * __wt_close --
- * Close a file handle.
- */
-int
-__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh;
- uint64_t bucket;
-
- conn = S2C(session);
-
- if (*fhp == NULL)
- return (0);
- fh = *fhp;
- *fhp = NULL;
-
- __wt_spin_lock(session, &conn->fh_lock);
- if (fh == NULL || fh->ref == 0 || --fh->ref > 0) {
- __wt_spin_unlock(session, &conn->fh_lock);
- return (0);
- }
-
- /* Remove from the list. */
- bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_FILE_REMOVE(conn, fh, bucket);
- (void)__wt_atomic_sub32(&conn->open_file_count, 1);
-
- __wt_spin_unlock(session, &conn->fh_lock);
-
- /* Discard the memory.
- * Note: For directories, we do not open valid directory handles on
- * windows since it is not possible to sync a directory
- */
- if (fh->filehandle != INVALID_HANDLE_VALUE &&
- CloseHandle(fh->filehandle) == 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "CloseHandle: %s", fh->name);
- }
-
- if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
- CloseHandle(fh->filehandle_secondary) == 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "CloseHandle: secondary: %s", fh->name);
- }
-
- __wt_free(session, fh->name);
- __wt_free(session, fh);
- return (ret);
-}
diff --git a/src/os_win/os_remove.c b/src/os_win/os_remove.c
deleted file mode 100644
index 84f1dd86674..00000000000
--- a/src/os_win/os_remove.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __remove_file_check --
- * Check if the file is currently open before removing it.
- */
-static inline void
-__remove_file_check(WT_SESSION_IMPL *session, const char *name)
-{
-#ifdef HAVE_DIAGNOSTIC
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
- uint64_t bucket;
-
- conn = S2C(session);
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
- fh = NULL;
- bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
-
- /*
- * Check if the file is open: it's an error if it is, since a higher
- * level should have closed it before removing.
- */
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, fh->name) == 0)
- break;
- __wt_spin_unlock(session, &conn->fh_lock);
-
- WT_ASSERT(session, fh == NULL);
-#else
- WT_UNUSED(session);
- WT_UNUSED(name);
-#endif
-}
-
-/*
- * __wt_remove --
- * Remove a file.
- */
-int
-__wt_remove(WT_SESSION_IMPL *session, const char *name)
-{
- WT_DECL_RET;
- char *path;
- uint32_t lasterror;
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name));
-
- __remove_file_check(session, name);
-
- WT_RET(__wt_filename(session, name, &path));
-
- if ((ret = DeleteFileA(path)) == FALSE)
- lasterror = __wt_errno();
-
- __wt_free(session, path);
-
- if (ret != FALSE)
- return (0);
-
- WT_RET_MSG(session, lasterror, "%s: remove", name);
-}
diff --git a/src/os_win/os_rename.c b/src/os_win/os_rename.c
deleted file mode 100644
index b4be2dba24c..00000000000
--- a/src/os_win/os_rename.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_rename --
- * Rename a file.
- */
-int
-__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
-{
- WT_DECL_RET;
- uint32_t lasterror;
- char *from_path, *to_path;
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "rename %s to %s", from, to));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- from_path = to_path = NULL;
-
- WT_RET(__wt_filename(session, from, &from_path));
- WT_TRET(__wt_filename(session, to, &to_path));
-
- /*
- * Check if file exists since Windows does not override the file if
- * it exists.
- */
- if ((ret = GetFileAttributesA(to_path)) != INVALID_FILE_ATTRIBUTES) {
- if ((ret = DeleteFileA(to_path)) == FALSE) {
- lasterror = __wt_errno();
- goto err;
- }
- }
-
- if ((MoveFileA(from_path, to_path)) == FALSE)
- lasterror = __wt_errno();
-
-err:
- __wt_free(session, from_path);
- __wt_free(session, to_path);
-
- if (ret != FALSE)
- return (0);
-
- WT_RET_MSG(session, lasterror, "MoveFile %s to %s", from, to);
-}
diff --git a/src/os_win/os_rw.c b/src/os_win/os_rw.c
deleted file mode 100644
index a9537a648f9..00000000000
--- a/src/os_win/os_rw.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_read --
- * Read a chunk.
- */
-int
-__wt_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
-{
- DWORD chunk;
- DWORD nr;
- uint8_t *addr;
- OVERLAPPED overlapped = { 0 };
-
- nr = 0;
-
- WT_STAT_FAST_CONN_INCR(session, read_io);
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
- overlapped.Offset = UINT32_MAX & offset;
- overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
-
- if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped))
- WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
- "%s read error: failed to read %" WT_SIZET_FMT
- " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
- }
- return (0);
-}
-
-/*
- * __wt_write --
- * Write a chunk.
- */
-int
-__wt_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
-{
- DWORD chunk;
- DWORD nw;
- const uint8_t *addr;
- OVERLAPPED overlapped = { 0 };
-
- nw = 0;
-
- WT_STAT_FAST_CONN_INCR(session, write_io);
-
- WT_RET(__wt_verbose(session, WT_VERB_FILEOPS,
- "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- fh->name, len, (uintmax_t)offset));
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name, WT_SINGLETHREAD,
- strlen(WT_SINGLETHREAD)));
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
- chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
- overlapped.Offset = UINT32_MAX & offset;
- overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
-
- if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped))
- WT_RET_MSG(session, __wt_errno(),
- "%s write error: failed to write %" WT_SIZET_FMT
- " bytes at offset %" PRIuMAX,
- fh->name, chunk, (uintmax_t)offset);
- }
- return (0);
-}
diff --git a/src/os_win/os_setvbuf.c b/src/os_win/os_setvbuf.c
new file mode 100644
index 00000000000..b38ab1ebee2
--- /dev/null
+++ b/src/os_win/os_setvbuf.c
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_stream_set_line_buffer --
+ * Set line buffering on a stream.
+ */
+void
+__wt_stream_set_line_buffer(FILE *fp)
+{
+ /*
+ * This function exists because MSVC doesn't support buffer sizes of 0
+ * to the setvbuf call. To avoid re-introducing the bug, we have helper
+ * functions and disallow calling setvbuf directly in WiredTiger code.
+ *
+ * Additionally, MSVC doesn't support line buffering, the result is the
+ * same as full-buffering. We assume our caller wants immediate output,
+ * set no-buffering instead.
+ */
+ __wt_stream_set_no_buffer(fp);
+}
+
+/*
+ * __wt_stream_set_no_buffer --
+ * Turn off buffering on a stream.
+ */
+void
+__wt_stream_set_no_buffer(FILE *fp)
+{
+ (void)setvbuf(fp, NULL, _IONBF, 0);
+}
diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c
index 1d4b316488a..1cb61f7c4aa 100644
--- a/src/os_win/os_sleep.c
+++ b/src/os_win/os_sleep.c
@@ -15,11 +15,15 @@
void
__wt_sleep(uint64_t seconds, uint64_t micro_seconds)
{
+ DWORD dwMilliseconds;
+
/*
* If the caller wants a small pause, set to our
* smallest granularity.
*/
if (seconds == 0 && micro_seconds < WT_THOUSAND)
micro_seconds = WT_THOUSAND;
- Sleep(seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND);
+ dwMilliseconds = (DWORD)
+ (seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND);
+ Sleep(dwMilliseconds);
}
diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c
index 3be0ccb9393..94c5a8b0ab2 100644
--- a/src/os_win/os_thread.c
+++ b/src/os_win/os_thread.c
@@ -21,7 +21,7 @@ __wt_thread_create(WT_SESSION_IMPL *session,
if (*tidret != 0)
return (0);
- WT_RET_MSG(session, errno, "_beginthreadex");
+ WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex");
}
/*
@@ -37,12 +37,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
/*
* If we fail to wait, we will leak handles so do not continue
*/
- WT_PANIC_RET(session, ret == WAIT_FAILED ? __wt_errno() : ret,
- "Wait for thread join failed");
+ WT_PANIC_RET(session,
+ ret == WAIT_FAILED ? __wt_getlasterror() : ret,
+ "thread join: WaitForSingleObject");
if (CloseHandle(tid) == 0) {
- WT_RET_MSG(session, __wt_errno(),
- "CloseHandle: thread join");
+ WT_RET_MSG(session,
+ __wt_getlasterror(), "thread join: CloseHandle");
}
return (0);
@@ -53,7 +54,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
* Fill in a printable version of the process and thread IDs.
*/
void
-__wt_thread_id(char* buf, size_t buflen)
+__wt_thread_id(char *buf, size_t buflen)
{
(void)snprintf(buf, buflen,
"%" PRIu64 ":%" PRIu64,
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index a69f335c9b3..26123f6b66d 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -2409,8 +2409,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
/* Finalize the header information and write the page. */
dsk->recno = last->recno;
dsk->u.entries = r->entries;
- dsk->mem_size =
- r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk);
+ dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk);
+ r->disk_image.size = dsk->mem_size;
WT_RET(
__rec_split_write(session, r, last, &r->disk_image, false));
@@ -2790,9 +2790,9 @@ no_slots:
WT_STAT_FAST_DATA_INCR(session, compress_raw_fail);
dsk->recno = last->recno;
- dsk->mem_size =
- r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk);
+ dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk);
dsk->u.entries = r->entries;
+ r->disk_image.size = dsk->mem_size;
r->entries = 0;
r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk);
@@ -2972,7 +2972,8 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
dsk = r->disk_image.mem;
dsk->recno = bnd->recno;
dsk->u.entries = r->entries;
- dsk->mem_size = r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk);
+ dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk);
+ r->disk_image.size = dsk->mem_size;
/* If this is a checkpoint, we're done, otherwise write the page. */
return (__rec_is_checkpoint(session, r, bnd) ?
@@ -6086,8 +6087,9 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
dsk->u.datalen = (uint32_t)kv->buf.size;
memcpy(WT_PAGE_HEADER_BYTE(btree, dsk),
kv->buf.data, kv->buf.size);
- dsk->mem_size = tmp->size =
+ dsk->mem_size =
WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size;
+ tmp->size = dsk->mem_size;
/* Write the buffer. */
addr = buf;
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 2a53ad58f52..3f7b34d132f 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -267,8 +267,9 @@ __wt_session_compact(
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, compact, config, cfg);
+ /* In-memory is already as compact as it's going to get. */
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- WT_ERR(ENOTSUP);
+ goto err;
/* Disallow objects in the WiredTiger name space. */
WT_ERR(__wt_str_name_check(session, uri));
diff --git a/src/support/err.c b/src/support/err.c
index 875bd3efcf3..f64492f1561 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -16,12 +16,15 @@ static int
__handle_error_default(WT_EVENT_HANDLER *handler,
WT_SESSION *wt_session, int error, const char *errmsg)
{
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(handler);
- WT_UNUSED(wt_session);
WT_UNUSED(error);
- WT_RET(__wt_fprintf(stderr, "%s\n", errmsg));
- WT_RET(__wt_fflush(stderr));
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg));
+ WT_RET(__wt_fsync(session, WT_STDERR(session), true));
return (0);
}
@@ -33,11 +36,13 @@ static int
__handle_message_default(WT_EVENT_HANDLER *handler,
WT_SESSION *wt_session, const char *message)
{
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(handler);
- WT_UNUSED(wt_session);
- WT_RET(__wt_fprintf(stdout, "%s\n", message));
- WT_RET(__wt_fflush(stdout));
+ session = (WT_SESSION_IMPL *)wt_session;
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message));
+ WT_RET(__wt_fsync(session, WT_STDOUT(session), true));
return (0);
}
@@ -175,13 +180,19 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
* example, we can end up here without a session.)
*/
if (session == NULL) {
- WT_RET(__wt_fprintf(stderr,
+ if (fprintf(stderr,
"WiredTiger Error%s%s: ",
error == 0 ? "" : ": ",
- error == 0 ? "" : __wt_strerror(session, error, NULL, 0)));
- WT_RET(__wt_vfprintf(stderr, fmt, ap));
- WT_RET(__wt_fprintf(stderr, "\n"));
- return (__wt_fflush(stderr));
+ error == 0 ? "" :
+ __wt_strerror(session, error, NULL, 0)) < 0)
+ ret = EIO;
+ if (vfprintf(stderr, fmt, ap) < 0)
+ ret = EIO;
+ if (fprintf(stderr, "\n") < 0)
+ ret = EIO;
+ if (fflush(stderr) != 0)
+ ret = EIO;
+ return (ret);
}
p = s;
diff --git a/src/os_posix/os_mtx_rw.c b/src/support/mtx_rw.c
index b6876cdfbdc..b6876cdfbdc 100644
--- a/src/os_posix/os_mtx_rw.c
+++ b/src/support/mtx_rw.c
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 1eebc9e9d04..fdbda26b781 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -1246,7 +1246,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
return (0);
- return (bm->sync(bm, session, false));
+ return (bm->sync(bm, session, true));
}
/*
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 37a6e0b3711..da2670fb344 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -10,7 +10,6 @@
/* Cookie passed to __txn_printlog. */
typedef struct {
- FILE *out;
uint32_t flags;
} WT_TXN_PRINTLOG_ARGS;
@@ -69,28 +68,28 @@ err: __wt_buf_free(session, &key);
* Print a commit log record.
*/
static int
-__txn_commit_printlog(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out,
- uint32_t flags)
+__txn_commit_printlog(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
bool firstrecord;
firstrecord = true;
- WT_RET(__wt_fprintf(out, " \"ops\": [\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"ops\": [\n"));
/* The logging subsystem zero-pads records. */
while (*pp < end && **pp) {
if (!firstrecord)
- WT_RET(__wt_fprintf(out, ",\n"));
- WT_RET(__wt_fprintf(out, " {"));
+ WT_RET(__wt_fprintf(
+ session, WT_STDOUT(session), ",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), " {"));
firstrecord = false;
- WT_RET(__wt_txn_op_printlog(session, pp, end, out, flags));
- WT_RET(__wt_fprintf(out, "\n }"));
+ WT_RET(__wt_txn_op_printlog(session, pp, end, flags));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n }"));
}
- WT_RET(__wt_fprintf(out, "\n ]\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n ]\n"));
return (0);
}
@@ -465,7 +464,6 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
void *cookie, int firstrecord)
{
- FILE *out;
WT_LOG_RECORD *logrec;
WT_TXN_PRINTLOG_ARGS *args;
const uint8_t *end, *p;
@@ -477,7 +475,6 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_UNUSED(next_lsnp);
args = cookie;
- out = args->out;
p = WT_LOG_SKIP_HEADER(rawrec->data);
end = (const uint8_t *)rawrec->data + rawrec->size;
@@ -488,16 +485,16 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_RET(__wt_logrec_read(session, &p, end, &rectype));
if (!firstrecord)
- WT_RET(__wt_fprintf(out, ",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), ",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n",
lsnp->l.file, lsnp->l.offset));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : ""));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"rec_len\" : %" PRIu32 ",\n", logrec->len));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"mem_len\" : %" PRIu32 ",\n",
compressed ? logrec->mem_len : logrec->len));
@@ -505,40 +502,44 @@ __txn_printlog(WT_SESSION_IMPL *session,
case WT_LOGREC_CHECKPOINT:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
- WT_RET(__wt_fprintf(out, " \"type\" : \"checkpoint\",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"type\" : \"checkpoint\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n",
lsnfile, lsnoffset));
break;
case WT_LOGREC_COMMIT:
WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid));
- WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"type\" : \"commit\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"txnid\" : %" PRIu64 ",\n", txnid));
- WT_RET(__txn_commit_printlog(session, &p, end, out,
- args->flags));
+ WT_RET(__txn_commit_printlog(session, &p, end, args->flags));
break;
case WT_LOGREC_FILE_SYNC:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(Ii), &fileid, &start));
- WT_RET(__wt_fprintf(out, " \"type\" : \"file_sync\",\n"));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"type\" : \"file_sync\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"fileid\" : %" PRIu32 ",\n", fileid));
- WT_RET(__wt_fprintf(out,
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"start\" : %" PRId32 "\n", start));
break;
case WT_LOGREC_MESSAGE:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(S), &msg));
- WT_RET(__wt_fprintf(out, " \"type\" : \"message\",\n"));
- WT_RET(__wt_fprintf(out, " \"message\" : \"%s\"\n", msg));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"type\" : \"message\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"message\" : \"%s\"\n", msg));
break;
}
- WT_RET(__wt_fprintf(out, " }"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), " }"));
return (0);
}
@@ -548,19 +549,18 @@ __txn_printlog(WT_SESSION_IMPL *session,
* Print the log in a human-readable format.
*/
int
-__wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags)
+__wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags)
{
WT_SESSION_IMPL *session;
WT_TXN_PRINTLOG_ARGS args;
session = (WT_SESSION_IMPL *)wt_session;
- args.out = out;
args.flags = flags;
- WT_RET(__wt_fprintf(out, "[\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "[\n"));
WT_RET(__wt_log_scan(
session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args));
- WT_RET(__wt_fprintf(out, "\n]\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n]\n"));
return (0);
}
diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c
index b3afc78e9e8..190c0878f38 100644
--- a/src/utilities/util_backup.c
+++ b/src/utilities/util_backup.c
@@ -8,12 +8,9 @@
#include "util.h"
-static int copy(const char *, const char *);
+static int copy(WT_SESSION *, const char *, const char *);
static int usage(void);
-#define CBUF_LEN (128 * 1024) /* Copy buffer and size. */
-static char *cbuf;
-
/*
* append_target --
* Build a list of comma-separated targets.
@@ -86,7 +83,7 @@ util_backup(WT_SESSION *session, int argc, char *argv[])
while (
(ret = cursor->next(cursor)) == 0 &&
(ret = cursor->get_key(cursor, &name)) == 0)
- if ((ret = copy(name, directory)) != 0)
+ if ((ret = copy(session, directory, name)) != 0)
goto err;
if (ret == WT_NOTFOUND)
ret = 0;
@@ -98,97 +95,41 @@ util_backup(WT_SESSION *session, int argc, char *argv[])
}
err: free(config);
- free(cbuf);
-
return (ret);
}
static int
-copy(const char *name, const char *directory)
+copy(WT_SESSION *session, const char *directory, const char *name)
{
WT_DECL_RET;
- ssize_t n;
- int ifd, ofd;
-
- ret = 1;
- ifd = ofd = -1;
-
- if (verbose &&
- printf("Backing up %s/%s to %s\n", home, name, directory) < 0) {
- fprintf(stderr, "%s: %s\n", progname, strerror(errno));
- return (1);
- }
+ size_t len;
+ char *to;
- /* Allocate a large copy buffer (use it to build pathnames as well. */
- if (cbuf == NULL && (cbuf = malloc(CBUF_LEN)) == NULL)
- goto memerr;
-
- /* Open the read file. */
- if (snprintf(cbuf, CBUF_LEN, "%s/%s", home, name) >= CBUF_LEN)
- goto memerr;
- if ((ifd = open(cbuf, O_BINARY | O_RDONLY, 0)) < 0)
- goto readerr;
+ to = NULL;
- /* Open the write file. */
- if (snprintf(cbuf, CBUF_LEN, "%s/%s", directory, name) >= CBUF_LEN)
+ /* Build the target pathname. */
+ len = strlen(directory) + strlen(name) + 2;
+ if ((to = malloc(len)) == NULL)
goto memerr;
- if ((ofd = open(
- cbuf, O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, 0666)) < 0)
- goto writerr;
+ (void)snprintf(to, len, "%s/%s", directory, name);
- /* Copy the file. */
- while ((n = read(ifd, cbuf, CBUF_LEN)) > 0)
- if (write(ofd, cbuf, (size_t)n) != n)
- goto writerr;
- if (n != 0)
- goto readerr;
-
- /*
- * Close file descriptors (forcing a flush on the write side), and
- * check for any errors.
- */
- ret = close(ifd);
- ifd = -1;
- if (ret != 0)
- goto readerr;
+ if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(EIO));
+ /* ret is still 0 here: report the failure to the caller. */
+ ret = 1;
+ goto err;
+ }
/*
- * We need to know this file was successfully written, it's a backup.
+ * Use WiredTiger to copy the file: ensuring stability of the copied
+ * file on disk requires care, and WiredTiger knows how to do it.
*/
-#ifdef _WIN32
- if (FlushFileBuffers((HANDLE)_get_osfhandle(ofd)) == 0) {
- DWORD err = GetLastError();
- ret = err;
- goto writerr;
- }
-#else
- if (fsync(ofd))
- goto writerr;
-#endif
- ret = close(ofd);
- ofd = -1;
- if (ret != 0)
- goto writerr;
-
- /* Success. */
- ret = 0;
+ if ((ret = __wt_copy_and_sync(session, name, to)) != 0)
+ fprintf(stderr, "%s/%s to %s: backup copy: %s\n",
+ home, name, to, session->strerror(session, ret));
if (0) {
-readerr: fprintf(stderr,
- "%s: %s/%s: %s\n", progname, home, name, strerror(errno));
- }
- if (0) {
-writerr: fprintf(stderr, "%s: %s/%s: %s\n",
- progname, directory, name, strerror(errno));
- }
- if (0) {
memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno));
+ /* Allocation failed before ret was set: don't return success. */
+ ret = 1;
}
-
- if (ifd >= 0)
- (void)close(ifd);
- if (ofd >= 0)
- (void)close(ofd);
+err: free(to);
return (ret);
}
diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c
index 9349d39bb1e..3a1f847a95f 100644
--- a/src/utilities/util_load_json.c
+++ b/src/utilities/util_load_json.c
@@ -213,8 +213,7 @@ json_data(WT_SESSION *session,
{
WT_CURSOR *cursor;
WT_DECL_RET;
- size_t keystrlen;
- ssize_t gotnolen;
+ size_t gotnolen, keystrlen;
uint64_t gotno, recno;
int nfield, nkeys, toktype, tret;
bool isrec;
@@ -274,9 +273,8 @@ json_data(WT_SESSION *session,
/* Verify the dump has recnos in order. */
recno++;
gotno = __wt_strtouq(ins->tokstart, &endp, 0);
- gotnolen = (endp - ins->tokstart);
- if (recno != gotno ||
- ins->toklen != (size_t)gotnolen) {
+ gotnolen = (size_t)(endp - ins->tokstart);
+ if (recno != gotno || ins->toklen != gotnolen) {
ret = util_err(session, 0,
"%s: recno out of order", uri);
goto err;
diff --git a/src/utilities/util_printlog.c b/src/utilities/util_printlog.c
index 9a2bdc8a9ba..e7fa2134934 100644
--- a/src/utilities/util_printlog.c
+++ b/src/utilities/util_printlog.c
@@ -41,7 +41,7 @@ util_printlog(WT_SESSION *session, int argc, char *argv[])
if (argc != 0)
return (usage());
- ret = __wt_txn_printlog(session, stdout, flags);
+ ret = __wt_txn_printlog(session, flags);
if (ret != 0) {
fprintf(stderr, "%s: printlog failed: %s\n",
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index f9c3ed28814..cd7d1b08708 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -110,7 +110,7 @@ fill_db(void)
/*
* Set to no buffering.
*/
- (void)setvbuf(fp, NULL, _IONBF, 0);
+ __wt_stream_set_no_buffer(fp);
/*
* Write data into the table until we are killed by the parent.
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c
index 67fdb932c27..e099873e5b9 100644
--- a/test/recovery/truncated-log.c
+++ b/test/recovery/truncated-log.c
@@ -107,7 +107,7 @@ fill_db(void)
/*
* Set to no buffering.
*/
- (void)setvbuf(fp, NULL, _IONBF, 0);
+ __wt_stream_set_no_buffer(fp);
save_lsn.l.file = 0;
/*
diff --git a/test/utility/test_util.i b/test/utility/test_util.i
index c5cebadcb5c..43982d9e4a1 100644
--- a/test/utility/test_util.i
+++ b/test/utility/test_util.i
@@ -101,13 +101,13 @@ testutil_die(int e, const char *fmt, ...)
* Creates the full intended work directory in buffer.
*/
static inline void
-testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir)
+testutil_work_dir_from_path(char *buffer, size_t len, const char *dir)
{
/* If no directory is provided, use the default. */
if (dir == NULL)
dir = DEFAULT_DIR;
- if (inputSize < strlen(dir) + 1)
+ if (len < strlen(dir) + 1)
testutil_die(ENOMEM,
"Not enough memory in buffer for directory %s", dir);
@@ -116,55 +116,48 @@ testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir)
/*
* testutil_clean_work_dir --
- * Remove any existing work directories, can optionally fail on error
+ * Remove the work directory.
*/
static inline void
testutil_clean_work_dir(char *dir)
{
- size_t inputSize;
+ size_t len;
int ret;
- bool exist;
- char *buffer;
+ char *buf;
/* Additional bytes for the Windows rd command. */
- inputSize = strlen(dir) + sizeof(RM_COMMAND);
- if ((buffer = malloc(inputSize)) == NULL)
+ len = strlen(dir) + strlen(RM_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
testutil_die(ENOMEM, "Failed to allocate memory");
- snprintf(buffer, inputSize, "%s%s", RM_COMMAND, dir);
+ snprintf(buf, len, "%s%s", RM_COMMAND, dir);
- exist = 0;
- if ((ret = __wt_exist(NULL, dir, &exist)) != 0)
- testutil_die(ret,
- "Unable to check if directory exists");
- if (exist == 1 && (ret = system(buffer)) != 0)
- testutil_die(ret,
- "System call to remove directory failed");
- free(buffer);
+ if ((ret = system(buf)) != 0 && ret != ENOENT)
+ testutil_die(ret, "%s", buf);
+ free(buf);
}
/*
* testutil_make_work_dir --
- * Delete the existing work directory if it exists, then create a new one.
+ * Delete the existing work directory, then create a new one.
*/
static inline void
testutil_make_work_dir(char *dir)
{
- size_t inputSize;
+ size_t len;
int ret;
- char *buffer;
+ char *buf;
testutil_clean_work_dir(dir);
/* Additional bytes for the mkdir command */
- inputSize = strlen(dir) + sizeof(MKDIR_COMMAND);
- if ((buffer = malloc(inputSize)) == NULL)
+ len = strlen(dir) + strlen(MKDIR_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
testutil_die(ENOMEM, "Failed to allocate memory");
/* mkdir shares syntax between Windows and Linux */
- snprintf(buffer, inputSize, "%s%s", MKDIR_COMMAND, dir);
- if ((ret = system(buffer)) != 0)
- testutil_die(ret, "directory create call of '%s%s' failed",
- MKDIR_COMMAND, dir);
- free(buffer);
+ snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir);
+ if ((ret = system(buf)) != 0)
+ testutil_die(ret, "%s", buf);
+ free(buf);
}